From 8ab72c747a91ffb833f05e72196344f93fbb2092 Mon Sep 17 00:00:00 2001
From: Philip Hackstock <20710924+phackstock@users.noreply.github.com>
Date: Tue, 9 May 2023 11:41:22 +0200
Subject: [PATCH 1/8] Remove mutable defaults

---
 nomenclature/__init__.py | 4 +++-
 nomenclature/codelist.py | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/nomenclature/__init__.py b/nomenclature/__init__.py
index 927c6442..a471a0c6 100644
--- a/nomenclature/__init__.py
+++ b/nomenclature/__init__.py
@@ -30,7 +30,7 @@ __version__ = version("nomenclature-iamc")
 
 
-def create_yaml_from_xlsx(source, target, sheet_name, col, attrs=[]):
+def create_yaml_from_xlsx(source, target, sheet_name, col, attrs=None):
     """Parses an xlsx file with a codelist and writes a yaml file
 
     Parameters
@@ -46,6 +46,8 @@ def create_yaml_from_xlsx(source, target, sheet_name, col, attrs=[]):
     attrs : list, optional
         Columns from `sheet_name` to use as attributes.
     """
+    if attrs is None:
+        attrs = []
     SPECIAL_CODELIST.get(col.lower(), CodeList).read_excel(
         name="", source=source, sheet_name=sheet_name, col=col, attrs=attrs
     ).to_yaml(target)
diff --git a/nomenclature/codelist.py b/nomenclature/codelist.py
index 2c4f67fe..c41cc15a 100644
--- a/nomenclature/codelist.py
+++ b/nomenclature/codelist.py
@@ -223,7 +223,7 @@ def from_directory(cls, name: str, path: Path, file_glob_pattern: str = "**/*"):
         return cls(name=name, mapping=mapping)
 
     @classmethod
-    def read_excel(cls, name, source, sheet_name, col, attrs=[]):
+    def read_excel(cls, name, source, sheet_name, col, attrs=None):
         """Parses an xlsx file with a codelist
 
         Parameters
@@ -239,6 +239,8 @@ def read_excel(cls, name, source, sheet_name, col, attrs=[]):
         attrs : list, optional
             Columns from `sheet_name` to use as attributes.
         """
+        if attrs is None:
+            attrs = []
        codelist = pd.read_excel(source, sheet_name=sheet_name, usecols=[col] + attrs)
 
         # replace nan with None
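Why this patch matters: a default like `attrs=[]` is evaluated once, when the function is defined, so every call that relies on the default shares the same list object, and any mutation leaks into later calls. A minimal sketch of the failure mode (hypothetical `append_code` function, not from this codebase):

    def append_code(code, codes=[]):   # one list object, created at def-time
        codes.append(code)
        return codes

    append_code("Primary Energy")   # ['Primary Energy']
    append_code("Final Energy")     # ['Primary Energy', 'Final Energy'] <- leaked state

The `attrs=None` plus `if attrs is None: attrs = []` pattern used in the patch gives each call a fresh list instead.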
From 5316b7c5f58da9c3226e94333be6d9ff85fdd0bd Mon Sep 17 00:00:00 2001
From: Philip Hackstock <20710924+phackstock@users.noreply.github.com>
Date: Tue, 9 May 2023 14:04:21 +0200
Subject: [PATCH 2/8] Remove unnecessary else

---
 nomenclature/processor/region.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/nomenclature/processor/region.py b/nomenclature/processor/region.py
index 917c277c..91688278 100644
--- a/nomenclature/processor/region.py
+++ b/nomenclature/processor/region.py
@@ -82,10 +82,9 @@ def is_single_constituent_region(self):
     def rename_dict(self):
         if self.is_single_constituent_region:
             return {self.constituent_regions[0]: self.name}
-        else:
-            raise AttributeError(
-                "rename_dict is only available for single constituent regions"
-            )
+        raise AttributeError(
+            "rename_dict is only available for single constituent regions"
+        )
 
 
 class RegionAggregationMapping(BaseModel):

From 2a334238f6897dae5f4c8396dc323feff8647e3d Mon Sep 17 00:00:00 2001
From: Philip Hackstock <20710924+phackstock@users.noreply.github.com>
Date: Tue, 9 May 2023 14:05:00 +0200
Subject: [PATCH 3/8] Use getattr instead of __getattribute__

---
 nomenclature/validation.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/nomenclature/validation.py b/nomenclature/validation.py
index d4531152..58db5b0d 100644
--- a/nomenclature/validation.py
+++ b/nomenclature/validation.py
@@ -25,9 +25,7 @@ def validate(dsd, df, dimensions):
     error = False
 
     for dim in dimensions:
-        if invalid := dsd.__getattribute__(dim).validate_items(
-            df.__getattribute__(dim)
-        ):
+        if invalid := getattr(dsd, dim).validate_items(getattr(df, dim)):
             log_error(dim, invalid)
             error = True
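For reference, `getattr(obj, name)` is the idiomatic spelling of a dynamic attribute lookup; it goes through the same machinery as `obj.__getattribute__(name)` for this use, reads better, and optionally takes a default. A small sketch with a hypothetical `DSD` stand-in (illustrative names only):

    class DSD:
        region = ["World"]
        variable = ["Primary Energy"]

    dsd = DSD()
    for dim in ("region", "variable"):
        items = getattr(dsd, dim)              # same result as dsd.__getattribute__(dim)
        print(dim, items)

    scenario = getattr(dsd, "scenario", None)  # a default avoids the AttributeError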
""" - SCHEMA_FILE = here / "../validation_schemas" / "region_mapping_schema.yaml" + schema_file = here / "../validation_schemas" / "region_mapping_schema.yaml" file = Path(file) if isinstance(file, str) else file with open(file, "r") as f: mapping_input = yaml.safe_load(f) - with open(SCHEMA_FILE, "r") as f: + with open(schema_file, "r") as f: schema = yaml.safe_load(f) # Validate the input data using jsonschema @@ -244,24 +244,27 @@ def from_file(cls, file: Union[Path, str]): # Reformat the "native_regions" if "native_regions" in mapping_input: native_region_list: List[Dict] = [] - for nr in mapping_input["native_regions"]: - if isinstance(nr, str): - native_region_list.append({"name": nr}) - elif isinstance(nr, dict): + for native_region in mapping_input["native_regions"]: + if isinstance(native_region, str): + native_region_list.append({"name": native_region}) + elif isinstance(native_region, dict): native_region_list.append( - {"name": list(nr)[0], "rename": list(nr.values())[0]} + { + "name": list(native_region)[0], + "rename": list(native_region.values())[0], + } ) mapping_input["native_regions"] = native_region_list # Reformat the "common_regions" if "common_regions" in mapping_input: common_region_list: List[Dict[str, List[Dict[str, str]]]] = [] - for cr in mapping_input["common_regions"]: - cr_name = list(cr)[0] + for common_region in mapping_input["common_regions"]: + common_region_name = list(common_region)[0] common_region_list.append( { - "name": cr_name, - "constituent_regions": cr[cr_name], + "name": common_region_name, + "constituent_regions": common_region[common_region_name], } ) mapping_input["common_regions"] = common_region_list @@ -348,16 +351,16 @@ def from_directory(cls, path: DirectoryPath, dsd: DataStructureDefinition): for file in (f for f in path.glob("**/*") if f.suffix in {".yaml", ".yml"}): try: mapping = RegionAggregationMapping.from_file(file) - for m in mapping.model: - if m not in mapping_dict: - mapping_dict[m] = mapping + for model in mapping.model: + if model not in mapping_dict: + mapping_dict[model] = mapping else: errors.append( ErrorWrapper( ModelMappingCollisionError( - model=m, + model=model, file1=mapping.file, - file2=mapping_dict[m].file, + file2=mapping_dict[model].file, ), "__root__", ) @@ -456,18 +459,18 @@ def _apply_region_processing(self, model_df: IamDataFrame) -> IamDataFrame: # aggregate common regions if self.mappings[model].common_regions is not None: - for cr in self.mappings[model].common_regions: + for common_region in self.mappings[model].common_regions: # if a common region is consists of a single native region, rename - if cr.is_single_constituent_region: - _df = model_df.filter(region=cr.constituent_regions[0]).rename( - region=cr.rename_dict - ) + if common_region.is_single_constituent_region: + _df = model_df.filter( + region=common_region.constituent_regions[0] + ).rename(region=common_region.rename_dict) if not _df.empty: _processed_data.append(_df._data) continue # if there are multiple constituent regions, aggregate - regions = [cr.name, cr.constituent_regions] + regions = [common_region.name, common_region.constituent_regions] # first, perform 'simple' aggregation (no arguments) simple_vars = [ diff --git a/nomenclature/testing.py b/nomenclature/testing.py index 9c64b95d..4a9c31e0 100644 --- a/nomenclature/testing.py +++ b/nomenclature/testing.py @@ -63,11 +63,11 @@ def _check_mappings( raise FileNotFoundError(f"Mappings directory not found: {path / mappings}") -def _collect_requiredData_errors( - requiredDatadir: 
From 934e8c66abff95d29e115517d2f1d7a44baf8afd Mon Sep 17 00:00:00 2001
From: Philip Hackstock <20710924+phackstock@users.noreply.github.com>
Date: Tue, 9 May 2023 14:06:07 +0200
Subject: [PATCH 5/8] Use dict comprehension

---
 nomenclature/codelist.py | 13 ++++++-------
 nomenclature/core.py     |  2 +-
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/nomenclature/codelist.py b/nomenclature/codelist.py
index 6167dd4d..aa7b6d43 100644
--- a/nomenclature/codelist.py
+++ b/nomenclature/codelist.py
@@ -275,8 +275,8 @@ def to_yaml(self, path=None):
         """
 
         class Dumper(yaml.Dumper):
-            def increase_indent(self, flow=False, *args, **kwargs):
-                return super().increase_indent(flow=flow, indentless=False)
+            def increase_indent(self, flow: bool = False, indentless: bool = False):
+                return super().increase_indent(flow=flow, indentless=indentless)
 
         # translate to list of nested dicts, replace None by empty field, write to file
         stream = (
@@ -289,11 +289,10 @@ def increase_indent(self, flow=False, *args, **kwargs):
             .replace(": nan\n", ":\n")
         )
 
-        if path is not None:
-            with open(path, "w") as file:
-                file.write(stream)
-        else:
+        if path is None:
             return stream
+        with open(path, "w") as file:
+            file.write(stream)
 
     def to_pandas(self, sort_by_code: bool = False) -> pd.DataFrame:
         """Export the CodeList to a :class:`pandas.DataFrame`
@@ -586,7 +585,7 @@ def hierarchy(self) -> List[str]:
         List[str]
 
         """
-        return sorted(list(set(v.hierarchy for v in self.mapping.values())))
+        return sorted(list({v.hierarchy for v in self.mapping.values()}))
 
 
 class MetaCodeList(CodeList):
diff --git a/nomenclature/core.py b/nomenclature/core.py
index e228f924..c3d8a1e9 100644
--- a/nomenclature/core.py
+++ b/nomenclature/core.py
@@ -10,7 +10,7 @@
 logger = logging.getLogger(__name__)
 
 
-@validate_arguments(config=dict(arbitrary_types_allowed=True))
+@validate_arguments(config={"arbitrary_types_allowed": True})
 def process(
     df: pyam.IamDataFrame,
     dsd: DataStructureDefinition,
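Patches 4 and 5 both swap constructor-plus-generator idioms for literal comprehensions, which are shorter and skip building a throwaway intermediate list. A quick sketch of the equivalences (illustrative values only):

    keys = ("variable", "tag", "region", "generic")

    # dict comprehension instead of dict() over a list of pairs
    assert dict([(k, len(k)) for k in keys]) == {k: len(k) for k in keys}

    # set comprehension instead of set() around a generator expression
    values = ["a", "b", "a"]
    assert set(v for v in values) == {v for v in values}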
From d63dc285987d94a8a0b150102c82919e5560bf22 Mon Sep 17 00:00:00 2001
From: Philip Hackstock <20710924+phackstock@users.noreply.github.com>
Date: Wed, 17 May 2023 16:27:47 +0200
Subject: [PATCH 6/8] Update test data for indent

---
 tests/data/excel_io/validation_nc_list_arg.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/data/excel_io/validation_nc_list_arg.yaml b/tests/data/excel_io/validation_nc_list_arg.yaml
index be52a5fa..2bdf07cf 100644
--- a/tests/data/excel_io/validation_nc_list_arg.yaml
+++ b/tests/data/excel_io/validation_nc_list_arg.yaml
@@ -2,8 +2,8 @@
     description: Total primary energy consumption
     unit: EJ/yr
     region-aggregation:
-      - Primary Energy (mean):
-          method: mean
+    - Primary Energy (mean):
+        method: mean
 - Primary Energy (mean):
     description: Mean primary energy consumption
     unit: EJ/yr

From ea06913e0855099d27e02707befb7bee2e471e29 Mon Sep 17 00:00:00 2001
From: Philip Hackstock <20710924+phackstock@users.noreply.github.com>
Date: Wed, 17 May 2023 16:38:35 +0200
Subject: [PATCH 7/8] Use error instead of e

---
 nomenclature/processor/region.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/nomenclature/processor/region.py b/nomenclature/processor/region.py
index ce8af6e4..e3be01f4 100644
--- a/nomenclature/processor/region.py
+++ b/nomenclature/processor/region.py
@@ -232,10 +232,10 @@ def from_file(cls, file: Union[Path, str]):
         # Validate the input data using jsonschema
         try:
             jsonschema.validate(mapping_input, schema)
-        except jsonschema.ValidationError as e:
+        except jsonschema.ValidationError as error:
             # Add file information in case of error
             raise jsonschema.ValidationError(
-                f"{e.message} in {get_relative_path(file)}"
+                f"{error.message} in {get_relative_path(file)}"
             )
 
         # Add the file name to mapping_input
@@ -365,8 +365,8 @@ def from_directory(cls, path: DirectoryPath, dsd: DataStructureDefinition):
                         "__root__",
                     )
                 )
-            except (pydantic.ValidationError, jsonschema.ValidationError) as e:
-                errors.append(ErrorWrapper(e, "__root__"))
+            except (pydantic.ValidationError, jsonschema.ValidationError) as error:
+                errors.append(ErrorWrapper(error, "__root__"))
 
         if errors:
             raise pydantic.ValidationError(errors, model=RegionProcessor)
@@ -542,13 +542,13 @@ def _aggregate_region(df, var, *regions, **kwargs):
     """Perform region aggregation with kwargs catching inconsistent-index errors"""
     try:
         return df.aggregate_region(var, *regions, **kwargs)
-    except ValueError as e:
-        if str(e) == "Inconsistent index between variable and weight!":
+    except ValueError as error:
+        if str(error) == "Inconsistent index between variable and weight!":
             logger.info(
                 f"Could not aggregate '{var}' for region '{regions[0]}' ({kwargs})"
            )
         else:
-            raise e
+            raise error
From 8a43e0818924edd6ccf6ebc8a93121f593435989 Mon Sep 17 00:00:00 2001
From: Philip Hackstock <20710924+phackstock@users.noreply.github.com>
Date: Wed, 17 May 2023 16:38:49 +0200
Subject: [PATCH 8/8] Use getattr instead of __getattribute__

---
 nomenclature/processor/required_data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nomenclature/processor/required_data.py b/nomenclature/processor/required_data.py
index 6bdee157..cc03ea9f 100644
--- a/nomenclature/processor/required_data.py
+++ b/nomenclature/processor/required_data.py
@@ -63,8 +63,8 @@ def validate_with_definition(self, dsd: DataStructureDefinition) -> None:
             ("region", "region"),
             ("variable", "variables"),
         ):
-            if invalid := dsd.__getattribute__(dimension).validate_items(
-                self.__getattribute__(attribute_name) or []
+            if invalid := getattr(dsd, dimension).validate_items(
+                getattr(self, attribute_name) or []
             ):
                 error_msg += (
                     f"The following {dimension}(s) were not found in the "
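Patches 3 and 8 converge on the same pattern: a dynamic `getattr` lookup combined with the walrus operator (`:=`, Python 3.8+) so the list of invalid items is bound and truth-tested in a single expression. A condensed sketch with a hypothetical codelist stub (illustrative only, not the library's API):

    class CodeListStub:
        codes = {"Primary Energy", "Final Energy"}

        def validate_items(self, items):
            # return the subset of items missing from the codelist
            return [i for i in items if i not in self.codes]

    dsd = type("DSD", (), {"variable": CodeListStub()})()

    if invalid := getattr(dsd, "variable").validate_items(["Primary Energy", "Coal"]):
        print(f"The following variable(s) were not found: {invalid}")  # ['Coal']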