General cleanup #244

Merged
4 changes: 3 additions & 1 deletion nomenclature/__init__.py
@@ -30,7 +30,7 @@
__version__ = version("nomenclature-iamc")


def create_yaml_from_xlsx(source, target, sheet_name, col, attrs=[]):
def create_yaml_from_xlsx(source, target, sheet_name, col, attrs=None):
"""Parses an xlsx file with a codelist and writes a yaml file

Parameters
@@ -46,6 +46,8 @@ def create_yaml_from_xlsx(source, target, sheet_name, col, attrs=[]):
attrs : list, optional
Columns from `sheet_name` to use as attributes.
"""
if attrs is None:
attrs = []
SPECIAL_CODELIST.get(col.lower(), CodeList).read_excel(
name="", source=source, sheet_name=sheet_name, col=col, attrs=attrs
).to_yaml(target)
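
Note: replacing attrs=[] with attrs=None removes a mutable default argument. The default list is created once, when the function is defined, and is then shared across calls. A minimal sketch of the pitfall and of the None-sentinel idiom used here (hypothetical function, not part of the PR):

def append_code(code, codes=[]):  # the default list is created once and reused
    codes.append(code)
    return codes

print(append_code("a"))  # ['a']
print(append_code("b"))  # ['a', 'b'] - state leaks between calls

def append_code_safe(code, codes=None):
    if codes is None:  # fresh list on every call
        codes = []
    codes.append(code)
    return codes

print(append_code_safe("a"))  # ['a']
print(append_code_safe("b"))  # ['b']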
28 changes: 14 additions & 14 deletions nomenclature/codelist.py
@@ -1,10 +1,10 @@
import logging
from pathlib import Path
from typing import ClassVar, Dict, List

import pandas as pd
import numpy as np
import pandas as pd
import yaml
import logging
from jsonschema import validate
from pyam.utils import write_sheet
from pydantic import BaseModel, validator
@@ -17,18 +17,17 @@
VariableRenameTargetError,
)


here = Path(__file__).parent.absolute()


def read_validation_schema(i):
with open(here / "validation_schemas" / f"{i}_schema.yaml", "r") as f:
def read_validation_schema(schema):
with open(here / "validation_schemas" / f"{schema}_schema.yaml", "r") as f:
schema = yaml.safe_load(f)
return schema


SCHEMA_TYPES = ("variable", "tag", "region", "generic")
SCHEMA_MAPPING = dict([(i, read_validation_schema(i)) for i in SCHEMA_TYPES])
SCHEMA_MAPPING = {schema: read_validation_schema(schema) for schema in SCHEMA_TYPES}


class CodeList(BaseModel):
@@ -223,7 +222,7 @@ def from_directory(cls, name: str, path: Path, file_glob_pattern: str = "**/*"):
return cls(name=name, mapping=mapping)

@classmethod
def read_excel(cls, name, source, sheet_name, col, attrs=[]):
def read_excel(cls, name, source, sheet_name, col, attrs=None):
"""Parses an xlsx file with a codelist

Parameters
@@ -239,6 +238,8 @@ def read_excel(cls, name, source, sheet_name, col, attrs=[]):
attrs : list, optional
Columns from `sheet_name` to use as attributes.
"""
if attrs is None:
attrs = []
codelist = pd.read_excel(source, sheet_name=sheet_name, usecols=[col] + attrs)

# replace nan with None
@@ -274,8 +275,8 @@ def to_yaml(self, path=None):
"""

class Dumper(yaml.Dumper):
def increase_indent(self, flow=False, *args, **kwargs):
return super().increase_indent(flow=flow, indentless=False)
def increase_indent(self, flow: bool = False, indentless: bool = False):
return super().increase_indent(flow=flow, indentless=indentless)

# translate to list of nested dicts, replace None by empty field, write to file
stream = (
@@ -288,11 +289,10 @@ def increase_indent(self, flow=False, *args, **kwargs):
.replace(": nan\n", ":\n")
)

if path is not None:
with open(path, "w") as file:
file.write(stream)
else:
if path is None:
return stream
with open(path, "w") as file:
file.write(stream)

def to_pandas(self, sort_by_code: bool = False) -> pd.DataFrame:
"""Export the CodeList to a :class:`pandas.DataFrame`
@@ -585,7 +585,7 @@ def hierarchy(self) -> List[str]:
List[str]

"""
return sorted(list(set(v.hierarchy for v in self.mapping.values())))
return sorted(list({v.hierarchy for v in self.mapping.values()}))


class MetaCodeList(CodeList):
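
Note: two of the codelist.py changes replace constructor calls with comprehensions, SCHEMA_MAPPING becoming a dict comprehension and hierarchy using a set comprehension. A small equivalence sketch with hypothetical data, not part of the PR:

schema_types = ("variable", "tag", "region", "generic")

mapping_old = dict([(s, s.upper()) for s in schema_types])
mapping_new = {s: s.upper() for s in schema_types}

levels_old = sorted(list(set(len(s) for s in schema_types)))
levels_new = sorted({len(s) for s in schema_types})

assert mapping_old == mapping_new
assert levels_old == levels_new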
2 changes: 1 addition & 1 deletion nomenclature/core.py
@@ -10,7 +10,7 @@
logger = logging.getLogger(__name__)


@validate_arguments(config=dict(arbitrary_types_allowed=True))
@validate_arguments(config={"arbitrary_types_allowed": True})
def process(
df: pyam.IamDataFrame,
dsd: DataStructureDefinition,
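
Note: the core.py change only swaps dict(arbitrary_types_allowed=True) for an equivalent dict literal in the decorator's config. A tiny usage sketch of pydantic's validate_arguments (pydantic v1 API assumed; the decorated function is hypothetical):

from pydantic import validate_arguments

assert dict(arbitrary_types_allowed=True) == {"arbitrary_types_allowed": True}

@validate_arguments(config={"arbitrary_types_allowed": True})
def count_regions(regions: list) -> int:  # stand-in for arguments like an IamDataFrame
    return len(regions)

print(count_regions(["World", "Europe"]))  # 2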
68 changes: 35 additions & 33 deletions nomenclature/processor/region.py
@@ -82,10 +82,9 @@ def is_single_constituent_region(self):
def rename_dict(self):
if self.is_single_constituent_region:
return {self.constituent_regions[0]: self.name}
else:
raise AttributeError(
"rename_dict is only available for single constituent regions"
)
raise AttributeError(
"rename_dict is only available for single constituent regions"
)


class RegionAggregationMapping(BaseModel):
@@ -223,20 +222,20 @@ def from_file(cls, file: Union[Path, str]):
This function is used to convert a model mapping yaml file into a dictionary
which is used to initialize a RegionAggregationMapping.
"""
SCHEMA_FILE = here / "../validation_schemas" / "region_mapping_schema.yaml"
schema_file = here / "../validation_schemas" / "region_mapping_schema.yaml"
file = Path(file) if isinstance(file, str) else file
with open(file, "r") as f:
mapping_input = yaml.safe_load(f)
with open(SCHEMA_FILE, "r") as f:
with open(schema_file, "r") as f:
schema = yaml.safe_load(f)

# Validate the input data using jsonschema
try:
jsonschema.validate(mapping_input, schema)
except jsonschema.ValidationError as e:
except jsonschema.ValidationError as error:
# Add file information in case of error
raise jsonschema.ValidationError(
f"{e.message} in {get_relative_path(file)}"
f"{error.message} in {get_relative_path(file)}"
)

# Add the file name to mapping_input
@@ -245,24 +244,27 @@ def from_file(cls, file: Union[Path, str]):
# Reformat the "native_regions"
if "native_regions" in mapping_input:
native_region_list: List[Dict] = []
for nr in mapping_input["native_regions"]:
if isinstance(nr, str):
native_region_list.append({"name": nr})
elif isinstance(nr, dict):
for native_region in mapping_input["native_regions"]:
if isinstance(native_region, str):
native_region_list.append({"name": native_region})
elif isinstance(native_region, dict):
native_region_list.append(
{"name": list(nr)[0], "rename": list(nr.values())[0]}
{
"name": list(native_region)[0],
"rename": list(native_region.values())[0],
}
)
mapping_input["native_regions"] = native_region_list

# Reformat the "common_regions"
if "common_regions" in mapping_input:
common_region_list: List[Dict[str, List[Dict[str, str]]]] = []
for cr in mapping_input["common_regions"]:
cr_name = list(cr)[0]
for common_region in mapping_input["common_regions"]:
common_region_name = list(common_region)[0]
common_region_list.append(
{
"name": cr_name,
"constituent_regions": cr[cr_name],
"name": common_region_name,
"constituent_regions": common_region[common_region_name],
}
)
mapping_input["common_regions"] = common_region_list
@@ -349,22 +351,22 @@ def from_directory(cls, path: DirectoryPath, dsd: DataStructureDefinition):
for file in (f for f in path.glob("**/*") if f.suffix in {".yaml", ".yml"}):
try:
mapping = RegionAggregationMapping.from_file(file)
for m in mapping.model:
if m not in mapping_dict:
mapping_dict[m] = mapping
for model in mapping.model:
if model not in mapping_dict:
mapping_dict[model] = mapping
else:
errors.append(
ErrorWrapper(
ModelMappingCollisionError(
model=m,
model=model,
file1=mapping.file,
file2=mapping_dict[m].file,
file2=mapping_dict[model].file,
),
"__root__",
)
)
except (pydantic.ValidationError, jsonschema.ValidationError) as e:
errors.append(ErrorWrapper(e, "__root__"))
except (pydantic.ValidationError, jsonschema.ValidationError) as error:
errors.append(ErrorWrapper(error, "__root__"))

if errors:
raise pydantic.ValidationError(errors, model=RegionProcessor)
@@ -457,18 +459,18 @@ def _apply_region_processing(self, model_df: IamDataFrame) -> IamDataFrame:

# aggregate common regions
if self.mappings[model].common_regions is not None:
for cr in self.mappings[model].common_regions:
for common_region in self.mappings[model].common_regions:
# if a common region consists of a single native region, rename
if cr.is_single_constituent_region:
_df = model_df.filter(region=cr.constituent_regions[0]).rename(
region=cr.rename_dict
)
if common_region.is_single_constituent_region:
_df = model_df.filter(
region=common_region.constituent_regions[0]
).rename(region=common_region.rename_dict)
if not _df.empty:
_processed_data.append(_df._data)
continue

# if there are multiple constituent regions, aggregate
regions = [cr.name, cr.constituent_regions]
regions = [common_region.name, common_region.constituent_regions]

# first, perform 'simple' aggregation (no arguments)
simple_vars = [
@@ -540,13 +542,13 @@ def _aggregate_region(df, var, *regions, **kwargs):
"""Perform region aggregation with kwargs catching inconsistent-index errors"""
try:
return df.aggregate_region(var, *regions, **kwargs)
except ValueError as e:
if str(e) == "Inconsistent index between variable and weight!":
except ValueError as error:
if str(error) == "Inconsistent index between variable and weight!":
logger.info(
f"Could not aggregate '{var}' for region '{regions[0]}' ({kwargs})"
)
else:
raise e
raise error


def _compare_and_merge(original: pd.Series, aggregated: pd.Series) -> IamDataFrame:
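
Note: several region.py edits drop the else branch after a return or raise (guard-clause style) and expand one-letter loop variables such as nr, cr and m into descriptive names. A minimal guard-clause sketch with hypothetical names, not the PR's code:

def rename_dict(name, constituent_regions):
    if len(constituent_regions) == 1:
        return {constituent_regions[0]: name}
    raise AttributeError(
        "rename_dict is only available for single constituent regions"
    )

print(rename_dict("Europe", ["EU27"]))  # {'EU27': 'Europe'}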
4 changes: 2 additions & 2 deletions nomenclature/processor/required_data.py
@@ -63,8 +63,8 @@ def validate_with_definition(self, dsd: DataStructureDefinition) -> None:
("region", "region"),
("variable", "variables"),
):
if invalid := dsd.__getattribute__(dimension).validate_items(
self.__getattribute__(attribute_name) or []
if invalid := getattr(dsd, dimension).validate_items(
getattr(self, attribute_name) or []
):
error_msg += (
f"The following {dimension}(s) were not found in the "
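
Note: the required_data.py change swaps obj.__getattribute__(name) for the built-in getattr(obj, name), the idiomatic way to look up an attribute by name. A small sketch of the equivalence (hypothetical class):

class Definition:
    region = ["World", "Europe"]

dsd = Definition()
assert dsd.__getattribute__("region") == getattr(dsd, "region")
# getattr also accepts a default for attributes that may be missing
print(getattr(dsd, "variable", []))  # []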
14 changes: 7 additions & 7 deletions nomenclature/testing.py
@@ -63,11 +63,11 @@ def _check_mappings(
raise FileNotFoundError(f"Mappings directory not found: {path / mappings}")


def _collect_requiredData_errors(
requiredDatadir: Path, dsd: DataStructureDefinition
def _collect_RequiredData_errors(
required_data_dir: Path, dsd: DataStructureDefinition
) -> None:
errors: List[str] = []
for file in (requiredDatadir).iterdir():
for file in required_data_dir.iterdir():
try:
RequiredDataValidator.from_file(file).validate_with_definition(dsd)
except pydantic.ValidationError as pve:
@@ -77,7 +77,7 @@ def _collect_requiredData_errors(
raise ValueError(f"Found error(s) in required data files: {all_errors}")


def _check_requiredData(
def _check_RequiredData(
path: Path,
definitions: str = "definitions",
dimensions: Optional[List[str]] = None,
@@ -86,10 +86,10 @@ def _check_requiredData(
dsd = DataStructureDefinition(path / definitions, dimensions)
if required_data is None:
if (path / "requiredData").is_dir():
_collect_requiredData_errors(path / "required_data", dsd)
_collect_RequiredData_errors(path / "required_data", dsd)

elif (path / required_data).is_dir():
_collect_requiredData_errors(path / required_data, dsd)
_collect_RequiredData_errors(path / required_data, dsd)
else:
raise FileNotFoundError(
f"Directory for required data not found at: {path / required_data}"
@@ -143,7 +143,7 @@ def assert_valid_structure(
f"`definitions` directory is empty: {path / definitions}"
)
_check_mappings(path, definitions, dimensions, mappings)
_check_requiredData(path, definitions, dimensions, required_data)
_check_RequiredData(path, definitions, dimensions, required_data)


# Todo: add function which runs `DataStructureDefinition(path).validate(scenario)`
4 changes: 1 addition & 3 deletions nomenclature/validation.py
@@ -25,9 +25,7 @@ def validate(dsd, df, dimensions):
error = False

for dim in dimensions:
if invalid := dsd.__getattribute__(dim).validate_items(
df.__getattribute__(dim)
):
if invalid := getattr(dsd, dim).validate_items(getattr(df, dim)):
log_error(dim, invalid)
error = True

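
Note: the validation.py change keeps the assignment expression (:=) that captures the list of invalid items directly in the if condition, now combined with getattr. A tiny sketch of that idiom (hypothetical validator, Python 3.8+):

def validate_items(allowed, items):
    return [item for item in items if item not in allowed]

if invalid := validate_items({"World", "Europe"}, ["World", "Mars"]):
    print(f"Invalid items: {invalid}")  # -> Invalid items: ['Mars']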
4 changes: 2 additions & 2 deletions tests/data/excel_io/validation_nc_list_arg.yaml
@@ -2,8 +2,8 @@
description: Total primary energy consumption
unit: EJ/yr
region-aggregation:
- Primary Energy (mean):
method: mean
- Primary Energy (mean):
method: mean
- Primary Energy (mean):
description: Mean primary energy consumption
unit: EJ/yr
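
Note: the YAML change above adjusts the indentation of a "method: mean" entry. In YAML, indentation decides whether "method" is parsed as the value of "Primary Energy (mean)" or as a sibling key in the same list item. A small sketch using PyYAML to show the difference (illustrative strings, not the test file itself):

import yaml

shallow = yaml.safe_load(
    "region-aggregation:\n"
    "- Primary Energy (mean):\n"
    "  method: mean\n"
)
nested = yaml.safe_load(
    "region-aggregation:\n"
    "- Primary Energy (mean):\n"
    "    method: mean\n"
)
print(shallow["region-aggregation"])  # [{'Primary Energy (mean)': None, 'method': 'mean'}]
print(nested["region-aggregation"])   # [{'Primary Energy (mean)': {'method': 'mean'}}]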