Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MetaCode and MetaCodeList classes with allowed_values attribute #246

Merged
merged 8 commits into from
May 12, 2023
13 changes: 13 additions & 0 deletions nomenclature/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,3 +209,16 @@ class RegionCode(Code):
"""

hierarchy: str = None


class MetaCode(Code):
    """Code object with an optional list of allowed values

    Attributes
    ----------
    allowed_values : list of Any, optional
        An optional list of allowed values, None if not specified

    """

    # Explicit `None` default: the field stays optional independent of how the
    # installed pydantic version treats `Optional[...]` without a default.
    allowed_values: Optional[List[Any]] = None
phackstock marked this conversation as resolved.
Show resolved Hide resolved
18 changes: 17 additions & 1 deletion nomenclature/codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pyam.utils import write_sheet
from pydantic import BaseModel, validator

from nomenclature.code import Code, VariableCode, RegionCode
from nomenclature.code import Code, VariableCode, RegionCode, MetaCode
from nomenclature.error.codelist import DuplicateCodeError
from nomenclature.error.variable import (
MissingWeightError,
Expand Down Expand Up @@ -624,3 +624,19 @@ def filter(self, hierarchy: str) -> "RegionCodeList":
"Use `RegionCodeList.hierarchy` method for available items."
)
raise ValueError(msg)


class MetaCodeList(CodeList):
    """CodeList specialization holding `MetaCode` entries

    Attributes
    ----------
    name : str
        Name of the MetaCodeList
    mapping : dict
        Dictionary of `MetaCode` objects

    """

    # NOTE(review): presumably selects the "generic" validation schema when
    # the codelist files are read - confirm against `CodeList`
    validation_schema: ClassVar[str] = "generic"
    # code class associated with this codelist type
    code_basis: ClassVar = MetaCode
287 changes: 148 additions & 139 deletions nomenclature/definition.py
Original file line number Diff line number Diff line change
@@ -1,139 +1,148 @@
import logging
from pathlib import Path

import pandas as pd
from pyam import IamDataFrame
from pyam.index import replace_index_labels
from pyam.logging import adjust_log_level

from nomenclature.codelist import CodeList, RegionCodeList, VariableCodeList
from nomenclature.validation import validate

# module-level logger named after this module
logger = logging.getLogger(__name__)
# dimension names that are loaded with a dedicated CodeList subclass;
# any other dimension falls back to the generic `CodeList`
SPECIAL_CODELIST = {"variable": VariableCodeList, "region": RegionCodeList}


class DataStructureDefinition:
    """Definition of datastructure codelists for dimensions used in the IAMC format"""

    def __init__(self, path, dimensions=None):
        """Load one codelist per dimension from the sub-folders of `path`

        Parameters
        ----------
        path : str or path-like
            The folder with the project definitions.
        dimensions : list of str, optional
            List of :meth:`CodeList` names. Each CodeList is initialized
            from a sub-folder of `path` of that name.
            Defaults to `["region", "variable"]`.

        Raises
        ------
        NotADirectoryError
            If `path` is not an existing directory.
        ValueError
            If any of the initialized codelists is empty.
        """
        if not isinstance(path, Path):
            path = Path(path)

        if not path.is_dir():
            raise NotADirectoryError(f"Definitions directory not found: {path}")

        # create the default list per instance, a list in the signature would be
        # one shared object stored on (and mutable via) every instance
        self.dimensions = ["region", "variable"] if dimensions is None else dimensions

        # dimensions in SPECIAL_CODELIST use their dedicated class,
        # all others are read as a generic CodeList
        for dim in self.dimensions:
            codelist_cls = SPECIAL_CODELIST.get(dim, CodeList)
            setattr(self, dim, codelist_cls.from_directory(dim, path / dim))

        empty = [d for d in self.dimensions if not getattr(self, d)]
        if empty:
            raise ValueError(f"Empty codelist: {', '.join(empty)}")

    def validate(self, df: IamDataFrame, dimensions: list = None) -> None:
        """Validate that the coordinates of `df` are defined in the codelists

        Parameters
        ----------
        df : :class:`pyam.IamDataFrame`
            Scenario data to be validated against the codelists of this instance.
        dimensions : list of str, optional
            Dimensions to perform validation (defaults to all dimensions of self)

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If `df` fails validation against any codelist.
        """
        validate(self, df, dimensions=dimensions or self.dimensions)

    def check_aggregate(self, df: IamDataFrame, **kwargs) -> "pd.DataFrame | None":
        """Check for consistency of scenario data along the variable hierarchy

        Parameters
        ----------
        df : :class:`pyam.IamDataFrame`
            Scenario data to be checked for consistency along the variable hierarchy.
        kwargs : Tolerance arguments for comparison of values
            Passed to :any:`numpy.isclose` via :any:`pyam.IamDataFrame.check_aggregate`.

        Returns
        -------
        :class:`pandas.DataFrame` or None
            Data where a variable and its computed aggregate does not match.

        Raises
        ------
        ValueError
            If the :any:`DataStructureDefinition` does not have a *variable* dimension.
        """
        if "variable" not in self.dimensions:
            raise ValueError("Aggregation check requires 'variable' dimension.")

        lst = []

        with adjust_log_level(level="WARNING"):
            for code in df.variable:
                attr = self.variable.mapping[code]
                if attr.check_aggregate:
                    components = attr.components

                    # check if multiple lists of components are given for a code
                    if isinstance(components, dict):
                        for name, _components in components.items():
                            error = df.check_aggregate(code, _components, **kwargs)
                            if error is not None:
                                error.dropna(inplace=True)
                                # append components-name to variable column
                                error.index = replace_index_labels(
                                    error.index, "variable", [f"{code} [{name}]"]
                                )
                                lst.append(error)

                    # else use components provided as single list or pyam-default (None)
                    else:
                        error = df.check_aggregate(code, components, **kwargs)
                        if error is not None:
                            lst.append(error.dropna())

        if lst:
            # there may be empty dataframes due to `dropna()` above
            error = pd.concat(lst)
            return error if not error.empty else None

        # no aggregation check reported a mismatch
        return None

    def to_excel(
        self, excel_writer, sheet_name=None, sort_by_code: bool = False, **kwargs
    ):
        """Write the *variable* codelist to an Excel sheet

        Parameters
        ----------
        excel_writer : path-like, file-like, or ExcelWriter object
            File path as string or :class:`pathlib.Path`,
            or existing :class:`pandas.ExcelWriter`.
        sheet_name : str, optional
            Name of sheet that will have the codelist. If *None*, use the codelist name.
        sort_by_code : bool, optional
            Sort the codelist before exporting to file.
        **kwargs
            Passed to :class:`pandas.ExcelWriter` (if *excel_writer* is path-like).
        """
        # TODO write all dimensions to the file
        self.variable.to_excel(excel_writer, sheet_name, sort_by_code, **kwargs)
import logging
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure why GitHub thinks this entire file was changed?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was wondering the same, might even be a GitHub issue.

from pathlib import Path

import pandas as pd
from pyam import IamDataFrame
from pyam.index import replace_index_labels
from pyam.logging import adjust_log_level

from nomenclature.codelist import (
CodeList,
RegionCodeList,
VariableCodeList,
MetaCodeList,
)
from nomenclature.validation import validate

# module-level logger named after this module
logger = logging.getLogger(__name__)
# dimension names that are loaded with a dedicated CodeList subclass;
# any other dimension falls back to the generic `CodeList`
SPECIAL_CODELIST = {
    "variable": VariableCodeList,
    "region": RegionCodeList,
    "meta": MetaCodeList,
}


class DataStructureDefinition:
    """Definition of datastructure codelists for dimensions used in the IAMC format"""

    def __init__(self, path, dimensions=None):
        """Load one codelist per dimension from the sub-folders of `path`

        Parameters
        ----------
        path : str or path-like
            The folder with the project definitions.
        dimensions : list of str, optional
            List of :meth:`CodeList` names. Each CodeList is initialized
            from a sub-folder of `path` of that name.
            Defaults to `["region", "variable"]`.

        Raises
        ------
        NotADirectoryError
            If `path` is not an existing directory.
        ValueError
            If any of the initialized codelists is empty.
        """
        if not isinstance(path, Path):
            path = Path(path)

        if not path.is_dir():
            raise NotADirectoryError(f"Definitions directory not found: {path}")

        # create the default list per instance, a list in the signature would be
        # one shared object stored on (and mutable via) every instance
        self.dimensions = ["region", "variable"] if dimensions is None else dimensions

        # dimensions in SPECIAL_CODELIST use their dedicated class,
        # all others are read as a generic CodeList
        for dim in self.dimensions:
            codelist_cls = SPECIAL_CODELIST.get(dim, CodeList)
            setattr(self, dim, codelist_cls.from_directory(dim, path / dim))

        empty = [d for d in self.dimensions if not getattr(self, d)]
        if empty:
            raise ValueError(f"Empty codelist: {', '.join(empty)}")

    def validate(self, df: IamDataFrame, dimensions: list = None) -> None:
        """Validate that the coordinates of `df` are defined in the codelists

        Parameters
        ----------
        df : :class:`pyam.IamDataFrame`
            Scenario data to be validated against the codelists of this instance.
        dimensions : list of str, optional
            Dimensions to perform validation (defaults to all dimensions of self)

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If `df` fails validation against any codelist.
        """
        validate(self, df, dimensions=dimensions or self.dimensions)

    def check_aggregate(self, df: IamDataFrame, **kwargs) -> "pd.DataFrame | None":
        """Check for consistency of scenario data along the variable hierarchy

        Parameters
        ----------
        df : :class:`pyam.IamDataFrame`
            Scenario data to be checked for consistency along the variable hierarchy.
        kwargs : Tolerance arguments for comparison of values
            Passed to :any:`numpy.isclose` via :any:`pyam.IamDataFrame.check_aggregate`.

        Returns
        -------
        :class:`pandas.DataFrame` or None
            Data where a variable and its computed aggregate does not match.

        Raises
        ------
        ValueError
            If the :any:`DataStructureDefinition` does not have a *variable* dimension.
        """
        if "variable" not in self.dimensions:
            raise ValueError("Aggregation check requires 'variable' dimension.")

        lst = []

        with adjust_log_level(level="WARNING"):
            for code in df.variable:
                attr = self.variable.mapping[code]
                if attr.check_aggregate:
                    components = attr.components

                    # check if multiple lists of components are given for a code
                    if isinstance(components, dict):
                        for name, _components in components.items():
                            error = df.check_aggregate(code, _components, **kwargs)
                            if error is not None:
                                error.dropna(inplace=True)
                                # append components-name to variable column
                                error.index = replace_index_labels(
                                    error.index, "variable", [f"{code} [{name}]"]
                                )
                                lst.append(error)

                    # else use components provided as single list or pyam-default (None)
                    else:
                        error = df.check_aggregate(code, components, **kwargs)
                        if error is not None:
                            lst.append(error.dropna())

        if lst:
            # there may be empty dataframes due to `dropna()` above
            error = pd.concat(lst)
            return error if not error.empty else None

        # no aggregation check reported a mismatch
        return None

    def to_excel(
        self, excel_writer, sheet_name=None, sort_by_code: bool = False, **kwargs
    ):
        """Write the *variable* codelist to an Excel sheet

        Parameters
        ----------
        excel_writer : path-like, file-like, or ExcelWriter object
            File path as string or :class:`pathlib.Path`,
            or existing :class:`pandas.ExcelWriter`.
        sheet_name : str, optional
            Name of sheet that will have the codelist. If *None*, use the codelist name.
        sort_by_code : bool, optional
            Sort the codelist before exporting to file.
        **kwargs
            Passed to :class:`pandas.ExcelWriter` (if *excel_writer* is path-like).
        """
        # TODO write all dimensions to the file
        self.variable.to_excel(excel_writer, sheet_name, sort_by_code, **kwargs)
14 changes: 12 additions & 2 deletions nomenclature/validation_schemas/generic_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,18 @@ definitions:
type: object
# The lower-level dictionary are the attributes
additionalProperties:
type: [ string, number, boolean, "null" ]
oneOf:
- type: string
- type: number
- type: boolean
- type: "null"
- type: array
items:
oneOf:
- type: string
- type: number
- type: boolean
- type: "null"
additionalProperties: false
minProperties: 1
maxProperties: 1
- type: string
4 changes: 4 additions & 0 deletions tests/data/meta/meta_indicators_allowed_values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- Meta category with boolean values:
allowed_values: [True, False]
- Meta cat with int values:
allowed_values: [1, 2, 3]
11 changes: 10 additions & 1 deletion tests/test_code.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from nomenclature.code import Code, VariableCode, RegionCode
from nomenclature.code import Code, VariableCode, RegionCode, MetaCode


def test_variable_without_unit_raises():
Expand Down Expand Up @@ -55,3 +55,12 @@ def test_RegionCode_hierarchy_attribute():
)

assert reg.hierarchy == "R5"


def test_MetaCode_allowed_values_attribute():
    """A MetaCode exposes the `allowed_values` it was constructed with"""
    code = MetaCode(name="MetaCode test", allowed_values=[True])
    expected = [True]

    assert code.allowed_values == expected
Loading