Skip to content

Commit

Permalink
MetaCode and MetaCodeList classes with allowed_values attribute (#246)
Browse files Browse the repository at this point in the history
  • Loading branch information
GretchenSchowalter committed May 12, 2023
1 parent 885a5f0 commit ed6baf3
Show file tree
Hide file tree
Showing 7 changed files with 231 additions and 145 deletions.
13 changes: 13 additions & 0 deletions nomenclature/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,3 +209,16 @@ class RegionCode(Code):
"""

hierarchy: str = None


class MetaCode(Code):
    """Code object with an optional list of allowed values

    Attributes
    ----------
    allowed_values : list, optional
        An optional list of allowed values. Defaults to None if not given.
    """

    # Explicit `= None` default, in line with `RegionCode.hierarchy` above, so the
    # attribute stays optional no matter how the base model treats an
    # `Optional[...]` annotation without a default.
    # NOTE(review): assumes `Code` is a pydantic-style model - confirm.
    allowed_values: Optional[List[Any]] = None
18 changes: 17 additions & 1 deletion nomenclature/codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pyam.utils import write_sheet
from pydantic import BaseModel, validator

from nomenclature.code import Code, VariableCode, RegionCode
from nomenclature.code import Code, VariableCode, RegionCode, MetaCode
from nomenclature.error.codelist import DuplicateCodeError
from nomenclature.error.variable import (
MissingWeightError,
Expand Down Expand Up @@ -624,3 +624,19 @@ def filter(self, hierarchy: str) -> "RegionCodeList":
"Use `RegionCodeList.hierarchy` method for available items."
)
raise ValueError(msg)


class MetaCodeList(CodeList):
    """CodeList specialisation whose entries are `MetaCode` objects

    Attributes
    ----------
    name : str
        Name of the MetaCodeList
    mapping : dict
        Dictionary of `MetaCode` objects
    """

    # Name of the validation schema used for this codelist
    # (presumably resolved by `CodeList` - confirm against the base class)
    validation_schema: ClassVar[str] = "generic"
    # Code class that the entries of this codelist are based on
    code_basis: ClassVar = MetaCode
validation_schema: ClassVar[str] = "generic"
287 changes: 148 additions & 139 deletions nomenclature/definition.py
Original file line number Diff line number Diff line change
@@ -1,139 +1,148 @@
import logging
from pathlib import Path

import pandas as pd
from pyam import IamDataFrame
from pyam.index import replace_index_labels
from pyam.logging import adjust_log_level

from nomenclature.codelist import CodeList, RegionCodeList, VariableCodeList
from nomenclature.validation import validate

# Module-level logger named after this module
logger = logging.getLogger(__name__)
# Dimension names that use a dedicated CodeList subclass; any other dimension
# falls back to the generic `CodeList` (see `DataStructureDefinition.__init__`)
SPECIAL_CODELIST = {"variable": VariableCodeList, "region": RegionCodeList}


class DataStructureDefinition:
    """Definition of datastructure codelists for dimensions used in the IAMC format"""

    def __init__(self, path, dimensions=None):
        """
        Parameters
        ----------
        path : str or path-like
            The folder with the project definitions.
        dimensions : list of str, optional
            List of :meth:`CodeList` names. Each CodeList is initialized
            from a sub-folder of `path` of that name.
            Defaults to ``["region", "variable"]``.

        Raises
        ------
        NotADirectoryError
            If `path` is not an existing directory.
        ValueError
            If any of the initialized codelists is empty.
        """
        if not isinstance(path, Path):
            path = Path(path)

        if not path.is_dir():
            raise NotADirectoryError(f"Definitions directory not found: {path}")

        # Create the default list per instance: a list literal in the signature
        # would be one shared object stored on (and mutable through) every instance.
        self.dimensions = ["region", "variable"] if dimensions is None else dimensions

        # Each dimension becomes an attribute holding its codelist, read from the
        # sub-folder of the same name; dimensions without a dedicated class use
        # the generic `CodeList`.
        for dim in self.dimensions:
            codelist_cls = SPECIAL_CODELIST.get(dim, CodeList)
            setattr(self, dim, codelist_cls.from_directory(dim, path / dim))

        empty = [d for d in self.dimensions if not getattr(self, d)]
        if empty:
            raise ValueError(f"Empty codelist: {', '.join(empty)}")

    def validate(self, df: IamDataFrame, dimensions: list = None) -> None:
        """Validate that the coordinates of `df` are defined in the codelists

        Parameters
        ----------
        df : :class:`pyam.IamDataFrame`
            Scenario data to be validated against the codelists of this instance.
        dimensions : list of str, optional
            Dimensions to perform validation (defaults to all dimensions of self)

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If `df` fails validation against any codelist.
        """
        validate(self, df, dimensions=dimensions or self.dimensions)

    # The return annotation is kept as a string so that no extra import is needed.
    def check_aggregate(self, df: IamDataFrame, **kwargs) -> "pd.DataFrame | None":
        """Check for consistency of scenario data along the variable hierarchy

        Parameters
        ----------
        df : :class:`pyam.IamDataFrame`
            Scenario data to be checked for consistency along the variable hierarchy.
        kwargs : Tolerance arguments for comparison of values
            Passed to :any:`numpy.isclose` via :any:`pyam.IamDataFrame.check_aggregate`.

        Returns
        -------
        :class:`pandas.DataFrame` or None
            Data where a variable and its computed aggregate does not match.

        Raises
        ------
        ValueError
            If the :any:`DataStructureDefinition` does not have a *variable* dimension.
        """
        if "variable" not in self.dimensions:
            raise ValueError("Aggregation check requires 'variable' dimension.")

        lst = []

        with adjust_log_level(level="WARNING"):
            for code in df.variable:
                attr = self.variable.mapping[code]
                if attr.check_aggregate:
                    components = attr.components

                    # check if multiple lists of components are given for a code
                    if isinstance(components, dict):
                        for name, _components in components.items():
                            error = df.check_aggregate(code, _components, **kwargs)
                            if error is not None:
                                error.dropna(inplace=True)
                                # append components-name to variable column
                                error.index = replace_index_labels(
                                    error.index, "variable", [f"{code} [{name}]"]
                                )
                                lst.append(error)

                    # else use components provided as single list or pyam-default (None)
                    else:
                        error = df.check_aggregate(code, components, **kwargs)
                        if error is not None:
                            lst.append(error.dropna())

        if lst:
            # there may be empty dataframes due to `dropna()` above
            error = pd.concat(lst)
            return error if not error.empty else None

        # nothing was flagged for any variable
        return None

    def to_excel(
        self, excel_writer, sheet_name=None, sort_by_code: bool = False, **kwargs
    ):
        """Write the *variable* codelist to an Excel sheet

        Parameters
        ----------
        excel_writer : path-like, file-like, or ExcelWriter object
            File path as string or :class:`pathlib.Path`,
            or existing :class:`pandas.ExcelWriter`.
        sheet_name : str, optional
            Name of sheet that will have the codelist. If *None*, use the codelist name.
        sort_by_code : bool, optional
            Sort the codelist before exporting to file.
        **kwargs
            Passed to :class:`pandas.ExcelWriter` (if *excel_writer* is path-like).
        """
        # TODO write all dimensions to the file
        self.variable.to_excel(excel_writer, sheet_name, sort_by_code, **kwargs)
import logging
from pathlib import Path

import pandas as pd
from pyam import IamDataFrame
from pyam.index import replace_index_labels
from pyam.logging import adjust_log_level

from nomenclature.codelist import (
CodeList,
RegionCodeList,
VariableCodeList,
MetaCodeList,
)
from nomenclature.validation import validate

# Module-level logger named after this module
logger = logging.getLogger(__name__)
# Dimension names that use a dedicated CodeList subclass; any other dimension
# falls back to the generic `CodeList` (see `DataStructureDefinition.__init__`)
SPECIAL_CODELIST = {
"variable": VariableCodeList,
"region": RegionCodeList,
"meta": MetaCodeList,
}


class DataStructureDefinition:
    """Definition of datastructure codelists for dimensions used in the IAMC format"""

    def __init__(self, path, dimensions=None):
        """
        Parameters
        ----------
        path : str or path-like
            The folder with the project definitions.
        dimensions : list of str, optional
            List of :meth:`CodeList` names. Each CodeList is initialized
            from a sub-folder of `path` of that name.
            Defaults to ``["region", "variable"]``.

        Raises
        ------
        NotADirectoryError
            If `path` is not an existing directory.
        ValueError
            If any of the initialized codelists is empty.
        """
        if not isinstance(path, Path):
            path = Path(path)

        if not path.is_dir():
            raise NotADirectoryError(f"Definitions directory not found: {path}")

        # Create the default list per instance: a list literal in the signature
        # would be one shared object stored on (and mutable through) every instance.
        self.dimensions = ["region", "variable"] if dimensions is None else dimensions

        # Each dimension becomes an attribute holding its codelist, read from the
        # sub-folder of the same name; dimensions without a dedicated class use
        # the generic `CodeList`.
        for dim in self.dimensions:
            codelist_cls = SPECIAL_CODELIST.get(dim, CodeList)
            setattr(self, dim, codelist_cls.from_directory(dim, path / dim))

        empty = [d for d in self.dimensions if not getattr(self, d)]
        if empty:
            raise ValueError(f"Empty codelist: {', '.join(empty)}")

    def validate(self, df: IamDataFrame, dimensions: list = None) -> None:
        """Validate that the coordinates of `df` are defined in the codelists

        Parameters
        ----------
        df : :class:`pyam.IamDataFrame`
            Scenario data to be validated against the codelists of this instance.
        dimensions : list of str, optional
            Dimensions to perform validation (defaults to all dimensions of self)

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If `df` fails validation against any codelist.
        """
        validate(self, df, dimensions=dimensions or self.dimensions)

    # The return annotation is kept as a string so that no extra import is needed.
    def check_aggregate(self, df: IamDataFrame, **kwargs) -> "pd.DataFrame | None":
        """Check for consistency of scenario data along the variable hierarchy

        Parameters
        ----------
        df : :class:`pyam.IamDataFrame`
            Scenario data to be checked for consistency along the variable hierarchy.
        kwargs : Tolerance arguments for comparison of values
            Passed to :any:`numpy.isclose` via :any:`pyam.IamDataFrame.check_aggregate`.

        Returns
        -------
        :class:`pandas.DataFrame` or None
            Data where a variable and its computed aggregate does not match.

        Raises
        ------
        ValueError
            If the :any:`DataStructureDefinition` does not have a *variable* dimension.
        """
        if "variable" not in self.dimensions:
            raise ValueError("Aggregation check requires 'variable' dimension.")

        lst = []

        with adjust_log_level(level="WARNING"):
            for code in df.variable:
                attr = self.variable.mapping[code]
                if attr.check_aggregate:
                    components = attr.components

                    # check if multiple lists of components are given for a code
                    if isinstance(components, dict):
                        for name, _components in components.items():
                            error = df.check_aggregate(code, _components, **kwargs)
                            if error is not None:
                                error.dropna(inplace=True)
                                # append components-name to variable column
                                error.index = replace_index_labels(
                                    error.index, "variable", [f"{code} [{name}]"]
                                )
                                lst.append(error)

                    # else use components provided as single list or pyam-default (None)
                    else:
                        error = df.check_aggregate(code, components, **kwargs)
                        if error is not None:
                            lst.append(error.dropna())

        if lst:
            # there may be empty dataframes due to `dropna()` above
            error = pd.concat(lst)
            return error if not error.empty else None

        # nothing was flagged for any variable
        return None

    def to_excel(
        self, excel_writer, sheet_name=None, sort_by_code: bool = False, **kwargs
    ):
        """Write the *variable* codelist to an Excel sheet

        Parameters
        ----------
        excel_writer : path-like, file-like, or ExcelWriter object
            File path as string or :class:`pathlib.Path`,
            or existing :class:`pandas.ExcelWriter`.
        sheet_name : str, optional
            Name of sheet that will have the codelist. If *None*, use the codelist name.
        sort_by_code : bool, optional
            Sort the codelist before exporting to file.
        **kwargs
            Passed to :class:`pandas.ExcelWriter` (if *excel_writer* is path-like).
        """
        # TODO write all dimensions to the file
        self.variable.to_excel(excel_writer, sheet_name, sort_by_code, **kwargs)
14 changes: 12 additions & 2 deletions nomenclature/validation_schemas/generic_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,18 @@ definitions:
type: object
# The lower-level dictionary are the attributes
additionalProperties:
type: [ string, number, boolean, "null" ]
oneOf:
- type: string
- type: number
- type: boolean
- type: "null"
- type: array
items:
oneOf:
- type: string
- type: number
- type: boolean
- type: "null"
additionalProperties: false
minProperties: 1
maxProperties: 1
- type: string
4 changes: 4 additions & 0 deletions tests/data/meta/meta_indicators_allowed_values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Test fixture: meta indicators, each with an `allowed_values` list
- Meta category with boolean values:
allowed_values: [True, False]
- Meta cat with int values:
allowed_values: [1, 2, 3]
11 changes: 10 additions & 1 deletion tests/test_code.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from nomenclature.code import Code, VariableCode, RegionCode
from nomenclature.code import Code, VariableCode, RegionCode, MetaCode


def test_variable_without_unit_raises():
Expand Down Expand Up @@ -55,3 +55,12 @@ def test_RegionCode_hierarchy_attribute():
)

assert reg.hierarchy == "R5"


def test_MetaCode_allowed_values_attribute():
    """A MetaCode exposes the list passed as `allowed_values`"""
    init_kwargs = {"name": "MetaCode test", "allowed_values": [True]}
    code = MetaCode(**init_kwargs)

    assert code.allowed_values == [True]
Loading

0 comments on commit ed6baf3

Please sign in to comment.