diff --git a/nomenclature/code.py b/nomenclature/code.py index 30fa06ec..48078923 100644 --- a/nomenclature/code.py +++ b/nomenclature/code.py @@ -209,3 +209,16 @@ class RegionCode(Code): """ hierarchy: str = None + + +class MetaCode(Code): + """Code object with a list of allowed values + + Attributes + ---------- + allowed_values : Optional[List[Any]] + An optional list of allowed values + + """ + + allowed_values: Optional[List[Any]] diff --git a/nomenclature/codelist.py b/nomenclature/codelist.py index a9cb51ac..4df35961 100644 --- a/nomenclature/codelist.py +++ b/nomenclature/codelist.py @@ -9,7 +9,7 @@ from pyam.utils import write_sheet from pydantic import BaseModel, validator -from nomenclature.code import Code, VariableCode, RegionCode +from nomenclature.code import Code, VariableCode, RegionCode, MetaCode from nomenclature.error.codelist import DuplicateCodeError from nomenclature.error.variable import ( MissingWeightError, @@ -624,3 +624,19 @@ def filter(self, hierarchy: str) -> "RegionCodeList": "Use `RegionCodeList.hierarchy` method for available items."
) raise ValueError(msg) + + +class MetaCodeList(CodeList): + """A subclass of CodeList specialized for MetaCodes + + Attributes + ---------- + name : str + Name of the MetaCodeList + mapping : dict + Dictionary of `MetaCode` objects + + """ + + code_basis: ClassVar = MetaCode + validation_schema: ClassVar[str] = "generic" diff --git a/nomenclature/definition.py b/nomenclature/definition.py index 45bab750..8f9b100f 100644 --- a/nomenclature/definition.py +++ b/nomenclature/definition.py @@ -1,139 +1,148 @@ -import logging -from pathlib import Path - -import pandas as pd -from pyam import IamDataFrame -from pyam.index import replace_index_labels -from pyam.logging import adjust_log_level - -from nomenclature.codelist import CodeList, RegionCodeList, VariableCodeList -from nomenclature.validation import validate - -logger = logging.getLogger(__name__) -SPECIAL_CODELIST = {"variable": VariableCodeList, "region": RegionCodeList} - - -class DataStructureDefinition: - """Definition of datastructure codelists for dimensions used in the IAMC format""" - - def __init__(self, path, dimensions=["region", "variable"]): - """ - - Parameters - ---------- - path : str or path-like - The folder with the project definitions. - dimensions : list of str, optional - List of :meth:`CodeList` names. Each CodeList is initialized - from a sub-folder of `path` of that name.
- """ - if not isinstance(path, Path): - path = Path(path) - - if not path.is_dir(): - raise NotADirectoryError(f"Definitions directory not found: {path}") - - self.dimensions = dimensions - for dim in self.dimensions: - self.__setattr__( - dim, SPECIAL_CODELIST.get(dim, CodeList).from_directory(dim, path / dim) - ) - - empty = [d for d in self.dimensions if not self.__getattribute__(d)] - if empty: - raise ValueError(f"Empty codelist: {', '.join(empty)}") - - def validate(self, df: IamDataFrame, dimensions: list = None) -> None: - """Validate that the coordinates of `df` are defined in the codelists - - Parameters - ---------- - df : :class:`pyam.IamDataFrame` - Scenario data to be validated against the codelists of this instance. - dimensions : list of str, optional - Dimensions to perform validation (defaults to all dimensions of self) - - Returns - ------- - None - - Raises - ------ - ValueError - If `df` fails validation against any codelist. - """ - validate(self, df, dimensions=dimensions or self.dimensions) - - def check_aggregate(self, df: IamDataFrame, **kwargs) -> None: - """Check for consistency of scenario data along the variable hierarchy - - Parameters - ---------- - df : :class:`pyam.IamDataFrame` - Scenario data to be checked for consistency along the variable hierarchy. - kwargs : Tolerance arguments for comparison of values - Passed to :any:`numpy.isclose` via :any:`pyam.IamDataFrame.check_aggregate`. - - Returns - ------- - :class:`pandas.DataFrame` or None - Data where a variable and its computed aggregate does not match. - - Raises - ------ - ValueError - If the :any:`DataStructureDefinition` does not have a *variable* dimension. 
- """ - if "variable" not in self.dimensions: - raise ValueError("Aggregation check requires 'variable' dimension.") - - lst = [] - - with adjust_log_level(level="WARNING"): - for code in df.variable: - attr = self.variable.mapping[code] - if attr.check_aggregate: - components = attr.components - - # check if multiple lists of components are given for a code - if isinstance(components, dict): - for name, _components in components.items(): - error = df.check_aggregate(code, _components, **kwargs) - if error is not None: - error.dropna(inplace=True) - # append components-name to variable column - error.index = replace_index_labels( - error.index, "variable", [f"{code} [{name}]"] - ) - lst.append(error) - - # else use components provided as single list or pyam-default (None) - else: - error = df.check_aggregate(code, components, **kwargs) - if error is not None: - lst.append(error.dropna()) - - if lst: - # there may be empty dataframes due to `dropna()` above - error = pd.concat(lst) - return error if not error.empty else None - - def to_excel( - self, excel_writer, sheet_name=None, sort_by_code: bool = False, **kwargs - ): - """Write the *variable* codelist to an Excel sheet - - Parameters - ---------- - excel_writer : path-like, file-like, or ExcelWriter object - File path as string or :class:`pathlib.Path`, - or existing :class:`pandas.ExcelWriter`. - sheet_name : str, optional - Name of sheet that will have the codelist. If *None*, use the codelist name. - sort_by_code : bool, optional - Sort the codelist before exporting to file. - **kwargs - Passed to :class:`pandas.ExcelWriter` (if *excel_writer* is path-like). 
- """ - # TODO write all dimensions to the file - self.variable.to_excel(excel_writer, sheet_name, sort_by_code, **kwargs) +import logging +from pathlib import Path + +import pandas as pd +from pyam import IamDataFrame +from pyam.index import replace_index_labels +from pyam.logging import adjust_log_level + +from nomenclature.codelist import ( + CodeList, + RegionCodeList, + VariableCodeList, + MetaCodeList, +) +from nomenclature.validation import validate + +logger = logging.getLogger(__name__) +SPECIAL_CODELIST = { + "variable": VariableCodeList, + "region": RegionCodeList, + "meta": MetaCodeList, +} + + +class DataStructureDefinition: + """Definition of datastructure codelists for dimensions used in the IAMC format""" + + def __init__(self, path, dimensions=["region", "variable"]): + """ + + Parameters + ---------- + path : str or path-like + The folder with the project definitions. + dimensions : list of str, optional + List of :meth:`CodeList` names. Each CodeList is initialized + from a sub-folder of `path` of that name. + """ + if not isinstance(path, Path): + path = Path(path) + + if not path.is_dir(): + raise NotADirectoryError(f"Definitions directory not found: {path}") + + self.dimensions = dimensions + for dim in self.dimensions: + self.__setattr__( + dim, SPECIAL_CODELIST.get(dim, CodeList).from_directory(dim, path / dim) + ) + + empty = [d for d in self.dimensions if not self.__getattribute__(d)] + if empty: + raise ValueError(f"Empty codelist: {', '.join(empty)}") + + def validate(self, df: IamDataFrame, dimensions: list = None) -> None: + """Validate that the coordinates of `df` are defined in the codelists + + Parameters + ---------- + df : :class:`pyam.IamDataFrame` + Scenario data to be validated against the codelists of this instance. 
+ dimensions : list of str, optional + Dimensions to perform validation (defaults to all dimensions of self) + + Returns + ------- + None + + Raises + ------ + ValueError + If `df` fails validation against any codelist. + """ + validate(self, df, dimensions=dimensions or self.dimensions) + + def check_aggregate(self, df: IamDataFrame, **kwargs) -> None: + """Check for consistency of scenario data along the variable hierarchy + + Parameters + ---------- + df : :class:`pyam.IamDataFrame` + Scenario data to be checked for consistency along the variable hierarchy. + kwargs : Tolerance arguments for comparison of values + Passed to :any:`numpy.isclose` via :any:`pyam.IamDataFrame.check_aggregate`. + + Returns + ------- + :class:`pandas.DataFrame` or None + Data where a variable and its computed aggregate do not match. + + Raises + ------ + ValueError + If the :any:`DataStructureDefinition` does not have a *variable* dimension. + """ + if "variable" not in self.dimensions: + raise ValueError("Aggregation check requires 'variable' dimension.") + + lst = [] + + with adjust_log_level(level="WARNING"): + for code in df.variable: + attr = self.variable.mapping[code] + if attr.check_aggregate: + components = attr.components + + # check if multiple lists of components are given for a code + if isinstance(components, dict): + for name, _components in components.items(): + error = df.check_aggregate(code, _components, **kwargs) + if error is not None: + error.dropna(inplace=True) + # append components-name to variable column + error.index = replace_index_labels( + error.index, "variable", [f"{code} [{name}]"] + ) + lst.append(error) + + # else use components provided as single list or pyam-default (None) + else: + error = df.check_aggregate(code, components, **kwargs) + if error is not None: + lst.append(error.dropna()) + + if lst: + # there may be empty dataframes due to `dropna()` above + error = pd.concat(lst) + return error if not error.empty else None + + def to_excel( +
self, excel_writer, sheet_name=None, sort_by_code: bool = False, **kwargs + ): + """Write the *variable* codelist to an Excel sheet + + Parameters + ---------- + excel_writer : path-like, file-like, or ExcelWriter object + File path as string or :class:`pathlib.Path`, + or existing :class:`pandas.ExcelWriter`. + sheet_name : str, optional + Name of sheet that will have the codelist. If *None*, use the codelist name. + sort_by_code : bool, optional + Sort the codelist before exporting to file. + **kwargs + Passed to :class:`pandas.ExcelWriter` (if *excel_writer* is path-like). + """ + # TODO write all dimensions to the file + self.variable.to_excel(excel_writer, sheet_name, sort_by_code, **kwargs) diff --git a/nomenclature/validation_schemas/generic_schema.yaml b/nomenclature/validation_schemas/generic_schema.yaml index 1bf8d313..0ebb86ab 100644 --- a/nomenclature/validation_schemas/generic_schema.yaml +++ b/nomenclature/validation_schemas/generic_schema.yaml @@ -17,8 +17,18 @@ definitions: type: object # The lower-level dictionary are the attributes additionalProperties: - type: [ string, number, boolean, "null" ] + oneOf: + - type: string + - type: number + - type: boolean + - type: "null" + - type: array + items: + oneOf: + - type: string + - type: number + - type: boolean + - type: "null" additionalProperties: false minProperties: 1 - maxProperties: 1 - type: string diff --git a/tests/data/meta/meta_indicators_allowed_values.yaml b/tests/data/meta/meta_indicators_allowed_values.yaml new file mode 100644 index 00000000..761c05d4 --- /dev/null +++ b/tests/data/meta/meta_indicators_allowed_values.yaml @@ -0,0 +1,4 @@ +- Meta category with boolean values: + allowed_values: [True, False] +- Meta cat with int values: + allowed_values: [1, 2, 3] \ No newline at end of file diff --git a/tests/test_code.py b/tests/test_code.py index dd88f45a..d4d86b82 100644 --- a/tests/test_code.py +++ b/tests/test_code.py @@ -1,6 +1,6 @@ import pytest -from nomenclature.code import 
Code, VariableCode, RegionCode +from nomenclature.code import Code, VariableCode, RegionCode, MetaCode def test_variable_without_unit_raises(): @@ -55,3 +55,12 @@ def test_RegionCode_hierarchy_attribute(): ) assert reg.hierarchy == "R5" + + +def test_MetaCode_allowed_values_attribute(): + meta = MetaCode( + name="MetaCode test", + allowed_values=[True], + ) + + assert meta.allowed_values == [True] diff --git a/tests/test_codelist.py b/tests/test_codelist.py index 8842564e..018f836b 100644 --- a/tests/test_codelist.py +++ b/tests/test_codelist.py @@ -3,8 +3,13 @@ import pandas.testing as pdt import logging -from nomenclature.code import Code, RegionCode -from nomenclature.codelist import CodeList, VariableCodeList, RegionCodeList +from nomenclature.code import Code, RegionCode, MetaCode +from nomenclature.codelist import ( + CodeList, + VariableCodeList, + RegionCodeList, + MetaCodeList, +) from nomenclature.error.codelist import DuplicateCodeError from conftest import TEST_DATA_DIR, remove_file_from_mapping @@ -306,3 +311,23 @@ def test_codelist_general_filter_No_Elements(caplog): assert len(caplog.records) == 1 assert caplog.records[0].levelname == "WARNING" assert caplog.records[0].message == "Formatted data is empty!" + + +def test_MetaCodeList_from_directory(): + obs = MetaCodeList.from_directory(name="Meta", path=TEST_DATA_DIR / "meta") + mapping = { + "Meta category with boolean values": MetaCode( + name="Meta category with boolean values", + description=None, + extra_attributes={"file": "meta/meta_indicators_allowed_values.yaml"}, + allowed_values=[True, False], + ), + "Meta cat with int values": MetaCode( + name="Meta cat with int values", + description=None, + extra_attributes={"file": "meta/meta_indicators_allowed_values.yaml"}, + allowed_values=[1, 2, 3], + ), + } + exp = MetaCodeList(name="Meta", mapping=mapping) + assert obs == exp