Skip to content

Commit

Permalink
Add an IamSlice (#657)
Browse files Browse the repository at this point in the history
Co-authored-by: Jonas Hoersch <jonas.hoersch@climateanalytics.org>
  • Loading branch information
danielhuppmann and coroa authored May 12, 2022
1 parent 764a85e commit c5e8f9c
Show file tree
Hide file tree
Showing 10 changed files with 240 additions and 49 deletions.
4 changes: 4 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Next release

- [#657](https://github.com/IAMconsortium/pyam/pull/657) Add an `IamSlice` class

# Release v1.4.0

## Highlights
Expand Down
1 change: 1 addition & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ and methods.
api/general
api/iamdataframe
api/database
api/slice
api/filtering
api/compute
api/plotting
Expand Down
7 changes: 7 additions & 0 deletions doc/source/api/slice.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
.. currentmodule:: pyam

The **IamSlice** class
======================

.. autoclass:: IamSlice
:members: dimensions, time, info
9 changes: 5 additions & 4 deletions pyam/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,17 @@
from importlib_metadata import version, PackageNotFoundError

from pyam.core import *
from pyam.slice import IamSlice # noqa: F401
from pyam.utils import *
from pyam.statistics import *
from pyam.timeseries import *
from pyam.read_ixmp import *
from pyam.logging import *
from pyam.run_control import *
from pyam.iiasa import read_iiasa
from pyam.datareader import read_worldbank
from pyam.unfccc import read_unfccc
from pyam.testing import assert_iamframe_equal
from pyam.iiasa import read_iiasa # noqa: F401
from pyam.datareader import read_worldbank # noqa: F401
from pyam.unfccc import read_unfccc # noqa: F401
from pyam.testing import assert_iamframe_equal # noqa: F401

from pyam.logging import defer_logging_config

Expand Down
93 changes: 64 additions & 29 deletions pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

from tempfile import TemporaryDirectory

from pyam.slice import IamSlice
from pyam.filter import filter_by_time_domain, filter_by_year, filter_by_dt_arg

try:
Expand Down Expand Up @@ -255,7 +256,9 @@ def _finalize(self, data, append, **args):

def __getitem__(self, key):
_key_check = [key] if isstr(key) else key
if key == "value":
if isinstance(key, IamSlice):
return IamDataFrame(self._data.loc[key])
elif key == "value":
return pd.Series(self._data.values, name="value")
elif set(_key_check).issubset(self.meta.columns):
return self.meta.__getitem__(key)
Expand Down Expand Up @@ -291,7 +294,7 @@ def info(self, n=80, meta_rows=5, memory_usage=False):
c2 = n - c1 - 5
info += "\n".join(
[
f" * {i:{c1}}: {print_list(get_index_levels(self._data, i), c2)}"
f" * {i:{c1}}: {print_list(getattr(self, i), c2)}"
for i in self.index.names
]
)
Expand All @@ -300,7 +303,7 @@ def info(self, n=80, meta_rows=5, memory_usage=False):
info += "\nTimeseries data coordinates:\n"
info += "\n".join(
[
f" {i:{c1}}: {print_list(get_index_levels(self._data, i), c2)}"
f" {i:{c1}}: {print_list(getattr(self, i), c2)}"
for i in self.dimensions
if i not in self.index.names
]
Expand Down Expand Up @@ -414,13 +417,16 @@ def list_or_str(x):
def time(self):
"""The time index, i.e., axis labels related to the time domain.
The returned type is
- :class:`pandas.Int64Index` if the time_domain is 'year'
- :class:`pandas.DatetimeIndex` if the time domain is 'datetime'
- :class:`pandas.Index` if the time domain is 'mixed'
Returns
-------
- A :class:`pandas.Int64Index` if the :attr:`time_domain` is 'year'
- A :class:`pandas.DatetimeIndex` if the :attr:`time_domain` is 'datetime'
- A :class:`pandas.Index` if the :attr:`time_domain` is 'mixed'
"""
if self._time is None:
self._time = pd.Index(get_index_levels(self._data, self.time_col))
self._time = pd.Index(
self._data.index.unique(level=self.time_col).values, name="time"
)

return self._time

Expand Down Expand Up @@ -1712,38 +1718,67 @@ def _exclude_on_fail(self, df):
)
)

def slice(self, keep=True, **kwargs):
    """Build an :class:`IamSlice` selecting rows of the timeseries data index

    Parameters
    ----------
    keep : bool, optional
        If *True*, select everything that satisfies the filters;
        if *False*, select the inverse.
    **kwargs
        Filter arguments, see the "Notes" below.

    Returns
    -------
    :class:`IamSlice`

    Raises
    ------
    ValueError
        If `keep` is not a boolean.

    Notes
    -----
    The following arguments are available for filtering:

     - 'meta' columns: filter by string value of that column
     - 'model', 'scenario', 'region', 'variable', 'unit':
       string or list of strings, where `*` can be used as a wildcard
     - 'level': the "depth" of entries in the variable column (number of '|')
       (excluding the strings given in the 'variable' argument)
     - 'year': takes an integer (int/np.int64), a list of integers or
       a range. Note that the last year of a range is not included,
       so `range(2010, 2015)` is interpreted as `[2010, ..., 2014]`
     - 'time_domain': can be "year" or "datetime"
     - arguments for filtering by `datetime.datetime` or np.datetime64
       ('month', 'hour', 'time')
     - 'regexp=True' disables pseudo-regexp syntax in `pattern_match()`
    """
    # guard against strings/dicts passed positionally and landing in `keep`
    if not isinstance(keep, bool):
        raise ValueError(f"Value of `keep` must be a boolean, found: {keep}")

    mask = self._apply_filters(**kwargs)
    if not keep:
        mask = ~mask

    # a Series already carries its own index; anything else is aligned
    # explicitly to the index of the timeseries data
    if isinstance(mask, pd.Series):
        return IamSlice(mask)
    return IamSlice(mask, self._data.index)

def filter(self, keep=True, inplace=False, **kwargs):
"""Return a (copy of a) filtered (downselected) IamDataFrame
Parameters
----------
keep : bool, optional
keep all scenarios satisfying the filters (if True) or the inverse
Keep all scenarios satisfying the filters (if *True*) or the inverse.
inplace : bool, optional
if True, do operation inplace and return None
filters by kwargs:
The following columns are available for filtering:
- 'meta' columns: filter by string value of that column
- 'model', 'scenario', 'region', 'variable', 'unit':
string or list of strings, where `*` can be used as a wildcard
- 'level': the maximum "depth" of IAM variables (number of '|')
(excluding the strings given in the 'variable' argument)
- 'year': takes an integer (int/np.int64), a list of integers or
a range. Note that the last year of a range is not included,
so `range(2010, 2015)` is interpreted as `[2010, ..., 2014]`
- 'time_domain': can be "year" or "datetime"
- arguments for filtering by `datetime.datetime` or np.datetime64
('month', 'hour', 'time')
- 'regexp=True' disables pseudo-regexp syntax in `pattern_match()`
If *True*, do operation inplace and return *None*.
**kwargs
Passed to :meth:`slice`.
"""
if not isinstance(keep, bool):
raise ValueError(f"Cannot filter by `keep={keep}`, must be a boolean!")

# downselect `data` rows and clean up index
_keep = self._apply_filters(**kwargs)
_keep = _keep if keep else ~_keep
ret = self.copy() if not inplace else self
ret._data = ret._data[_keep]
ret._data = ret._data[self.slice(keep=keep, **kwargs)]
ret._data.index = ret._data.index.remove_unused_levels()

# swap time for year if downselected to years-only
Expand Down
88 changes: 88 additions & 0 deletions pyam/slice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import pandas as pd
from pyam.utils import print_list


class IamSlice(pd.Series):
    """A slice object of the IamDataFrame timeseries data index

    The values act as a row selector on the index: the index is subset with
    ``self.index[self]`` and ``len()`` returns the sum of the values.
    The unique coordinates of each index dimension among the selected rows
    are available as attributes (e.g., ``model``) and are returned as lists.
    """

    # declare the per-instance cache as a pandas-internal attribute name
    _internal_names = pd.Series._internal_names + ["_iamcache"]
    _internal_names_set = set(_internal_names)

    @property
    def _constructor(self):
        # results of pandas operations are created as IamSlice instances
        return IamSlice

    def __init__(self, data=None, index=None, **kwargs):
        super().__init__(data, index, **kwargs)
        # lazily-filled mapping of dimension name -> unique selected coordinates
        self._iamcache = {}

    def __dir__(self):
        names = self.dimensions
        return names + super().__dir__()

    def __getattr__(self, attr):
        try:
            return super().__getattr__(attr)
        except AttributeError:
            # read the cache without going through `__getattr__` again
            cache = object.__getattribute__(self, "_iamcache")
            found = cache.get(attr)
            if found is None:
                if attr not in self.dimensions:
                    # not a dimension either: re-raise the original error
                    raise
                found = cache[attr] = self.index[self].unique(level=attr)
            return found.tolist()

    def __len__(self):
        return self.sum()

    @property
    def dimensions(self):
        """Return the list of index names & data coordinates"""
        return self.index.names

    @property
    def time(self):
        """The time index, i.e., axis labels related to the time domain.

        The index is computed from the selected rows on first access,
        renamed to "time" and cached.

        Returns
        -------
        - A :class:`pandas.Int64Index` if the time-domain is 'year'
        - A :class:`pandas.DatetimeIndex` if the time-domain is 'datetime'
        - A :class:`pandas.Index` if the time-domain is 'mixed'
        """
        cache = self._iamcache
        if cache.get("time") is None:
            selected = self.index[self]
            cache["time"] = selected.unique(level=self.time_col).rename("time")
        return cache["time"]

    @property
    def time_col(self):
        """Name of the time dimension: "year" if present, otherwise "time" """
        if "year" in self.dimensions:
            return "year"
        return "time"

    def __repr__(self):
        return self.info()

    def info(self, n=80):
        """Return a summary of the represented index dimensions and data coordinates

        Parameters
        ----------
        n : int
            The maximum line length

        Returns
        -------
        str
        """
        header = f"{type(self)}\nIndex dimensions and data coordinates:\n"

        # width of the name column and remaining width for the coordinates
        c1 = max(len(i) for i in self.dimensions) + 1
        c2 = n - c1 - 5

        rows = []
        for i in self.dimensions:
            rows.append(f" {i:{c1}}: {print_list(getattr(self, i), c2)}")

        return header + "\n".join(rows)
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

DTS_MAPPING = {2005: TEST_DTS[0], 2010: TEST_DTS[1]}

EXP_DATETIME_INDEX = pd.DatetimeIndex(["2005-06-17T00:00:00"])
EXP_DATETIME_INDEX = pd.DatetimeIndex(["2005-06-17T00:00:00"], name="time")


TEST_DF = pd.DataFrame(
Expand Down
21 changes: 12 additions & 9 deletions tests/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,22 @@
from .conftest import EXP_DATETIME_INDEX


def test_filter_error_illegal_column(test_df):
@pytest.mark.parametrize("method", ("filter", "slice"))
def test_filter_error_illegal_column(test_df, method):
# filtering by column `foo` is not valid
pytest.raises(ValueError, test_df.filter, foo="test")
pytest.raises(ValueError, getattr(test_df, method), foo="test")


def test_filter_error_keep(test_df):
@pytest.mark.parametrize("method", ("filter", "slice"))
def test_filter_error_keep(test_df, method):
# string or non-starred dict was mis-interpreted as `keep` kwarg, see #253
pytest.raises(ValueError, test_df.filter, model="foo", keep=1)
pytest.raises(ValueError, test_df.filter, dict(model="foo"))
pytest.raises(ValueError, getattr(test_df, method), model="foo", keep=1)
pytest.raises(ValueError, getattr(test_df, method), dict(model="foo"))


def test_filter_year(test_df):
obs = test_df.filter(year=2005)
@pytest.mark.parametrize("method", ("filter", "slice"))
def test_filter_year(test_df, method):
obs = getattr(test_df, method)(year=2005)
if test_df.time_col == "year":
assert obs.year == [2005]
else:
Expand All @@ -45,14 +48,14 @@ def test_filter_mixed_time_domain(test_df_mixed, arg_year, arg_time):
# filtering to datetime-only works as expected
obs = test_df_mixed.filter(**arg_time)
assert obs.time_domain == "datetime"
pdt.assert_index_equal(obs.time, pd.DatetimeIndex(["2010-07-21"]))
pdt.assert_index_equal(obs.time, pd.DatetimeIndex(["2010-07-21"], name="time"))

# filtering to year-only works as expected including changing of time domain
obs = test_df_mixed.filter(**arg_year)
assert obs.time_col == "year"
assert obs.time_domain == "year"
assert obs.year == [2005]
pdt.assert_index_equal(obs.time, pd.Int64Index([2005]))
pdt.assert_index_equal(obs.time, pd.Int64Index([2005], name="time"))


def test_filter_time_domain_raises(test_df_year):
Expand Down
52 changes: 52 additions & 0 deletions tests/test_slice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import pandas as pd
import pytest


def test_slice_len(test_df_year):
    """The length of a slice is the number of selected rows"""
    selected = test_df_year.slice(scenario="scen_a")
    n_rows = len(selected)
    assert n_rows == 4


def test_slice_index_attributes(test_df):
# assert that the index and data column attributes are set correctly in an IamSlice

# calling `slice()` without filter arguments
s = test_df.slice()

# each dimension is compared against a plain list of its coordinates
assert s.model == ["model_a"]
assert s.scenario == ["scen_a", "scen_b"]
assert s.region == ["World"]
assert s.variable == ["Primary Energy", "Primary Energy|Coal"]
assert s.unit == ["EJ/yr"]
if test_df.time_col == "year":
assert s.year == [2005, 2010]
else:
# without a "year" time column, accessing `year` has to raise
match = "'IamSlice' object has no attribute 'year'"
with pytest.raises(AttributeError, match=match):
s.year
# NOTE(review): presumably this check runs for both time domains rather than
# only in the `else` branch - confirm
assert s.time.equals(pd.Index(test_df.data[test_df.time_col].unique()))


def test_filtered_slice_index_attributes(test_df_year):
    """Attributes of a filtered IamSlice only list the selected coordinates"""
    filtered = test_df_year.slice(scenario="scen_b")
    observed = filtered.scenario
    assert observed == ["scen_b"]


def test_print(test_df_year):
    """Assert that `print(IamSlice)` (and `info()`) returns as expected"""
    expected_lines = (
        "<class 'pyam.slice.IamSlice'>",
        "Index dimensions and data coordinates:",
        " model : model_a (1)",
        " scenario : scen_a, scen_b (2)",
        " region : World (1)",
        " variable : Primary Energy, Primary Energy|Coal (2)",
        " unit : EJ/yr (1)",
        " year : 2005, 2010 (2)",
    )
    exp = "\n".join(expected_lines)

    # summary of an unfiltered slice
    obs = test_df_year.slice().info()
    assert obs == exp
Loading

0 comments on commit c5e8f9c

Please sign in to comment.