From ebcf032758a879d4a683562cb89140563b4f3151 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Tue, 17 Dec 2024 07:01:13 +0100 Subject: [PATCH] Add `sort_data()` method (#897) --- RELEASE_NOTES.md | 1 + pyam/core.py | 22 ++++++++ pyam/utils.py | 16 +++--- tests/test_core.py | 15 +++--- tests/test_data.py | 51 +++++++++++++++++++ ...{test_data_unfccc.py => test_io_unfccc.py} | 0 ...data_worldbank.py => test_io_worldbank.py} | 0 7 files changed, 89 insertions(+), 16 deletions(-) create mode 100644 tests/test_data.py rename tests/{test_data_unfccc.py => test_io_unfccc.py} (100%) rename tests/{test_data_worldbank.py => test_io_worldbank.py} (100%) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 4d6262a30..808b70eeb 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -2,6 +2,7 @@ The next release must be bumped to v3.0.0. +- [#897](https://github.com/IAMconsortium/pyam/pull/897) Add `sort_data()` method - [#896](https://github.com/IAMconsortium/pyam/pull/896) Sort columns of `timeseries()` with mixed time domain - [#893](https://github.com/IAMconsortium/pyam/pull/893) No sorting of timeseries data on initialization or append - [#879](https://github.com/IAMconsortium/pyam/pull/879) Add `read_netcdf()` function diff --git a/pyam/core.py b/pyam/core.py index d8dac9043..ab8e21fe0 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -426,6 +426,28 @@ def data(self): return pd.DataFrame([], columns=self.dimensions + ["value"]) return self._data.reset_index() + def sort_data(self, inplace=False): + """Sort timeseries data by index and coordinates + + Parameters + ---------- + inplace : bool, optional + If True, do operation inplace and return None. + + Returns + ------- + :class:`IamDataFrame` or None + The modified :class:`IamDataFrame` or None if `inplace=True`. 
+ """ + ret = self.copy() if not inplace else self + ret._data.sort_index( + key=compare_year_time if ret.time_col == "year" else None, + inplace=True, + ) + ret._set_attributes() + if not inplace: + return ret + def get_data_column(self, column): """Return a `column` from the timeseries data in long format diff --git a/pyam/utils.py b/pyam/utils.py index 90c79ed80..0e1763fd9 100644 --- a/pyam/utils.py +++ b/pyam/utils.py @@ -606,13 +606,15 @@ def print_list(x, n): # utility method to compare years (as integer) and datetime for index-sorting def compare_year_time(x): - return pd.Index([ - # set year lower than first timestep of that year (2010 < 2010-01-01 00:00) - datetime(time, 1, 1, 0, 0, 0) - timedelta(0, 0.01) - if isinstance(time, int) - else time - for time in x - ]) + return pd.Index( + [ + # set year lower than first timestep of that year (2010 < 2010-01-01 00:00) + datetime(time, 1, 1, 0, 0, 0) - timedelta(0, 0.01) + if isinstance(time, int) + else time + for time in x + ] + ) def to_time(x): diff --git a/tests/test_core.py b/tests/test_core.py index a37e227ac..7349cd6c5 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -573,9 +573,8 @@ def test_timeseries_wide(test_pd_df, unsort): def test_timeseries_mixed_time_domain(test_pd_df): """Assert that timeseries is shown as expected from mixed time-domain data""" test_pd_df = test_pd_df.rename(columns={2005: "2010-01-01 00:00"}) - exp = ( - test_pd_df.set_index(IAMC_IDX)[[2010, "2010-01-01 00:00"]] - .rename(columns={"2010-01-01 00:00": datetime(2010, 1, 1, 0, 0)}) + exp = test_pd_df.set_index(IAMC_IDX)[[2010, "2010-01-01 00:00"]].rename( + columns={"2010-01-01 00:00": datetime(2010, 1, 1, 0, 0)} ) obs = IamDataFrame(test_pd_df).timeseries() @@ -738,9 +737,7 @@ def test_normalize(test_df): if "year" in test_df.data: obs = test_df.normalize(year=2005).data.reset_index(drop=True) else: - obs = test_df.normalize(time=datetime(2005, 6, 17)).data.reset_index( - drop=True - ) + obs = 
test_df.normalize(time=datetime(2005, 6, 17)).data.reset_index(drop=True) pdt.assert_frame_equal(obs, exp) @@ -759,9 +756,9 @@ def test_offset(test_df, padding): if "year" in test_df.data: obs = test_df.offset(year=2005, **kwargs).data.reset_index(drop=True) else: - obs = test_df.offset( - time=datetime(2005, 6, 17), **kwargs - ).data.reset_index(drop=True) + obs = test_df.offset(time=datetime(2005, 6, 17), **kwargs).data.reset_index( + drop=True + ) pdt.assert_frame_equal(obs, exp) diff --git a/tests/test_data.py b/tests/test_data.py new file mode 100644 index 000000000..52a006265 --- /dev/null +++ b/tests/test_data.py @@ -0,0 +1,51 @@ +from datetime import datetime + +import pytest +from pandas import testing as pdt + +from pyam import IamDataFrame + + +@pytest.mark.parametrize("inplace", [True, False]) +def test_data_sort(test_df, inplace): + """Assert that data can be sorted as expected""" + + # revert order of _data + df = IamDataFrame(test_df.data.iloc[[5, 4, 3, 2, 1, 0]]) + + # assert that data is not sorted as expected + with pytest.raises(AssertionError): + pdt.assert_frame_equal(df.data, test_df.data) + + # assert that data is sorted as expected + if inplace: + obs = df.copy() + obs.sort_data(inplace=True) + else: + obs = df.sort_data() + pdt.assert_frame_equal(obs.data, test_df.data) + + +@pytest.mark.parametrize("inplace", [True, False]) +def test_data_sort_mixed_time_domain(test_df_year, inplace): + """Assert that timeseries with mixed time domain can be sorted as expected""" + + # TODO implement mixed df in conftest.py + mixed_data = test_df_year.data + mixed_data.year.replace({2005: datetime(2005, 1, 1, 0, 0)}, inplace=True) + mixed_data.rename(columns={"time": "year"}, inplace=True) + + # revert order of _data + df = IamDataFrame(mixed_data.iloc[[5, 4, 3, 2, 1, 0]]) + + # assert that data is not sorted as expected + with pytest.raises(AssertionError): + pdt.assert_frame_equal(df.data, mixed_data) + + # assert that data is sorted as expected + if 
inplace: + obs = df.copy() + obs.sort_data(inplace=True) + else: + obs = df.sort_data() + pdt.assert_frame_equal(obs.data, mixed_data) diff --git a/tests/test_data_unfccc.py b/tests/test_io_unfccc.py similarity index 100% rename from tests/test_data_unfccc.py rename to tests/test_io_unfccc.py diff --git a/tests/test_data_worldbank.py b/tests/test_io_worldbank.py similarity index 100% rename from tests/test_data_worldbank.py rename to tests/test_io_worldbank.py