Skip to content

Commit

Permalink
Add sort_data() method (#897)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielhuppmann authored Dec 17, 2024
1 parent 1ef2e7b commit ebcf032
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 16 deletions.
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

The next release must be bumped to v3.0.0.

- [#897](https://github.com/IAMconsortium/pyam/pull/897) Add `sort_data()` method
- [#896](https://github.com/IAMconsortium/pyam/pull/896) Sort columns of `timeseries()` with mixed time domain
- [#893](https://github.com/IAMconsortium/pyam/pull/893) No sorting of timeseries data on initialization or append
- [#879](https://github.com/IAMconsortium/pyam/pull/879) Add `read_netcdf()` function
Expand Down
22 changes: 22 additions & 0 deletions pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,28 @@ def data(self):
return pd.DataFrame([], columns=self.dimensions + ["value"])
return self._data.reset_index()

def sort_data(self, inplace=False):
    """Sort the timeseries data by its index

    Parameters
    ----------
    inplace : bool, optional
        If True, sort this instance and return None.

    Returns
    -------
    :class:`IamDataFrame` or None
        The sorted :class:`IamDataFrame`, or None if `inplace=True`.
    """
    # operate either on this instance or on a copy of it
    target = self if inplace else self.copy()

    # a custom sort key is only passed if the time column is "year"
    sort_key = compare_year_time if target.time_col == "year" else None
    target._data.sort_index(key=sort_key, inplace=True)

    # refresh the attributes after re-ordering the data
    target._set_attributes()

    return None if inplace else target

def get_data_column(self, column):
"""Return a `column` from the timeseries data in long format
Expand Down
16 changes: 9 additions & 7 deletions pyam/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,13 +606,15 @@ def print_list(x, n):

# utility method to compare years (as integer) and datetime for index-sorting
def compare_year_time(x):
    """Return a sort key for index values that mix years and datetimes

    Parameters
    ----------
    x : iterable
        Values of an index level; integer values are interpreted as years,
        all other values are passed through unchanged.

    Returns
    -------
    :class:`pandas.Index`
        Index of the same length as `x`, where each integer year is replaced
        by a datetime shortly before the first timestep of that year.
    """
    # local import so that this function does not depend on module-level imports
    from numbers import Integral

    # set year lower than first timestep of that year (2010 < 2010-01-01 00:00)
    offset = timedelta(0, 0.01)

    # `Integral` also matches numpy integer scalars, which are not instances
    # of `int` and would otherwise not be comparable to datetime values
    return pd.Index(
        [
            datetime(time, 1, 1, 0, 0, 0) - offset
            if isinstance(time, Integral)
            else time
            for time in x
        ]
    )


def to_time(x):
Expand Down
15 changes: 6 additions & 9 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,9 +573,8 @@ def test_timeseries_wide(test_pd_df, unsort):
def test_timeseries_mixed_time_domain(test_pd_df):
"""Assert that timeseries is shown as expected from mixed time-domain data"""
test_pd_df = test_pd_df.rename(columns={2005: "2010-01-01 00:00"})
exp = (
test_pd_df.set_index(IAMC_IDX)[[2010, "2010-01-01 00:00"]]
.rename(columns={"2010-01-01 00:00": datetime(2010, 1, 1, 0, 0)})
exp = test_pd_df.set_index(IAMC_IDX)[[2010, "2010-01-01 00:00"]].rename(
columns={"2010-01-01 00:00": datetime(2010, 1, 1, 0, 0)}
)

obs = IamDataFrame(test_pd_df).timeseries()
Expand Down Expand Up @@ -738,9 +737,7 @@ def test_normalize(test_df):
if "year" in test_df.data:
obs = test_df.normalize(year=2005).data.reset_index(drop=True)
else:
obs = test_df.normalize(time=datetime(2005, 6, 17)).data.reset_index(
drop=True
)
obs = test_df.normalize(time=datetime(2005, 6, 17)).data.reset_index(drop=True)
pdt.assert_frame_equal(obs, exp)


Expand All @@ -759,9 +756,9 @@ def test_offset(test_df, padding):
if "year" in test_df.data:
obs = test_df.offset(year=2005, **kwargs).data.reset_index(drop=True)
else:
obs = test_df.offset(
time=datetime(2005, 6, 17), **kwargs
).data.reset_index(drop=True)
obs = test_df.offset(time=datetime(2005, 6, 17), **kwargs).data.reset_index(
drop=True
)
pdt.assert_frame_equal(obs, exp)


Expand Down
51 changes: 51 additions & 0 deletions tests/test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from datetime import datetime

import pytest
from pandas import testing as pdt

from pyam import IamDataFrame


@pytest.mark.parametrize("inplace", [True, False])
def test_data_sort(test_df, inplace):
    """Check that `sort_data()` restores the expected order of the data"""

    # create an instance from the rows of the fixture in reverse order
    reversed_rows = test_df.data.iloc[[5, 4, 3, 2, 1, 0]]
    df = IamDataFrame(reversed_rows)

    # the unsorted data must differ from the data of the fixture
    with pytest.raises(AssertionError):
        pdt.assert_frame_equal(df.data, test_df.data)

    # sort a copy in place, or use the instance returned by `sort_data()`
    if not inplace:
        obs = df.sort_data()
    else:
        obs = df.copy()
        obs.sort_data(inplace=True)

    # after sorting, the data is identical to the data of the fixture
    pdt.assert_frame_equal(obs.data, test_df.data)


@pytest.mark.parametrize("inplace", [True, False])
def test_data_sort_mixed_time_domain(test_df_year, inplace):
    """Assert that timeseries with mixed time domain can be sorted as expected"""

    # TODO implement mixed df in conftest.py
    mixed_data = test_df_year.data
    # assign the replaced column explicitly: calling `replace(inplace=True)` on
    # a column accessed from the dataframe is a chained operation, which does
    # not update the dataframe when pandas copy-on-write is enabled
    mixed_data["year"] = mixed_data["year"].replace(
        {2005: datetime(2005, 1, 1, 0, 0)}
    )

    # revert order of _data
    df = IamDataFrame(mixed_data.iloc[[5, 4, 3, 2, 1, 0]])

    # assert that data is not sorted as expected
    with pytest.raises(AssertionError):
        pdt.assert_frame_equal(df.data, mixed_data)

    # assert that data is sorted as expected
    if inplace:
        obs = df.copy()
        obs.sort_data(inplace=True)
    else:
        obs = df.sort_data()
    pdt.assert_frame_equal(obs.data, mixed_data)
File renamed without changes.
File renamed without changes.

0 comments on commit ebcf032

Please sign in to comment.