Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support initializing with meta dataframe in long format #801

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Next Release

- [#796](https://github.com/IAMconsortium/pyam/pull/796] Raise explicit error message if no connection to IIASA manager service
- [#794](https://github.com/IAMconsortium/pyam/pull/794] Fixed wrong color codes for AR6 Illustrative Pathways
- [#801](https://github.com/IAMconsortium/pyam/pull/801) Support initializing with `meta` dataframe in long format
- [#796](https://github.com/IAMconsortium/pyam/pull/796) Raise explicit error message if no connection to IIASA manager service
- [#794](https://github.com/IAMconsortium/pyam/pull/794) Fixed wrong color codes for AR6 Illustrative Pathways

# Release v2.0.0

Expand Down
13 changes: 10 additions & 3 deletions pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,13 @@ class IamDataFrame(object):

Parameters
----------
data : :class:`pandas.DataFrame` or file-like object as str or :class:`pathlib.Path`
data : :class:`pandas.DataFrame`, :class:`pathlib.Path` or file-like object
Scenario timeseries data following the IAMC data format or
a supported variation as pandas object or a path to a file.
meta : :class:`pandas.DataFrame`, optional
A dataframe with suitable 'meta' indicators for the new instance.
The index will be downselected to scenarios present in `data`.
A dataframe with suitable 'meta' indicators, either in wide format (one column
per indicator) or in long format (exactly two columns named 'key' and 'value').
The dataframe will be downselected to scenarios present in `data`.
index : list, optional
Columns to use for resulting IamDataFrame index.
kwargs
Expand Down Expand Up @@ -147,10 +148,16 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs):

# if meta is given explicitly, verify that index and column names are valid
if meta is not None:
if meta.index.names == [None]:
meta.set_index(index, inplace=True)
if not meta.index.names == index:
raise ValueError(
f"Incompatible `index={index}` with `meta.index={meta.index.names}`"
)
# if meta is in "long" format as key-value columns, cast to wide format
if len(meta.columns) == 2 and all(meta.columns == ["key", "value"]):
meta = meta.pivot(values="value", columns="key")
meta.columns.name = None

# try casting to Path if file-like is string or LocalPath or pytest.LocalPath
try:
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
["model_a", "scen_b", 2, np.nan],
],
columns=META_IDX + META_COLS,
).set_index(META_IDX)
)


FULL_FEATURE_DF = pd.DataFrame(
Expand Down
45 changes: 37 additions & 8 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,14 @@
columns=["model", "scenario", "region", 2010, 2020],
).set_index(["model", "region"])


META_DF = pd.DataFrame(
[
["model_a", "scen_a", 1],
["model_a", "scen_b", np.nan],
["model_a", "scen_c", 2],
["model_a", "scen_a", 1, "foo"],
["model_a", "scen_b", np.nan, "bar"],
["model_a", "scen_c", 2, "baz"],
],
columns=META_IDX + ["foo"],
columns=META_IDX + ["number", "string"],
).set_index(META_IDX)


Expand Down Expand Up @@ -157,17 +158,45 @@ def test_init_df_with_extra_col(test_pd_df):
pd.testing.assert_frame_equal(obs, exp)


def test_init_df_with_meta(test_pd_df):
# pass explicit meta dataframe with a scenario that doesn't exist in data
df = IamDataFrame(test_pd_df, meta=META_DF[["foo"]])
def test_init_df_with_meta_with_index(test_pd_df):
    """Initialize with an indexed `meta` dataframe and check the downselection."""
    # the indexed meta dataframe includes a scenario that doesn't exist in data
    df = IamDataFrame(test_pd_df, meta=META_DF)

    # only the first two meta rows are expected to remain after initialization
    expected_meta = META_DF.iloc[[0, 1]]
    pd.testing.assert_frame_equal(df.meta, expected_meta)
    assert df.scenario == ["scen_a", "scen_b"]


def test_init_df_with_meta_no_index(test_pd_df):
    """Initialize with a `meta` dataframe that has no index set."""
    # move the index levels back into columns; the resulting dataframe still
    # includes a scenario that doesn't exist in data
    meta_without_index = META_DF.reset_index()
    df = IamDataFrame(test_pd_df, meta=meta_without_index)

    # only the first two meta rows are expected to remain after initialization
    expected_meta = META_DF.iloc[[0, 1]]
    pd.testing.assert_frame_equal(df.meta, expected_meta)
    assert df.scenario == ["scen_a", "scen_b"]


def test_init_df_with_meta_key_value(test_pd_df):
    """Initialize with a `meta` dataframe in long format (key-value columns)."""
    # long-format meta rows, including a scenario that doesn't exist in data
    long_columns = META_IDX + ["key", "value"]
    long_rows = [
        ["model_a", "scen_a", "number", 1],
        ["model_a", "scen_a", "string", "foo"],
        ["model_a", "scen_b", "string", "bar"],
        ["model_a", "scen_c", "number", 2],
    ]
    meta_df = pd.DataFrame(long_rows, columns=long_columns)
    df = IamDataFrame(test_pd_df, meta=meta_df)

    # only the first two rows of the wide-format reference are expected to
    # remain after initialization; dtypes are not compared (check_dtype=False)
    expected_meta = META_DF.iloc[[0, 1]]
    pd.testing.assert_frame_equal(df.meta, expected_meta, check_dtype=False)
    assert df.scenario == ["scen_a", "scen_b"]


def test_init_df_with_meta_exclude_raises(test_pd_df):
# pass explicit meta dataframe with a scenario that
# pass explicit meta dataframe with a legacy "exclude" column
meta = META_DF.copy()
meta["exclude"] = False
with pytest.raises(ValueError, match="Illegal columns in `meta`: 'exclude'"):
Expand Down
Loading