Skip to content

Commit

Permalink
add weighted quantiles
Browse files Browse the repository at this point in the history
  • Loading branch information
gidden committed Aug 12, 2022
1 parent d104630 commit 3563b43
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 0 deletions.
68 changes: 68 additions & 0 deletions pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import os
import sys
import wquantiles

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -2310,6 +2311,73 @@ def diff(self, mapping, periods=1, append=False):
# append to `self` or return as `IamDataFrame`
return self._finalize(_value, append=append)

def quantiles(self, quantiles, weights=None, level=None, append=False):
    """Compute the optionally weighted quantiles of data grouped by `level`.

    For example, the following will provide the interquartile range and median
    value of CO2 emissions across all models and scenarios in a given dataset:

    .. code-block:: python

        df.filter(variable='Emissions|CO2').quantiles([0.25, 0.5, 0.75])

    Parameters
    ----------
    quantiles : collection
        Group of quantile values to compute
    weights : pd.Series, optional
        Series indexed by `level`. If omitted, every row gets a weight of 1.
        The series is renamed to 'weight' before being joined to the data,
        so its own name does not matter.
    level : collection, optional
        The index columns to compute quantiles over
        (defaults to ``['model', 'scenario']``)
    append : bool, optional
        Whether to append computed timeseries data to this instance.

    Returns
    -------
    The result of ``self._finalize(...)`` for the concatenated quantile data
    (presumably a new `IamDataFrame`, or nothing when `append` is True;
    confirm against `_finalize`).

    Raises
    ------
    ValueError
        If this instance holds more than one variable.
    """
    if len(self.variable) > 1:
        raise ValueError(
            'quantiles() currently supports only 1 variable, and this '
            f'dataframe has {len(self.variable)}'
        )

    # build the default per call instead of sharing one mutable list object
    if level is None:
        level = ['model', 'scenario']
    elif isinstance(level, str):
        level = [level]
    else:
        level = list(level)

    df = self.timeseries()
    model = 'unweighted' if weights is None else 'weighted'  # can make this a kwarg

    # get weights aligned with model/scenario in data
    if weights is None:
        df['weight'] = 1.0
    else:
        df = df.join(weights.rename('weight'), how='inner')

    # remove the levels that are aggregated over; the 'weight' column stays
    # attached to its row so that weights can be selected per group below
    df = df.reset_index(level=level, drop=True)
    years = pd.Index(df.columns.drop('weight'), name='year')

    dfs = []
    # indexed over the remaining levels (region, variable, and unit by default)
    idxs = df.index.drop_duplicates()
    for idx in idxs:
        # list-based selection always yields a DataFrame, even for one row
        rows = df.loc[[idx]]
        row_weights = rows['weight'].values
        values = rows.drop(columns='weight').values.T
        key = idx if isinstance(idx, tuple) else (idx,)
        kwargs = dict(zip(idxs.names, key))
        for q in quantiles:
            data = pd.Series(
                wquantiles.quantile(values, row_weights, q),
                index=years,
                name='value',
            )
            dfs.append(
                IamDataFrame(
                    data,
                    model=model,
                    scenario=f'quantile_{q}',  # can make this a kwarg
                    **kwargs
                )
            )

    # append to `self` or return as `IamDataFrame`
    return self._finalize(concat(dfs), append=append)


def _to_file_format(self, iamc_index):
"""Return a dataframe suitable for writing to a file"""
df = self.timeseries(iamc_index=iamc_index).reset_index()
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ install_requires =
xlrd >= 2.0
setuptools >= 41
setuptools_scm
wquantiles
# required explicitly for Python 3.7
importlib_metadata
setup_requires =
Expand Down
32 changes: 32 additions & 0 deletions tests/test_feature_quantiles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from pyam import IamDataFrame
import pytest
from pyam.testing import assert_iamframe_equal
import pandas as pd


def test_qunatile_one_variable(test_pd_df):
    """Check the quartiles and median of 'Primary Energy' in the test data.

    Because there are only two datapoints, the median is the only value
    that is actually 'new' compared to the input data.
    """
    probabilities = (0.25, 0.5, 0.75)

    # expected values per year: lower quartile, median, upper quartile
    expected_data = pd.DataFrame(
        {
            'scenario': [f'quantile_{p}' for p in probabilities],
            '2005': [1, 1.5, 2],
            '2010': [6, 6.5, 7],
        }
    )
    expected = IamDataFrame(
        expected_data,
        model="unweighted",
        region="World",
        variable="Primary Energy",
        unit="EJ/yr",
    )

    single_variable = IamDataFrame(test_pd_df).filter(variable='Primary Energy')
    observed = single_variable.quantiles(probabilities)

    assert_iamframe_equal(expected, observed)

def test_quantile_multiple_variables(test_pd_df):
    """Calling quantiles() on the unfiltered test data must raise ValueError."""
    unfiltered = IamDataFrame(test_pd_df)
    pytest.raises(ValueError, unfiltered.quantiles, (0.25, 0.5))

0 comments on commit 3563b43

Please sign in to comment.