IAMconsortium · danielhuppmann · Dec 15, 2022 · Aug 12, 2022 · Oct 12, 2022 · Oct 12, 2022
diff --git a/pyam/compute.py b/pyam/compute.py
@@ -22,6 +22,71 @@ class IamComputeAccessor:
     def __init__(self, df):
         self._df = df
 
+    def quantiles(
+        self, quantiles, weights=None, level=["model", "scenario"], append=False
+    ):
+        """Compute the optionally weighted quantiles of data grouped by `level`.
+
+        The timeseries data is collapsed across the index dimensions given by
+        `level`. For each remaining index entry and each value in `quantiles`,
+        one timeseries is created with the model name "unweighted" (or "weighted"
+        if `weights` is given) and the scenario name "quantile_<q>".
+
+        For example, the following will provide the interquartile range and median value
+        of CO2 emissions across all models and scenarios in a given dataset:
+
+        .. code-block:: python
+
+            df.filter(variable='Emissions|CO2').compute.quantiles([0.25, 0.5, 0.75])
+
+        Parameters
+        ----------
+        quantiles : collection
+            Group of quantile values to compute
+        weights : pd.Series, optional
+            Series indexed by `level`. The name of the series is ignored. Rows of
+            the data without a matching weight are dropped (inner join). If
+            omitted, all rows are weighted equally.
+        level : collection, optional
+            The index columns to compute quantiles over
+        append : bool, optional
+            Whether to append computed timeseries data to this instance.
+
+        Returns
+        -------
+        The return value of `_finalize()` of the underlying dataframe, i.e., the
+        computed data is appended to this instance or returned as a new object
+        (presumably an IamDataFrame or None if `append=True` - TODO confirm).
+
+        Raises
+        ------
+        ValueError
+            If the data contains more than one variable.
+        """
+        # imported locally so the method does not depend on a module-level import
+        import wquantiles
+
+        self_df = self._df
+        n_variables = len(self_df.variable)
+        if n_variables > 1:
+            raise ValueError(
+                "quantiles() currently supports only 1 variable, and this "
+                f"dataframe has {n_variables}"
+            )
+        # materialize once, because the values are re-used for every index entry
+        quantiles = list(quantiles)
+        df = self_df.timeseries()
+        model = "unweighted" if weights is None else "weighted"  # can make this a kwarg
+
+        # attach one weight to each row so that data and weights stay aligned
+        if weights is None:
+            df["weight"] = 1.0
+        else:
+            if isinstance(weights, pd.Series):
+                # the joined column is looked up by the name "weight" below
+                weights = weights.rename("weight")
+            df = df.join(weights, how="inner")
+
+        # prep data for processing: collapse the dimensions given by `level`
+        df = df.reset_index(level=level).drop(columns=level)
+        time_columns = df.columns.drop("weight")
+
+        dfs = []
+        # one group per remaining index entry (e.g., region, variable, and unit)
+        for idx in df.index.drop_duplicates():
+            # a boolean mask always yields a 2-dimensional selection, and the
+            # weights are taken from exactly the rows used as data
+            rows = df[df.index.isin([idx])]
+            values = rows[time_columns].values.T
+            w = rows["weight"].values
+            key = idx if isinstance(idx, tuple) else (idx,)
+            kwargs = dict(zip(df.index.names, key))
+            for q in quantiles:
+                data = pd.Series(
+                    wquantiles.quantile(values, w, q),
+                    index=pd.Series(time_columns, name="year"),
+                    name="value",
+                )
+                dfs.append(
+                    IamDataFrame(
+                        data,
+                        model=model,
+                        scenario=f"quantile_{q}",  # can make this a kwarg
+                        **kwargs,
+                    )
+                )
+
+        # append to `self` or return as `IamDataFrame`
+        return self_df._finalize(concat(dfs), append=append)
+
     def growth_rate(self, mapping, append=False):
         """Compute the annualized growth rate of a timeseries along the time dimension
 

diff --git a/pyam/core.py b/pyam/core.py
@@ -4,6 +4,7 @@
 import logging
 import os
 import sys
+import wquantiles
 
 import numpy as np
 import pandas as pd

diff --git a/setup.cfg b/setup.cfg
@@ -41,6 +41,7 @@ install_requires =
     six
     setuptools >= 41
     setuptools_scm
+    wquantiles
     # required explicitly for Python 3.7
     importlib_metadata
     xlsxwriter

diff --git a/tests/test_feature_quantiles.py b/tests/test_feature_quantiles.py
@@ -0,0 +1,35 @@
+from pyam import IamDataFrame
+import pytest
+from pyam.testing import assert_iamframe_equal
+import pandas as pd
+
+
+def test_qunatile_one_variable(test_pd_df):
+    """Tests interquartile range and median of one variable of the standard test df
+
+    Because it is only two datapoints, the only 'new' computation
+    is the median; the outer quantiles coincide with the datapoints.
+    """
+    quantiles = (0.25, 0.5, 0.75)
+    df = IamDataFrame(test_pd_df).filter(variable="Primary Energy")
+    # `quantiles()` is implemented on the compute accessor
+    obs = df.compute.quantiles(quantiles)
+    exp = IamDataFrame(
+        pd.DataFrame(
+            {
+                "scenario": [f"quantile_{q}" for q in quantiles],
+                "2005": [1, (1.0 + 2) / 2, 2],
+                "2010": [6, (6 + 7) / 2, 7],
+            }
+        ),
+        model="unweighted",
+        region="World",
+        variable="Primary Energy",
+        unit="EJ/yr",
+    )
+    assert_iamframe_equal(exp, obs)
+
+
+def test_quantile_multiple_variables(test_pd_df):
+    """Computing quantiles on the unfiltered test df is expected to raise"""
+    unfiltered = IamDataFrame(test_pd_df)
+    requested = (0.25, 0.5)
+    with pytest.raises(ValueError):
+        unfiltered.compute.quantiles(requested)