
Add ranked probability score (#71)
* add ranked probability score

* fix lint issues in brier tests

* fix lint issues in rps

* change rps observations to be categories

* fix bug in torch rps

* fix failing tests

---------

Co-authored-by: Francesco Zanetta <zanetta.francesco@gmail.com>
sallen12 and frazane authored Oct 7, 2024
1 parent 01e231e commit 1160b36
Showing 12 changed files with 177 additions and 8 deletions.
2 changes: 2 additions & 0 deletions docs/api/brier.md
@@ -1,3 +1,5 @@
# Brier Score

::: scoringrules.brier_score

::: scoringrules.rps_score
6 changes: 5 additions & 1 deletion scoringrules/__init__.py
@@ -1,6 +1,9 @@
from importlib.metadata import version

-from scoringrules._brier import brier_score
+from scoringrules._brier import (
+    brier_score,
+    rps_score,
+)
from scoringrules._crps import (
crps_beta,
crps_binomial,
@@ -149,6 +152,7 @@
"logs_tt",
"logs_uniform",
"brier_score",
"rps_score",
"gksuv_ensemble",
"gksmv_ensemble",
"error_spread_score",
55 changes: 53 additions & 2 deletions scoringrules/_brier.py
@@ -1,5 +1,6 @@
import typing as tp

from scoringrules.backend import backends
from scoringrules.core import brier

if tp.TYPE_CHECKING:
@@ -37,7 +38,57 @@ def brier_score(
The computed Brier Score.
"""
-    return brier.brier_score(obs=observations, forecasts=forecasts, backend=backend)
+    return brier.brier_score(obs=observations, fct=forecasts, backend=backend)


__all__ = ["brier_score"]
def rps_score(
    observations: "ArrayLike",
    forecasts: "ArrayLike",
    /,
    axis: int = -1,
    *,
    backend: "Backend" = None,
) -> "Array":
    r"""
    Compute the (Discrete) Ranked Probability Score (RPS).

    Suppose the outcome corresponds to one of $K$ ordered categories. The RPS is defined as

    $$ RPS(f, y) = \sum_{k=1}^{K}(\tilde{f}_{k} - \tilde{y}_{k})^2, $$

    where $f \in [0, 1]^{K}$ is a vector of length $K$ containing forecast probabilities
    that each of the $K$ categories will occur, and $y \in \{0, 1\}^{K}$ is a vector of
    length $K$, with the $k$-th element equal to one if the $k$-th category occurs. We
    have $\sum_{k=1}^{K} y_{k} = \sum_{k=1}^{K} f_{k} = 1$, and, for $k = 1, \dots, K$,
    $\tilde{y}_{k} = \sum_{i=1}^{k} y_{i}$ and $\tilde{f}_{k} = \sum_{i=1}^{k} f_{i}$.

    Parameters
    ----------
    observations:
        Array of observed categories, given as integers between 1 and $K$.
    forecasts:
        Array of forecast probabilities for each category.
    axis: int
        The axis corresponding to the categories. Default is the last axis.
    backend: str
        The name of the backend used for computations. Defaults to 'numpy'.

    Returns
    -------
    score:
        The computed Ranked Probability Score.
    """
    B = backends.active if backend is None else backends[backend]
    forecasts = B.asarray(forecasts)

    if axis != -1:
        forecasts = B.moveaxis(forecasts, axis, -1)

    return brier.rps_score(obs=observations, fct=forecasts, backend=backend)


__all__ = [
    "brier_score",
    "rps_score",
]
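A quick usage sketch of the newly exported function, with values borrowed from the test added below; the observation is the 1-based index of the category that occurred:

import scoringrules as sr

fct = [0.01, 0.32, 0.44, 0.23]  # forecast probabilities for K = 4 ordered categories
# cumulative forecast:          [0.01, 0.33, 0.77, 1.00]
# cumulative outcome (cat. 3):  [0.00, 0.00, 1.00, 1.00]
# RPS = 0.01**2 + 0.33**2 + 0.23**2 + 0.00**2 = 0.1619
sr.rps_score(3, fct)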
11 changes: 11 additions & 0 deletions scoringrules/backend/base.py
@@ -71,6 +71,17 @@ def sum(
) -> "Array":
"""Calculate the sum of the input array ``x``."""

@abc.abstractmethod
def cumsum(
self,
x: "Array",
/,
*,
axis: int | tuple[int, ...] | None = None,
dtype: Dtype | None = None,
) -> "Array":
"""Calculate the cumulative sum of the input array ``x``."""

@abc.abstractmethod
def unique_values(self, x: "Array", /) -> "Array":
"""Return the unique elements of an input array ``x``."""
13 changes: 13 additions & 0 deletions scoringrules/backend/jax.py
@@ -93,6 +93,16 @@ def sum(
) -> "Array":
return jnp.sum(x, axis=axis, dtype=dtype, keepdims=keepdims)

def cumsum(
self,
x: "Array",
/,
axis: int | tuple[int, ...] | None = None,
*,
dtype: Dtype | None = None,
) -> "Array":
return jnp.cumsum(x, axis=axis, dtype=dtype)

def unique_values(self, x: "Array") -> "Array":
return jnp.unique(x)

@@ -249,6 +259,9 @@ def where(self, condition: "Array", x1: "Array", x2: "Array") -> "Array":
def size(self, x: "Array") -> int:
return x.size

def indices(self, dimensions: tuple) -> "Array":
return jnp.indices(dimensions)


if __name__ == "__main__":
B = JaxBackend()
13 changes: 13 additions & 0 deletions scoringrules/backend/numpy.py
@@ -93,6 +93,16 @@ def sum(
) -> "NDArray":
return np.sum(x, axis=axis, dtype=dtype, keepdims=keepdims)

def cumsum(
self,
x: "NDArray",
/,
axis: int | tuple[int, ...] | None = None,
*,
dtype: Dtype | None = None,
) -> "NDArray":
return np.cumsum(x, axis=axis, dtype=dtype)

def unique_values(self, x: "NDArray") -> "NDArray":
return np.unique(x)

@@ -245,6 +255,9 @@ def where(self, condition: "NDArray", x1: "NDArray", x2: "NDArray") -> "NDArray"
def size(self, x: "NDArray") -> int:
return x.size

def indices(self, dimensions: tuple) -> "NDArray":
return np.indices(dimensions)


class NumbaBackend(NumpyBackend):
"""Numba backend."""
19 changes: 18 additions & 1 deletion scoringrules/backend/torch.py
@@ -92,6 +92,14 @@ def sum(
) -> "Tensor":
return torch.sum(x, axis=axis, keepdim=keepdims)

def cumsum(
self,
x: "Tensor",
/,
axis: int | tuple[int, ...] | None = None,
) -> "Tensor":
return torch.cumsum(x, dim=axis)

def unique_values(self, x: "Tensor", /) -> "Tensor":
return torch.unique(x)

@@ -129,7 +137,7 @@ def arange(
*,
dtype: Dtype | None = None,
) -> "Tensor":
-        return torch.arange(start)  # TODO: fix this
+        return torch.arange(start, stop, step, dtype=dtype)  # TODO: fix this

def zeros(
self,
@@ -261,3 +269,12 @@ def where(self, condition: "Tensor", x: "Tensor", y: "Tensor") -> "Tensor":

def size(self, x: "Tensor") -> int:
return x.numel()

    def indices(self, dimensions: tuple) -> "Tensor":
        # torch has no direct np.indices equivalent: build per-dimension
        # index ranges, mesh them, and stack the grids along a new first axis
        ranges = [self.arange(s) for s in dimensions]
        if ranges == []:
            # no dimensions: return an empty tensor
            indices = self.asarray(ranges)
        else:
            index_grids = torch.meshgrid(*ranges, indexing="ij")
            indices = torch.stack(index_grids)
        return indices
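The helper above mirrors numpy's np.indices (added to the numpy and jax backends in this commit). A minimal sketch checking that equivalence, assuming numpy is available and a recent torch (meshgrid with indexing= requires torch >= 1.10):

import numpy as np
import torch

# np.indices((2, 3)) has shape (2, 2, 3): one grid of row indices, one of column indices
expected = np.indices((2, 3))
grids = torch.stack(torch.meshgrid(torch.arange(2), torch.arange(3), indexing="ij"))
assert np.array_equal(grids.numpy(), expected)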
22 changes: 21 additions & 1 deletion scoringrules/core/brier.py
@@ -18,7 +18,27 @@ def brier_score(
if B.any(fct < 0.0) or B.any(fct > 1.0 + EPSILON):
raise ValueError("Forecasted probabilities must be within 0 and 1.")

-    if not set(B.unique_values(obs)) <= {0, 1}:
+    if not set(v.item() for v in B.unique_values(obs)) <= {0, 1}:
raise ValueError("Observations must be 0, 1, or NaN.")

return B.asarray((fct - obs) ** 2)


def rps_score(
    obs: "ArrayLike",
    fct: "ArrayLike",
    backend: "Backend" = None,
) -> "Array":
    """Compute the Ranked Probability Score for ordinal categorical forecasts."""
    B = backends.active if backend is None else backends[backend]
    obs, fct = map(B.asarray, (obs, fct))

    if B.any(fct < 0.0) or B.any(fct > 1.0 + EPSILON):
        raise ValueError("Forecast probabilities must be between 0 and 1.")

    # one-hot encode the observed category index against categories 1..K
    categories = B.arange(1, fct.shape[-1] + 1)
    obs_one_hot = B.where(B.expand_dims(obs, -1) == categories, 1, 0)

    # RPS: sum of squared differences between cumulative forecast and outcome vectors
    return B.sum(
        (B.cumsum(fct, axis=-1) - B.cumsum(obs_one_hot, axis=-1)) ** 2, axis=-1
    )
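For reference, a plain-NumPy sketch of the same computation, one-hot encoding the observed category and then comparing cumulative sums of forecast and outcome:

import numpy as np

obs = np.asarray([3])                         # observed category, 1-based
fct = np.asarray([[0.01, 0.32, 0.44, 0.23]])  # forecast probabilities
categories = np.arange(1, fct.shape[-1] + 1)  # [1, 2, 3, 4]
obs_one_hot = np.where(obs[..., None] == categories, 1, 0)
rps = np.sum((np.cumsum(fct, axis=-1) - np.cumsum(obs_one_hot, axis=-1)) ** 2, axis=-1)
print(rps)  # [0.1619]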
2 changes: 1 addition & 1 deletion scoringrules/core/crps/_approx.py
@@ -61,7 +61,7 @@ def _crps_ensemble_pwm(
M: int = fct.shape[-1]
expected_diff = B.sum(B.abs(obs[..., None] - fct), axis=-1) / M
β_0 = B.sum(fct, axis=-1) / M
-    β_1 = B.sum(fct * B.arange(M), axis=-1) / (M * (M - 1.0))
+    β_1 = B.sum(fct * B.arange(0, M), axis=-1) / (M * (M - 1.0))
return expected_diff + β_0 - 2.0 * β_1


2 changes: 1 addition & 1 deletion scoringrules/core/crps/_closed.py
@@ -477,7 +477,7 @@ def hypergeometric(

# if n is a scalar, x always has the same shape, which simplifies the computation
if B.size(n) == 1:
-        x = B.arange(n + 1)
+        x = B.arange(0, n + 1)
out_ndims = B.max(B.asarray([_input.ndim for _input in [obs, M, m, N]]), axis=0)
x = B.expand_dims(x, axis=tuple(range(-out_ndims, 0)))
x, M, m, N = B.broadcast_arrays(x, M, m, N)
2 changes: 1 addition & 1 deletion scoringrules/core/stats.py
@@ -243,7 +243,7 @@ def _inner(m, M, n, N):
k, M, n, N = B.broadcast_arrays(k, M, n, N)
_iter = zip(k.ravel(), M.ravel(), n.ravel(), N.ravel(), strict=True)
return B.asarray(
-        [_inner(B.arange(_args[0] + 1), *_args[1:]) for _args in _iter]
+        [_inner(B.arange(0, _args[0] + 1), *_args[1:]) for _args in _iter]
).reshape(k.shape)


38 changes: 38 additions & 0 deletions tests/test_brier.py
@@ -0,0 +1,38 @@
import numpy as np
import pytest
from scoringrules import _brier

from .conftest import BACKENDS


@pytest.mark.parametrize("backend", BACKENDS)
def test_brier(backend):
# test correctness
obs, fct = 0, 0.271
res = _brier.brier_score(obs, fct, backend=backend)
expected = 0.073441
assert np.isclose(res, expected)


@pytest.mark.parametrize("backend", BACKENDS)
def test_rps(backend):
# test correctness
obs, fct = 3, [0.01, 0.32, 0.44, 0.23]
res = _brier.rps_score(obs, fct, backend=backend)
expected = 0.1619
assert np.isclose(res, expected)

    # test that a non-default axis gives the same result
obs = [1, 3, 4, 2]
fct = [
[0.01, 0.32, 0.44, 0.23],
[0.12, 0.05, 0.48, 0.35],
[0.09, 0.21, 0.05, 0.65],
[0.57, 0.31, 0.08, 0.04],
]
res1 = _brier.rps_score(obs, fct, backend=backend)

fct = np.transpose(fct)
res2 = _brier.rps_score(obs, fct, axis=0, backend=backend)

assert np.allclose(res1, res2)

