diff --git a/docs/api/kernels.md b/docs/api/kernels.md
index 0377a48..cea97fa 100644
--- a/docs/api/kernels.md
+++ b/docs/api/kernels.md
@@ -1,3 +1,17 @@
 # Kernel scores
 
-::: scoringrules.gks_ensemble
+::: scoringrules.gksuv_ensemble
+
+::: scoringrules.twgksuv_ensemble
+
+::: scoringrules.owgksuv_ensemble
+
+::: scoringrules.vrgksuv_ensemble
+
+::: scoringrules.gksmv_ensemble
+
+::: scoringrules.twgksmv_ensemble
+
+::: scoringrules.owgksmv_ensemble
+
+::: scoringrules.vrgksmv_ensemble
diff --git a/scoringrules/__init__.py b/scoringrules/__init__.py
index 860f7e3..12a6fb3 100644
--- a/scoringrules/__init__.py
+++ b/scoringrules/__init__.py
@@ -85,7 +85,13 @@
 )
 from scoringrules._kernels import (
     gksuv_ensemble,
+    twgksuv_ensemble,
+    owgksuv_ensemble,
+    vrgksuv_ensemble,
     gksmv_ensemble,
+    twgksmv_ensemble,
+    owgksmv_ensemble,
+    vrgksmv_ensemble,
 )
 from scoringrules.backend import backends, register_backend
@@ -159,8 +165,6 @@
     "rps_score",
     "log_score",
     "rls_score",
-    "gksuv_ensemble",
-    "gksmv_ensemble",
     "error_spread_score",
     "energy_score",
     "owenergy_score",
@@ -170,6 +174,14 @@
     "owvariogram_score",
     "twvariogram_score",
     "vrvariogram_score",
+    "gksuv_ensemble",
+    "twgksuv_ensemble",
+    "owgksuv_ensemble",
+    "vrgksuv_ensemble",
+    "gksmv_ensemble",
+    "twgksmv_ensemble",
+    "owgksmv_ensemble",
+    "vrgksmv_ensemble",
     "interval_score",
     "weighted_interval_score",
 ]
diff --git a/scoringrules/_energy.py b/scoringrules/_energy.py
index dfb8531..a1e35b5 100644
--- a/scoringrules/_energy.py
+++ b/scoringrules/_energy.py
@@ -185,7 +185,7 @@ def vrenergy_score(
 ) -> "Array":
     r"""Compute the Vertically Re-scaled Energy Score (vrES) for a finite multivariate ensemble.
 
-    Computation is performed using the ensemble representation of the twES in
+    Computation is performed using the ensemble representation of the vrES in
     [Allen et al. (2022)](https://arxiv.org/abs/2202.12732):
 
     \[
@@ -196,7 +196,7 @@
     \]
 
     where $F_{ens}$ is the ensemble forecast $\mathbf{x}_{1}, \dots, \mathbf{x}_{M}$ with
-    $M$ members, and $v$ is the chaining function used to target particular outcomes.
+    $M$ members, and $w$ is the weight function used to target particular outcomes.
 
 
     Parameters
diff --git a/scoringrules/_kernels.py b/scoringrules/_kernels.py
index 4a10e75..d3fe118 100644
--- a/scoringrules/_kernels.py
+++ b/scoringrules/_kernels.py
@@ -17,7 +17,7 @@ def gksuv_ensemble(
     estimator: str = "nrg",
     backend: "Backend" = None,
 ) -> "Array":
-    r"""Estimate the univariate Gaussian Kernel Score (GKS) for a finite ensemble.
+    r"""Compute the univariate Gaussian Kernel Score (GKS) for a finite ensemble.
 
     The GKS is the kernel score associated with the Gaussian kernel
 
@@ -73,6 +73,219 @@
     return kernels.ensemble_uv(observations, forecasts, estimator, backend=backend)
 
 
+def twgksuv_ensemble(
+    observations: "ArrayLike",
+    forecasts: "Array",
+    v_func: tp.Callable[["ArrayLike"], "ArrayLike"],
+    /,
+    axis: int = -1,
+    *,
+    estimator: str = "nrg",
+    backend: "Backend" = None,
+) -> "Array":
+    r"""Compute the Threshold-Weighted univariate Gaussian Kernel Score (twGKS) for a finite ensemble.
+
+    Computation is performed using the ensemble representation of threshold-weighted kernel scores in
+    [Allen et al. (2022)](https://arxiv.org/abs/2202.12732):
+
+    $$ \mathrm{twGKS}(F_{ens}, y) = - \frac{1}{M} \sum_{m = 1}^{M} k(v(x_{m}), v(y)) + \frac{1}{2 M^{2}} \sum_{m = 1}^{M} \sum_{j = 1}^{M} k(v(x_{m}), v(x_{j})) + \frac{1}{2} k(v(y), v(y)), $$
+
+    where $F_{ens}(x) = \sum_{m=1}^{M} 1 \{ x_{m} \leq x \}/M$ is the empirical
+    distribution function associated with an ensemble forecast $x_{1}, \dots, x_{M}$ with
+    $M$ members, $v$ is the chaining function used to target particular outcomes, and
+
+    $$ k(x_{1}, x_{2}) = \exp \left(- \frac{(x_{1} - x_{2})^{2}}{2} \right) $$
+
+    is the Gaussian kernel.
+
+    Parameters
+    ----------
+    observations: ArrayLike
+        The observed values.
+    forecasts: ArrayLike
+        The predicted forecast ensemble, where the ensemble dimension is by default
+        represented by the last axis.
+    v_func: tp.Callable
+        Chaining function used to emphasise particular outcomes. For example, a function that
+        only considers values above a certain threshold $t$ by projecting forecasts and observations
+        to $[t, \infty)$.
+    axis: int
+        The axis corresponding to the ensemble. Default is the last axis.
+    estimator: str
+        The kernel score estimator to be used. Defaults to 'nrg'.
+    backend: str
+        The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'.
+
+    Returns
+    -------
+    twgks: ArrayLike
+        The twGKS between the forecast ensemble and obs for the chosen chaining function.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import scoringrules as sr
+    >>>
+    >>> obs = np.random.randn(100)
+    >>> pred = obs[:, None] + np.random.randn(100, 10)
+    >>>
+    >>> def v_func(x):
+    ...     return np.maximum(x, -1.0)
+    >>>
+    >>> sr.twgksuv_ensemble(obs, pred, v_func)
+    """
+    observations, forecasts = map(v_func, (observations, forecasts))
+    return gksuv_ensemble(
+        observations,
+        forecasts,
+        axis=axis,
+        estimator=estimator,
+        backend=backend,
+    )
+
+
+def owgksuv_ensemble(
+    observations: "ArrayLike",
+    forecasts: "Array",
+    w_func: tp.Callable[["ArrayLike"], "ArrayLike"],
+    /,
+    axis: int = -1,
+    *,
+    backend: "Backend" = None,
+) -> "Array":
+    r"""Compute the univariate Outcome-Weighted Gaussian Kernel Score (owGKS) for a finite ensemble.
+
+    Computation is performed using the ensemble representation of outcome-weighted kernel scores in
+    [Allen et al. (2022)](https://arxiv.org/abs/2202.12732):
+
+    $$ \mathrm{owGKS}(F_{ens}, y) = -\frac{1}{M \bar{w}} \sum_{m = 1}^{M} k(x_{m}, y)w(x_{m})w(y) + \frac{1}{2 M^{2} \bar{w}^{2}} \sum_{m = 1}^{M} \sum_{j = 1}^{M} k(x_{m}, x_{j})w(x_{m})w(x_{j})w(y) + \frac{1}{2} k(y, y)w(y), $$
+
+    where $F_{ens}(x) = \sum_{m=1}^{M} 1\{ x_{m} \leq x \}/M$ is the empirical
+    distribution function associated with an ensemble forecast $x_{1}, \dots, x_{M}$ with
+    $M$ members, $w$ is the chosen weight function, $\bar{w} = \sum_{m=1}^{M}w(x_{m})/M$,
+    and
+
+    $$ k(x_{1}, x_{2}) = \exp \left(- \frac{(x_{1} - x_{2})^{2}}{2} \right) $$
+
+    is the Gaussian kernel.
+
+    Parameters
+    ----------
+    observations: ArrayLike
+        The observed values.
+    forecasts: ArrayLike
+        The predicted forecast ensemble, where the ensemble dimension is by default
+        represented by the last axis.
+    w_func: tp.Callable
+        Weight function used to emphasise particular outcomes.
+    axis: int
+        The axis corresponding to the ensemble. Default is the last axis.
+    backend: str
+        The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'.
+
+    Returns
+    -------
+    score: ArrayLike
+        The owGKS between the forecast ensemble and obs for the chosen weight function.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import scoringrules as sr
+    >>>
+    >>> obs = np.random.randn(100)
+    >>> pred = obs[:, None] + np.random.randn(100, 10)
+    >>>
+    >>> def w_func(x):
+    ...     return (x > -1).astype(float)
+    >>>
+    >>> sr.owgksuv_ensemble(obs, pred, w_func)
+    """
+    B = backends.active if backend is None else backends[backend]
+
+    if axis != -1:
+        forecasts = B.moveaxis(forecasts, axis, -1)
+
+    obs_weights, fct_weights = map(w_func, (observations, forecasts))
+
+    if B.name == "numba":
+        return kernels.estimator_gufuncs["ow"](
+            observations, forecasts, obs_weights, fct_weights
+        )
+
+    observations, forecasts, obs_weights, fct_weights = map(
+        B.asarray, (observations, forecasts, obs_weights, fct_weights)
+    )
+    return kernels.ow_ensemble_uv(
+        observations, forecasts, obs_weights, fct_weights, backend=backend
+    )
+
+
+def vrgksuv_ensemble(
+    observations: "ArrayLike",
+    forecasts: "Array",
+    w_func: tp.Callable[["ArrayLike"], "ArrayLike"],
+    /,
+    axis: int = -1,
+    *,
+    backend: "Backend" = None,
+) -> "Array":
+    r"""Compute the Vertically Re-scaled Gaussian Kernel Score (vrGKS) for a finite ensemble.
+
+    Computation is performed using the ensemble representation of vertically re-scaled kernel scores in
+    [Allen et al. (2022)](https://arxiv.org/abs/2202.12732):
+
+    $$ \mathrm{vrGKS}(F_{ens}, y) = - \frac{1}{M} \sum_{m = 1}^{M} k(x_{m}, y)w(x_{m})w(y) + \frac{1}{2 M^{2}} \sum_{m = 1}^{M} \sum_{j = 1}^{M} k(x_{m}, x_{j})w(x_{m})w(x_{j}) + \frac{1}{2} k(y, y)w(y)w(y), $$
+
+    where $F_{ens}(x) = \sum_{m=1}^{M} 1 \{ x_{m} \leq x \}/M$ is the empirical
+    distribution function associated with an ensemble forecast $x_{1}, \dots, x_{M}$ with
+    $M$ members, $w$ is the chosen weight function, and
+
+    $$ k(x_{1}, x_{2}) = \exp \left(- \frac{(x_{1} - x_{2})^{2}}{2} \right) $$
+
+    is the Gaussian kernel.
+
+    Parameters
+    ----------
+    observations: ArrayLike
+        The observed values.
+    forecasts: ArrayLike
+        The predicted forecast ensemble, where the ensemble dimension is by default
+        represented by the last axis.
+    w_func: tp.Callable
+        Weight function used to emphasise particular outcomes.
+    axis: int
+        The axis corresponding to the ensemble. Default is the last axis.
+    backend: str
+        The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'.
+
+    Returns
+    -------
+    score: ArrayLike
+        The vrGKS between the forecast ensemble and obs for the chosen weight function.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import scoringrules as sr
+    >>>
+    >>> obs = np.random.randn(100)
+    >>> pred = obs[:, None] + np.random.randn(100, 10)
+    >>>
+    >>> def w_func(x):
+    ...     return (x > -1).astype(float)
+    >>>
+    >>> sr.vrgksuv_ensemble(obs, pred, w_func)
+    """
+    B = backends.active if backend is None else backends[backend]
+
+    if axis != -1:
+        forecasts = B.moveaxis(forecasts, axis, -1)
+
+    obs_weights, fct_weights = map(w_func, (observations, forecasts))
+
+    if B.name == "numba":
+        return kernels.estimator_gufuncs["vr"](
+            observations, forecasts, obs_weights, fct_weights
+        )
+
+    observations, forecasts, obs_weights, fct_weights = map(
+        B.asarray, (observations, forecasts, obs_weights, fct_weights)
+    )
+    return kernels.vr_ensemble_uv(
+        observations, forecasts, obs_weights, fct_weights, backend=backend
+    )
+
+
 def gksmv_ensemble(
     observations: "Array",
     forecasts: "Array",
@@ -83,7 +296,7 @@
     estimator: str = "nrg",
     backend: "Backend" = None,
 ) -> "Array":
-    r"""Estimate the multivariate Gaussian Kernel Score (GKS) for a finite ensemble.
+    r"""Compute the multivariate Gaussian Kernel Score (GKS) for a finite ensemble.
 
     The GKS is the kernel score associated with the Gaussian kernel
 
@@ -137,3 +350,200 @@
         return kernels.estimator_gufuncs_mv[estimator](observations, forecasts)
 
     return kernels.ensemble_mv(observations, forecasts, estimator, backend=backend)
+
+
+def twgksmv_ensemble(
+    observations: "Array",
+    forecasts: "Array",
+    v_func: tp.Callable[["ArrayLike"], "ArrayLike"],
+    /,
+    m_axis: int = -2,
+    v_axis: int = -1,
+    *,
+    backend: "Backend" = None,
+) -> "Array":
+    r"""Compute the Threshold-Weighted Gaussian Kernel Score (twGKS) for a finite multivariate ensemble.
+
+    Computation is performed using the ensemble representation of threshold-weighted kernel scores in
+    [Allen et al. (2022)](https://arxiv.org/abs/2202.12732):
+
+    $$ \mathrm{twGKS}(F_{ens}, \mathbf{y}) = - \frac{1}{M} \sum_{m = 1}^{M} k(v(\mathbf{x}_{m}), v(\mathbf{y})) + \frac{1}{2 M^{2}} \sum_{m = 1}^{M} \sum_{j = 1}^{M} k(v(\mathbf{x}_{m}), v(\mathbf{x}_{j})) + \frac{1}{2} k(v(\mathbf{y}), v(\mathbf{y})), $$
+
+    where $F_{ens}$ is the ensemble forecast $\mathbf{x}_{1}, \dots, \mathbf{x}_{M}$ with
+    $M$ members, $v$ is the chaining function used to target particular outcomes, and
+
+    $$ k(\mathbf{x}_{1}, \mathbf{x}_{2}) = \exp \left(- \frac{\| \mathbf{x}_{1} - \mathbf{x}_{2} \|^{2}}{2} \right) $$
+
+    is the multivariate Gaussian kernel, with $\| \cdot \|$ the Euclidean norm.
+
+    Parameters
+    ----------
+    observations: ArrayLike of shape (...,D)
+        The observed values, where the variables dimension is by default the last axis.
+    forecasts: ArrayLike of shape (..., M, D)
+        The predicted forecast ensemble, where the ensemble dimension is by default
+        represented by the second last axis and the variables dimension by the last axis.
+    v_func: tp.Callable
+        Chaining function used to emphasise particular outcomes.
+    m_axis: int
+        The axis corresponding to the ensemble dimension. Defaults to -2.
+    v_axis: int or tuple(int)
+        The axis corresponding to the variables dimension. Defaults to -1.
+    backend: str
+        The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'.
+
+    Returns
+    -------
+    score: ArrayLike of shape (...)
+        The computed Threshold-Weighted Gaussian Kernel Score.
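+
+    Examples
+    --------
+    A minimal sketch on synthetic data; the shapes, values, and chaining
+    function below are illustrative only:
+
+    >>> import numpy as np
+    >>> import scoringrules as sr
+    >>>
+    >>> obs = np.random.randn(100, 3)
+    >>> pred = obs[:, None, :] + np.random.randn(100, 10, 3)
+    >>>
+    >>> def v_func(x):
+    ...     return np.maximum(x, -1.0)
+    >>>
+    >>> sr.twgksmv_ensemble(obs, pred, v_func)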
+    """
+    observations, forecasts = map(v_func, (observations, forecasts))
+    return gksmv_ensemble(
+        observations, forecasts, m_axis=m_axis, v_axis=v_axis, backend=backend
+    )
+
+
+def owgksmv_ensemble(
+    observations: "Array",
+    forecasts: "Array",
+    w_func: tp.Callable[["ArrayLike"], "ArrayLike"],
+    /,
+    m_axis: int = -2,
+    v_axis: int = -1,
+    *,
+    backend: "Backend" = None,
+) -> "Array":
+    r"""Compute the multivariate Outcome-Weighted Gaussian Kernel Score (owGKS) for a finite ensemble.
+
+    Computation is performed using the ensemble representation of outcome-weighted kernel scores in
+    [Allen et al. (2022)](https://arxiv.org/abs/2202.12732):
+
+    \[
+    \mathrm{owGKS}(F_{ens}, \mathbf{y}) = - \frac{1}{M \bar{w}} \sum_{m = 1}^{M} k(\mathbf{x}_{m}, \mathbf{y}) w(\mathbf{x}_{m}) w(\mathbf{y}) + \frac{1}{2 M^{2} \bar{w}^{2}} \sum_{m = 1}^{M} \sum_{j = 1}^{M} k(\mathbf{x}_{m}, \mathbf{x}_{j}) w(\mathbf{x}_{m}) w(\mathbf{x}_{j}) w(\mathbf{y}) + \frac{1}{2}k(\mathbf{y}, \mathbf{y})w(\mathbf{y}),
+    \]
+
+    where $F_{ens}$ is the ensemble forecast $\mathbf{x}_{1}, \dots, \mathbf{x}_{M}$ with
+    $M$ members, $w$ is the chosen weight function,
+    $\bar{w} = \sum_{m=1}^{M}w(\mathbf{x}_{m})/M$, and
+
+    $$ k(\mathbf{x}_{1}, \mathbf{x}_{2}) = \exp \left(- \frac{ \| \mathbf{x}_{1} - \mathbf{x}_{2} \| ^{2}}{2} \right) $$
+
+    is the multivariate Gaussian kernel, with $ \| \cdot \|$ the Euclidean norm.
+
+    Parameters
+    ----------
+    observations: ArrayLike of shape (...,D)
+        The observed values, where the variables dimension is by default the last axis.
+    forecasts: ArrayLike of shape (..., M, D)
+        The predicted forecast ensemble, where the ensemble dimension is by default
+        represented by the second last axis and the variables dimension by the last axis.
+    w_func: tp.Callable
+        Weight function used to emphasise particular outcomes.
+    m_axis: int
+        The axis corresponding to the ensemble dimension. Defaults to -2.
+    v_axis: int or tuple(int)
+        The axis corresponding to the variables dimension. Defaults to -1.
+    backend: str
+        The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'.
+
+    Returns
+    -------
+    score: ArrayLike of shape (...)
+        The computed Outcome-Weighted GKS.
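+
+    Examples
+    --------
+    A minimal sketch on synthetic data; the shapes, values, and weight
+    function below are illustrative only:
+
+    >>> import numpy as np
+    >>> import scoringrules as sr
+    >>>
+    >>> obs = np.random.randn(100, 3)
+    >>> pred = obs[:, None, :] + np.random.randn(100, 10, 3)
+    >>>
+    >>> def w_func(x):
+    ...     return float(np.all(x > -1))
+    >>>
+    >>> sr.owgksmv_ensemble(obs, pred, w_func)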
+    """
+    B = backends.active if backend is None else backends[backend]
+
+    observations, forecasts = multivariate_array_check(
+        observations, forecasts, m_axis, v_axis, backend=backend
+    )
+
+    fct_weights = B.apply_along_axis(w_func, forecasts, -1)
+    obs_weights = B.apply_along_axis(w_func, observations, -1)
+
+    if B.name == "numba":
+        return kernels.estimator_gufuncs_mv["ow"](
+            observations, forecasts, obs_weights, fct_weights
+        )
+
+    return kernels.ow_ensemble_mv(
+        observations, forecasts, obs_weights, fct_weights, backend=backend
+    )
+
+
+def vrgksmv_ensemble(
+    observations: "Array",
+    forecasts: "Array",
+    w_func: tp.Callable[["ArrayLike"], "ArrayLike"],
+    /,
+    m_axis: int = -2,
+    v_axis: int = -1,
+    *,
+    backend: "Backend" = None,
+) -> "Array":
+    r"""Compute the Vertically Re-scaled Gaussian Kernel Score (vrGKS) for a finite multivariate ensemble.
+
+    Computation is performed using the ensemble representation of vertically re-scaled kernel scores in
+    [Allen et al. (2022)](https://arxiv.org/abs/2202.12732):
+
+    \[
+    \mathrm{vrGKS}(F_{ens}, \mathbf{y}) = - \frac{1}{M} \sum_{m = 1}^{M} k(\mathbf{x}_{m}, \mathbf{y}) w(\mathbf{x}_{m}) w(\mathbf{y}) + \frac{1}{2 M^{2}} \sum_{m = 1}^{M} \sum_{j = 1}^{M} k(\mathbf{x}_{m}, \mathbf{x}_{j}) w(\mathbf{x}_{m}) w(\mathbf{x}_{j}) + \frac{1}{2} k(\mathbf{y}, \mathbf{y})w(\mathbf{y})w(\mathbf{y}),
+    \]
+
+    where $F_{ens}$ is the ensemble forecast $\mathbf{x}_{1}, \dots, \mathbf{x}_{M}$ with
+    $M$ members, $w$ is the weight function used to target particular outcomes, and
+
+    $$ k(\mathbf{x}_{1}, \mathbf{x}_{2}) = \exp \left(- \frac{ \| \mathbf{x}_{1} - \mathbf{x}_{2} \| ^{2}}{2} \right) $$
+
+    is the multivariate Gaussian kernel, with $ \| \cdot \|$ the Euclidean norm.
+
+    Parameters
+    ----------
+    observations: ArrayLike of shape (...,D)
+        The observed values, where the variables dimension is by default the last axis.
+    forecasts: ArrayLike of shape (..., M, D)
+        The predicted forecast ensemble, where the ensemble dimension is by default
+        represented by the second last axis and the variables dimension by the last axis.
+    w_func: tp.Callable
+        Weight function used to emphasise particular outcomes.
+    m_axis: int
+        The axis corresponding to the ensemble dimension. Defaults to -2.
+    v_axis: int or tuple(int)
+        The axis corresponding to the variables dimension. Defaults to -1.
+    backend: str
+        The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'.
+
+    Returns
+    -------
+    score: ArrayLike of shape (...)
+        The computed Vertically Re-scaled Gaussian Kernel Score.
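+
+    Examples
+    --------
+    A minimal sketch on synthetic data; the shapes, values, and weight
+    function below are illustrative only:
+
+    >>> import numpy as np
+    >>> import scoringrules as sr
+    >>>
+    >>> obs = np.random.randn(100, 3)
+    >>> pred = obs[:, None, :] + np.random.randn(100, 10, 3)
+    >>>
+    >>> def w_func(x):
+    ...     return float(np.all(x > -1))
+    >>>
+    >>> sr.vrgksmv_ensemble(obs, pred, w_func)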
+    """
+    B = backends.active if backend is None else backends[backend]
+
+    observations, forecasts = multivariate_array_check(
+        observations, forecasts, m_axis, v_axis, backend=backend
+    )
+
+    fct_weights = B.apply_along_axis(w_func, forecasts, -1)
+    obs_weights = B.apply_along_axis(w_func, observations, -1)
+
+    if B.name == "numba":
+        return kernels.estimator_gufuncs_mv["vr"](
+            observations, forecasts, obs_weights, fct_weights
+        )
+
+    return kernels.vr_ensemble_mv(
+        observations, forecasts, obs_weights, fct_weights, backend=backend
+    )
diff --git a/scoringrules/backend/base.py b/scoringrules/backend/base.py
index c76ff11..350e215 100644
--- a/scoringrules/backend/base.py
+++ b/scoringrules/backend/base.py
@@ -43,6 +43,29 @@ def mean(
     ) -> "Array":
         """Calculate the arithmetic mean of the input array ``x``."""
 
+    @abc.abstractmethod
+    def std(
+        self,
+        x: "Array",
+        /,
+        *,
+        axis: int | tuple[int, ...] | None = None,
+        keepdims: bool = False,
+    ) -> "Array":
+        """Calculate the standard deviation of the input array ``x``."""
+
+    @abc.abstractmethod
+    def quantile(
+        self,
+        x: "Array",
+        q: "ArrayLike",
+        /,
+        *,
+        axis: int | tuple[int, ...] | None = None,
+        keepdims: bool = False,
+    ) -> "Array":
+        """Calculate the ``q`` quantiles of the input array ``x``."""
+
     @abc.abstractmethod
     def max(
         self, x: "Array", axis: int | tuple[int, ...] | None, keepdims: bool = False
@@ -263,3 +286,7 @@ def where(self, condition: "Array", x1: "Array", x2: "Array", /) -> "Array":
     @abc.abstractmethod
     def size(self, x: "Array") -> int:
         """Return the number of elements in the input array."""
+
+    @abc.abstractmethod
+    def indices(self, x: "Array") -> "Array":
+        """Return an array representing the indices of a grid."""
diff --git a/scoringrules/core/kernels/__init__.py b/scoringrules/core/kernels/__init__.py
index 072c535..d0377b7 100644
--- a/scoringrules/core/kernels/__init__.py
+++ b/scoringrules/core/kernels/__init__.py
@@ -1,4 +1,11 @@
-from ._approx import ensemble_uv, ensemble_mv
+from ._approx import (
+    ensemble_uv,
+    ow_ensemble_uv,
+    vr_ensemble_uv,
+    ensemble_mv,
+    ow_ensemble_mv,
+    vr_ensemble_mv,
+)
 
 try:
     from ._gufuncs import estimator_gufuncs
@@ -12,7 +19,11 @@
 __all__ = [
     "ensemble_uv",
+    "ow_ensemble_uv",
+    "vr_ensemble_uv",
     "ensemble_mv",
+    "ow_ensemble_mv",
+    "vr_ensemble_mv",
     "estimator_gufuncs",
     "estimator_gufuncs_mv",
 ]
diff --git a/scoringrules/core/kernels/_approx.py b/scoringrules/core/kernels/_approx.py
index 440e694..8e86fa8 100644
--- a/scoringrules/core/kernels/_approx.py
+++ b/scoringrules/core/kernels/_approx.py
@@ -79,3 +79,126 @@
     out = -out
 
     return out
+
+
+def ow_ensemble_uv(
+    obs: "ArrayLike",
+    fct: "Array",
+    ow: "Array",
+    fw: "Array",
+    backend: "Backend" = None,
+) -> "Array":
+    """Compute an outcome-weighted kernel score for a finite univariate ensemble."""
+    B = backends.active if backend is None else backends[backend]
+    M: int = fct.shape[-1]
+    wbar = B.mean(fw, axis=-1)
+    e_1 = (
+        B.sum(gauss_kern_uv(obs[..., None], fct, backend=backend) * fw, axis=-1)
+        * ow
+        / (M * wbar)
+    )
+    e_2 = B.sum(
+        gauss_kern_uv(fct[..., None], fct[..., None, :], backend=backend)
+        * fw[..., None]
+        * fw[..., None, :],
+        axis=(-1, -2),
+    )
+    e_2 *= ow / (M**2 * wbar**2)
+    e_3 = gauss_kern_uv(obs, obs, backend=backend) * ow
+
+    out = e_1 - 0.5 * e_2 - 0.5 * e_3
+    out = -out
+    return out
+
+
+def ow_ensemble_mv(
+    obs: "ArrayLike",
+    fct: "Array",
+    ow: "Array",
+    fw: "Array",
+    backend: "Backend" = None,
+) -> "Array":
+    """Compute an outcome-weighted kernel score for a finite multivariate ensemble.
+
+    The ensemble and variables axes are on the second last and last dimensions respectively.
+ """ + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-2] + wbar = B.sum(fw, -1) / M + + err_kern = gauss_kern_mv(B.expand_dims(obs, -2), fct, backend=backend) + E_1 = B.sum(err_kern * fw * B.expand_dims(ow, -1), axis=-1) / (M * wbar) + + spread_kern = gauss_kern_mv( + B.expand_dims(fct, -3), B.expand_dims(fct, -2), backend=backend + ) + fw_prod = B.expand_dims(fw, -1) * B.expand_dims(fw, -2) + spread_kern *= fw_prod * B.expand_dims(ow, (-2, -1)) + E_2 = B.sum(spread_kern, (-2, -1)) / (M**2 * wbar**2) + + E_3 = gauss_kern_mv(obs, obs, backend=backend) * ow + + out = E_1 - 0.5 * E_2 - 0.5 * E_3 + out = -out + return out + + +def vr_ensemble_uv( + obs: "ArrayLike", + fct: "Array", + ow: "Array", + fw: "Array", + backend: "Backend" = None, +) -> "Array": + """Compute a vertically re-scaled kernel score for a finite univariate ensemble.""" + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-1] + + e_1 = ( + B.sum(gauss_kern_uv(obs[..., None], fct, backend=backend) * fw, axis=-1) + * ow + / M + ) + e_2 = B.sum( + gauss_kern_uv( + B.expand_dims(fct, axis=-1), B.expand_dims(fct, axis=-2), backend=backend + ) + * (B.expand_dims(fw, axis=-1) * B.expand_dims(fw, axis=-2)), + axis=(-1, -2), + ) / (M**2) + e_3 = gauss_kern_uv(obs, obs, backend=backend) * ow * ow + + out = e_1 - 0.5 * e_2 - 0.5 * e_3 + out = -out + return out + + +def vr_ensemble_mv( + obs: "ArrayLike", + fct: "Array", + ow: "Array", + fw: "Array", + backend: "Backend" = None, +) -> "Array": + """Compute a vertically re-scaled kernel score for a finite multivariate ensemble. + + The ensemble and variables axes are on the second last and last dimensions respectively. + """ + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-2] + + err_kern = gauss_kern_mv(B.expand_dims(obs, -2), fct, backend=backend) + E_1 = B.sum(err_kern * fw * B.expand_dims(ow, -1), axis=-1) / M + + spread_kern = gauss_kern_mv( + B.expand_dims(fct, -3), B.expand_dims(fct, -2), backend=backend + ) + fw_prod = B.expand_dims(fw, -1) * B.expand_dims(fw, -2) + spread_kern *= fw_prod + E_2 = B.sum(spread_kern, (-2, -1)) / (M**2) + + E_3 = gauss_kern_mv(obs, obs, backend=backend) * ow * ow + + out = E_1 - 0.5 * E_2 - 0.5 * E_3 + out = -out + return out diff --git a/scoringrules/core/kernels/_gufuncs.py b/scoringrules/core/kernels/_gufuncs.py index dcb3803..2aa7a3e 100644 --- a/scoringrules/core/kernels/_gufuncs.py +++ b/scoringrules/core/kernels/_gufuncs.py @@ -74,6 +74,78 @@ def _ks_ensemble_uv_fair_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarra out[0] = -(e_1 / M - 0.5 * e_2 / (M * (M - 1)) - 0.5 * e_3) +@guvectorize( + [ + "void(float32[:], float32[:], float32[:], float32[:], float32[:])", + "void(float64[:], float64[:], float64[:], float64[:], float64[:])", + ], + "(),(n),(),(n)->()", +) +def _owks_ensemble_uv_gufunc( + obs: np.ndarray, + fct: np.ndarray, + ow: np.ndarray, + fw: np.ndarray, + out: np.ndarray, +): + """Outcome-weighted kernel score for univariate ensembles.""" + obs = obs[0] + ow = ow[0] + M = fct.shape[-1] + + if np.isnan(obs): + out[0] = np.nan + return + + e_1 = 0.0 + e_2 = 0.0 + + for i, x_i in enumerate(fct): + e_1 += _gauss_kern_uv(x_i, obs) * fw[i] * ow + for j, x_j in enumerate(fct): + e_2 += _gauss_kern_uv(x_i, x_j) * fw[i] * fw[j] * ow + e_3 = _gauss_kern_uv(obs, obs) * ow + + wbar = np.mean(fw) + + out[0] = -(e_1 / (M * wbar) - 0.5 * e_2 / ((M * wbar) ** 2) - 0.5 * e_3) + + +@guvectorize( + [ + "void(float32[:], float32[:], 
float32[:], float32[:], float32[:])", + "void(float64[:], float64[:], float64[:], float64[:], float64[:])", + ], + "(),(n),(),(n)->()", +) +def _vrks_ensemble_uv_gufunc( + obs: np.ndarray, + fct: np.ndarray, + ow: np.ndarray, + fw: np.ndarray, + out: np.ndarray, +): + """Vertically re-scaled kernel score for univariate ensembles.""" + obs = obs[0] + ow = ow[0] + M = fct.shape[-1] + + if np.isnan(obs): + out[0] = np.nan + return + + e_1 = 0.0 + e_2 = 0.0 + + for i, x_i in enumerate(fct): + e_1 += _gauss_kern_uv(x_i, obs) * fw[i] * ow + for j, x_j in enumerate(fct): + e_2 += _gauss_kern_uv(x_i, x_j) * fw[i] * fw[j] + e_3 = _gauss_kern_uv(obs, obs) * ow * ow + + out[0] = -(e_1 / M - 0.5 * e_2 / (M**2) - 0.5 * e_3) + + @guvectorize( [ "void(float32[:], float32[:,:], float32[:])", @@ -118,20 +190,87 @@ def _ks_ensemble_mv_fair_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarra out[0] = -(e_1 / M - 0.5 * e_2 / (M * (M - 1)) - 0.5 * e_3) +@guvectorize( + [ + "void(float32[:], float32[:,:], float32[:], float32[:], float32[:])", + "void(float64[:], float64[:,:], float64[:], float64[:], float64[:])", + ], + "(d),(m,d),(),(m)->()", +) +def _owks_ensemble_mv_gufunc( + obs: np.ndarray, + fct: np.ndarray, + ow: np.ndarray, + fw: np.ndarray, + out: np.ndarray, +): + """Outcome-weighted kernel score for multivariate ensembles.""" + M = fct.shape[0] + ow = ow[0] + + e_1 = 0.0 + e_2 = 0.0 + for i in range(M): + e_1 += float(_gauss_kern_mv(fct[i], obs) * fw[i] * ow) + for j in range(M): + e_2 += float(_gauss_kern_mv(fct[i], fct[j]) * fw[i] * fw[j] * ow) + e_3 = float(_gauss_kern_mv(obs, obs)) * ow + + wbar = np.mean(fw) + + out[0] = -(e_1 / (M * wbar) - 0.5 * e_2 / (M**2 * wbar**2) - 0.5 * e_3) + + +@guvectorize( + [ + "void(float32[:], float32[:,:], float32[:], float32[:], float32[:])", + "void(float64[:], float64[:,:], float64[:], float64[:], float64[:])", + ], + "(d),(m,d),(),(m)->()", +) +def _vrks_ensemble_mv_gufunc( + obs: np.ndarray, + fct: np.ndarray, + ow: np.ndarray, + fw: np.ndarray, + out: np.ndarray, +): + """Vertically re-scaled kernel score for multivariate ensembles.""" + M = fct.shape[0] + ow = ow[0] + + e_1 = 0.0 + e_2 = 0.0 + for i in range(M): + e_1 += float(_gauss_kern_mv(fct[i], obs) * fw[i] * ow) + for j in range(M): + e_2 += float(_gauss_kern_mv(fct[i], fct[j]) * fw[i] * fw[j]) + e_3 = float(_gauss_kern_mv(obs, obs)) * ow * ow + + out[0] = -(e_1 / M - 0.5 * e_2 / (M**2) - 0.5 * e_3) + + estimator_gufuncs = { "fair": _ks_ensemble_uv_fair_gufunc, "nrg": _ks_ensemble_uv_nrg_gufunc, + "ow": _owks_ensemble_uv_gufunc, + "vr": _vrks_ensemble_uv_gufunc, } estimator_gufuncs_mv = { "fair": _ks_ensemble_mv_fair_gufunc, "nrg": _ks_ensemble_mv_nrg_gufunc, + "ow": _owks_ensemble_mv_gufunc, + "vr": _vrks_ensemble_mv_gufunc, } - __all__ = [ "_ks_ensemble_uv_fair_gufunc", "_ks_ensemble_uv_nrg_gufunc", + "_owks_ensemble_uv_gufunc", + "_vrks_ensemble_uv_gufunc", "_ks_ensemble_mv_fair_gufunc", "_ks_ensemble_mv_nrg_gufunc", + "_owks_ensemble_mv_gufunc", + "_vrks_ensemble_mv_gufunc", ] diff --git a/tests/test_kernels.py b/tests/test_kernels.py index adf52de..b9ceb67 100644 --- a/tests/test_kernels.py +++ b/tests/test_kernels.py @@ -2,6 +2,7 @@ import numpy as np import pytest from scoringrules import _kernels +from scoringrules.backend import backends from .conftest import BACKENDS @@ -91,3 +92,291 @@ def test_gksmv(estimator, backend): res = _kernels.gksmv_ensemble(obs, fct, estimator=estimator, backend=backend) expected = 0.6120162 assert np.isclose(res, expected) + + 
+@pytest.mark.parametrize("estimator", ESTIMATORS) +@pytest.mark.parametrize("backend", BACKENDS) +def test_twgksuv(estimator, backend): + if backend == "jax": + pytest.skip("Not implemented in jax backend") + obs = np.random.randn(N) + mu = obs + np.random.randn(N) * 0.1 + sigma = abs(np.random.randn(N)) * 0.3 + fct = np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None] + mu[..., None] + + res = _kernels.gksuv_ensemble(obs, fct, estimator=estimator, backend=backend) + resw = _kernels.twgksuv_ensemble( + obs, fct, lambda x: x, estimator=estimator, backend=backend + ) + np.testing.assert_allclose(res, resw, rtol=1e-10) + + # test correctness + fct = np.array( + [ + [-0.03574194, 0.06873582, 0.03098684, 0.07316138, 0.08498165], + [-0.11957874, 0.26472238, -0.06324622, 0.43026451, -0.25640457], + [-1.31340831, -1.43722153, -1.01696021, -0.70790148, -1.20432392], + [1.26054027, 1.03477874, 0.61688454, 0.75305795, 1.19364529], + [0.63192933, 0.33521695, 0.20626017, 0.43158264, 0.69518928], + [0.83305233, 0.71965134, 0.96687378, 1.0000473, 0.82714425], + [0.0128655, 0.03849841, 0.02459106, -0.02618909, 0.18687008], + [-0.69399216, -0.59432627, -1.43629972, 0.01979857, -0.3286767], + [1.92958764, 2.2678255, 1.86036922, 1.84766384, 2.03331138], + [0.80847492, 1.11265193, 0.58995365, 1.04838184, 1.10439277], + ] + ) + + obs = np.array( + [ + 0.19640722, + -0.11300369, + -1.0400268, + 0.84306533, + 0.36572078, + 0.82487264, + 0.14088773, + -0.51936113, + 1.70706264, + 0.75985908, + ] + ) + + def v_func1(x): + return np.maximum(x, -1.0) + + def v_func2(x): + return np.minimum(x, 1.85) + + if estimator == "nrg": + res = np.mean( + np.float64( + _kernels.twgksuv_ensemble( + obs, fct, v_func1, estimator=estimator, backend=backend + ) + ) + ) + np.testing.assert_allclose(res, 0.01018774, rtol=1e-6) + + res = np.mean( + np.float64( + _kernels.twgksuv_ensemble( + obs, fct, v_func2, estimator=estimator, backend=backend + ) + ) + ) + np.testing.assert_allclose(res, 0.0089314, rtol=1e-6) + + elif estimator == "fair": + res = np.mean( + np.float64( + _kernels.twgksuv_ensemble( + obs, fct, v_func1, estimator=estimator, backend=backend + ) + ) + ) + np.testing.assert_allclose(res, 0.130842, rtol=1e-6) + + res = np.mean( + np.float64( + _kernels.twgksuv_ensemble( + obs, fct, v_func2, estimator=estimator, backend=backend + ) + ) + ) + np.testing.assert_allclose(res, 0.1283745, rtol=1e-6) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_twgksmv(backend): + if backend == "jax": + pytest.skip("Not implemented in jax backend") + obs = np.random.randn(N, N_VARS) + fct = np.expand_dims(obs, axis=-2) + np.random.randn(N, ENSEMBLE_SIZE, N_VARS) + + res0 = _kernels.gksmv_ensemble(obs, fct, backend=backend) + res = _kernels.twgksmv_ensemble(obs, fct, lambda x: x, backend=backend) + np.testing.assert_allclose(res, res0, rtol=1e-6) + + fct = np.array( + [[0.79546742, 0.4777960, 0.2164079], [0.02461368, 0.7584595, 0.3181810]] + ).T + obs = np.array([0.2743836, 0.8146400]) + + def v_func(x): + return np.maximum(x, 0.2) + + res = _kernels.twgksmv_ensemble(obs, fct, v_func, backend=backend) + np.testing.assert_allclose(res, 0.08671498, rtol=1e-6) + + def v_func(x): + return np.minimum(x, 1) + + res = _kernels.twgksmv_ensemble(obs, fct, v_func, backend=backend) + np.testing.assert_allclose(res, 0.1016436, rtol=1e-6) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_owgksuv(backend): + if backend == "jax": + pytest.skip("Not implemented in jax backend") + obs = np.random.randn(N) + mu = obs + 
np.random.randn(N) * 0.1 + sigma = abs(np.random.randn(N)) * 0.3 + fct = np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None] + mu[..., None] + + res = _kernels.gksuv_ensemble(obs, fct, backend=backend) + resw = _kernels.owgksuv_ensemble(obs, fct, lambda x: x * 0.0 + 1.0, backend=backend) + np.testing.assert_allclose(res, resw, rtol=1e-9) + + # test correctness + fct = np.array( + [ + [-0.03574194, 0.06873582, 0.03098684, 0.07316138, 0.08498165], + [-0.11957874, 0.26472238, -0.06324622, 0.43026451, -0.25640457], + [-1.31340831, -1.43722153, -1.01696021, -0.70790148, -1.20432392], + [1.26054027, 1.03477874, 0.61688454, 0.75305795, 1.19364529], + [0.63192933, 0.33521695, 0.20626017, 0.43158264, 0.69518928], + [0.83305233, 0.71965134, 0.96687378, 1.0000473, 0.82714425], + [0.0128655, 0.03849841, 0.02459106, -0.02618909, 0.18687008], + [-0.69399216, -0.59432627, -1.43629972, 0.01979857, -0.3286767], + [1.92958764, 2.2678255, 1.86036922, 1.84766384, 2.03331138], + [0.80847492, 1.11265193, 0.58995365, 1.04838184, 1.10439277], + ] + ) + + obs = np.array( + [ + 0.19640722, + -0.11300369, + -1.0400268, + 0.84306533, + 0.36572078, + 0.82487264, + 0.14088773, + -0.51936113, + 1.70706264, + 0.75985908, + ] + ) + + def w_func(x): + return (x > -1).astype(float) + + res = np.mean( + np.float64(_kernels.owgksuv_ensemble(obs, fct, w_func, backend=backend)) + ) + np.testing.assert_allclose(res, 0.01036335, rtol=1e-6) + + def w_func(x): + return (x < 1.85).astype(float) + + res = np.mean( + np.float64(_kernels.owgksuv_ensemble(obs, fct, w_func, backend=backend)) + ) + np.testing.assert_allclose(res, 0.008905213, rtol=1e-6) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_owgksmv(backend): + if backend == "jax": + pytest.skip("Not implemented in jax backend") + obs = np.random.randn(N, N_VARS) + fct = np.expand_dims(obs, axis=-2) + np.random.randn(N, ENSEMBLE_SIZE, N_VARS) + + res0 = _kernels.gksmv_ensemble(obs, fct, backend=backend) + res = _kernels.owgksmv_ensemble( + obs, fct, lambda x: backends[backend].mean(x) * 0.0 + 1.0, backend=backend + ) + np.testing.assert_allclose(res, res0, rtol=1e-6) + + fct = np.array( + [[0.79546742, 0.4777960, 0.2164079], [0.02461368, 0.7584595, 0.3181810]] + ).T + obs = np.array([0.2743836, 0.8146400]) + + def w_func(x): + return backends[backend].all(x > 0.2) + + res = _kernels.owgksmv_ensemble(obs, fct, w_func, backend=backend) + np.testing.assert_allclose(res, 0.03901075, rtol=1e-5) + + def w_func(x): + return backends[backend].all(x < 1.0) + + res = _kernels.owgksmv_ensemble(obs, fct, w_func, backend=backend) + np.testing.assert_allclose(res, 0.1016436, rtol=1e-6) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_vrgksuv(backend): + if backend == "jax": + pytest.skip("Not implemented in jax backend") + obs = np.random.randn(N) + mu = obs + np.random.randn(N) * 0.1 + sigma = abs(np.random.randn(N)) * 0.3 + fct = np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None] + mu[..., None] + + res = _kernels.gksuv_ensemble(obs, fct, backend=backend) + resw = _kernels.vrgksuv_ensemble(obs, fct, lambda x: x * 0.0 + 1.0, backend=backend) + np.testing.assert_allclose(res, resw, rtol=1e-10) + + # test correctness + fct = np.array( + [ + [-0.03574194, 0.06873582, 0.03098684, 0.07316138, 0.08498165], + [-0.11957874, 0.26472238, -0.06324622, 0.43026451, -0.25640457], + [-1.31340831, -1.43722153, -1.01696021, -0.70790148, -1.20432392], + [1.26054027, 1.03477874, 0.61688454, 0.75305795, 1.19364529], + [0.63192933, 0.33521695, 0.20626017, 0.43158264, 0.69518928], + 
[0.83305233, 0.71965134, 0.96687378, 1.0000473, 0.82714425], + [0.0128655, 0.03849841, 0.02459106, -0.02618909, 0.18687008], + [-0.69399216, -0.59432627, -1.43629972, 0.01979857, -0.3286767], + [1.92958764, 2.2678255, 1.86036922, 1.84766384, 2.03331138], + [0.80847492, 1.11265193, 0.58995365, 1.04838184, 1.10439277], + ] + ) + + obs = np.array( + [ + 0.19640722, + -0.11300369, + -1.0400268, + 0.84306533, + 0.36572078, + 0.82487264, + 0.14088773, + -0.51936113, + 1.70706264, + 0.75985908, + ] + ) + + def w_func(x): + return (x > -1).astype(float) + + res = np.mean( + np.float64(_kernels.vrgksuv_ensemble(obs, fct, w_func, backend=backend)) + ) + np.testing.assert_allclose(res, 0.01476682, rtol=1e-6) + + def w_func(x): + return (x < 1.85).astype(float) + + res = np.mean( + np.float64(_kernels.vrgksuv_ensemble(obs, fct, w_func, backend=backend)) + ) + np.testing.assert_allclose(res, 0.04011836, rtol=1e-6) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_vrgksmv(backend): + if backend == "jax": + pytest.skip("Not implemented in jax backend") + obs = np.random.randn(N, N_VARS) + fct = np.expand_dims(obs, axis=-2) + np.random.randn(N, ENSEMBLE_SIZE, N_VARS) + + res0 = _kernels.gksmv_ensemble(obs, fct, backend=backend) + res = _kernels.vrgksmv_ensemble( + obs, fct, lambda x: backends[backend].mean(x) * 0.0 + 1.0, backend=backend + ) + np.testing.assert_allclose(res, res0, rtol=1e-6) diff --git a/tests/test_wenergy.py b/tests/test_wenergy.py index de3cc0d..9177f54 100644 --- a/tests/test_wenergy.py +++ b/tests/test_wenergy.py @@ -75,23 +75,21 @@ def w_func(x): np.testing.assert_allclose(res, 0.3345418, rtol=1e-6) -# @pytest.mark.parametrize("backend", BACKENDS) -# def test_twenergy_score_correctness(backend): -# fct = np.array( -# [[0.79546742, 0.4777960, 0.2164079], [0.02461368, 0.7584595, 0.3181810]] -# ).T -# obs = np.array([0.2743836, 0.8146400]) - -# def v_func(x, t): -# return np.maximum(x, t) - -# t = 0.2 -# res = twenergy_score(obs, fct, v_func, (t,), backend=backend) -# np.testing.assert_allclose(res, 0.3116075, rtol=1e-6) - -# def v_func(x, t): -# return np.minimum(x, t) - -# t = 1 -# res = twenergy_score(obs, fct, v_func, (t,), backend=backend) -# np.testing.assert_allclose(res, 0.3345418, rtol=1e-6) +@pytest.mark.parametrize("backend", BACKENDS) +def test_twenergy_score_correctness(backend): + fct = np.array( + [[0.79546742, 0.4777960, 0.2164079], [0.02461368, 0.7584595, 0.3181810]] + ).T + obs = np.array([0.2743836, 0.8146400]) + + def v_func(x): + return np.maximum(x, 0.2) + + res = twenergy_score(obs, fct, v_func, backend=backend) + np.testing.assert_allclose(res, 0.3116075, rtol=1e-6) + + def v_func(x): + return np.minimum(x, 1) + + res = twenergy_score(obs, fct, v_func, backend=backend) + np.testing.assert_allclose(res, 0.3345418, rtol=1e-6)