From a52338ce61a3f25e6728b4e2fa55e0582bf5fdf3 Mon Sep 17 00:00:00 2001
From: Philip Blair
Date: Wed, 7 Feb 2024 20:05:08 +0100
Subject: [PATCH] Port implementation of SimplexToOrderedTransform from numpyro (#3320)

---
 docs/source/distributions.rst             |  7 ++
 pyro/distributions/transforms/__init__.py |  2 +
 .../transforms/simplex_to_ordered.py      | 70 +++++++++++++++++++
 tests/distributions/test_transforms.py    | 56 ++++++++++++---
 4 files changed, 126 insertions(+), 9 deletions(-)
 create mode 100644 pyro/distributions/transforms/simplex_to_ordered.py

diff --git a/docs/source/distributions.rst b/docs/source/distributions.rst
index ccb51b088d..aee80f6cc4 100644
--- a/docs/source/distributions.rst
+++ b/docs/source/distributions.rst
@@ -536,6 +536,13 @@ PositivePowerTransform
     :undoc-members:
     :show-inheritance:
 
+SimplexToOrderedTransform
+-------------------------
+.. autoclass:: pyro.distributions.transforms.SimplexToOrderedTransform
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 SoftplusLowerCholeskyTransform
 ------------------------------
 .. autoclass:: pyro.distributions.transforms.SoftplusLowerCholeskyTransform
diff --git a/pyro/distributions/transforms/__init__.py b/pyro/distributions/transforms/__init__.py
index a5847ad31d..d2a2382974 100644
--- a/pyro/distributions/transforms/__init__.py
+++ b/pyro/distributions/transforms/__init__.py
@@ -70,6 +70,7 @@
 from .polynomial import Polynomial, polynomial
 from .power import PositivePowerTransform
 from .radial import ConditionalRadial, Radial, conditional_radial, radial
+from .simplex_to_ordered import SimplexToOrderedTransform
 from .softplus import SoftplusLowerCholeskyTransform, SoftplusTransform
 from .spline import ConditionalSpline, Spline, conditional_spline, spline
 from .spline_autoregressive import (
@@ -184,6 +185,7 @@ def iterated(repeats, base_fn, *args, **kwargs):
     "Polynomial",
     "PositivePowerTransform",
     "Radial",
+    "SimplexToOrderedTransform",
     "SoftplusLowerCholeskyTransform",
     "SoftplusTransform",
     "Spline",
diff --git a/pyro/distributions/transforms/simplex_to_ordered.py b/pyro/distributions/transforms/simplex_to_ordered.py
new file mode 100644
index 0000000000..26791f7ff9
--- /dev/null
+++ b/pyro/distributions/transforms/simplex_to_ordered.py
@@ -0,0 +1,70 @@
+# Copyright Contributors to the Pyro project.
+# SPDX-License-Identifier: Apache-2.0
+
+import torch
+from torch.distributions.transforms import Transform
+from torch.special import expit, logit
+
+from .. import constraints
+
+
+# This class is a port of https://num.pyro.ai/en/stable/_modules/numpyro/distributions/transforms.html#SimplexToOrderedTransform
+class SimplexToOrderedTransform(Transform):
+    """
+    Transform a simplex into an ordered vector (via differences in the Logistic CDF between cutpoints).
+    Used in [1] to induce a prior on latent cutpoints via transforming ordered category probabilities.
+
+    :param anchor_point: Anchor point is a nuisance parameter to improve the identifiability of the transform.
+        For simplicity, we assume it is a scalar value, but it is broadcastable to x.shape[:-1].
+        For more details, please refer to Section 2.2 in [1].
+
+    **References:**
+
+    1. *Ordinal Regression Case Study, section 2.2*,
+       M. Betancourt, https://betanalpha.github.io/assets/case_studies/ordinal_regression.html
+
+    """
+
+    domain = constraints.simplex
+    codomain = constraints.ordered_vector
+
+    def __init__(self, anchor_point=None):
+        super().__init__()
+        self.anchor_point = (
+            anchor_point if anchor_point is not None else torch.tensor(0.0)
+        )
+
+    def _call(self, x):
+        s = torch.cumsum(x[..., :-1], axis=-1)
+        y = logit(s) + torch.unsqueeze(self.anchor_point, -1)
+        return y
+
+    def _inverse(self, y):
+        y = y - torch.unsqueeze(self.anchor_point, -1)
+        s = expit(y)
+        # x0 = s0, x1 = s1 - s0, x2 = s2 - s1,..., xn = 1 - s[n-1]
+        # add two boundary points 0 and 1
+        s = torch.concat(
+            [torch.zeros_like(s)[..., :1], s, torch.ones_like(s)[..., :1]], dim=-1
+        )
+        x = s[..., 1:] - s[..., :-1]
+        return x
+
+    def log_abs_det_jacobian(self, x, y):
+        # |dp/dc| = |dx/dy| = prod(ds/dy) = prod(expit'(y))
+        # we know the log derivative of expit(y) is `-softplus(y) - softplus(-y)`
+        J_logdet = (
+            torch.nn.functional.softplus(y) + torch.nn.functional.softplus(-y)
+        ).sum(-1)
+        return J_logdet
+
+    def __eq__(self, other):
+        if not isinstance(other, SimplexToOrderedTransform):
+            return False
+        return torch.equal(self.anchor_point, other.anchor_point)
+
+    def forward_shape(self, shape):
+        return shape[:-1] + (shape[-1] - 1,)
+
+    def inverse_shape(self, shape):
+        return shape[:-1] + (shape[-1] + 1,)
diff --git a/tests/distributions/test_transforms.py b/tests/distributions/test_transforms.py
index a339f7ea79..67f4c5167b 100644
--- a/tests/distributions/test_transforms.py
+++ b/tests/distributions/test_transforms.py
@@ -112,10 +112,15 @@ def nonzero(x):
         assert diag_sum == float(input_dim)
         assert lower_sum == float(0.0)
 
-    def _test_inverse(self, shape, transform):
+    def _test_inverse(self, shape, transform, base_dist_type="normal"):
         # Test g^{-1}(g(x)) = x
         # NOTE: Calling _call and _inverse directly bypasses caching
-        base_dist = dist.Normal(torch.zeros(shape), torch.ones(shape))
+        if base_dist_type == "dirichlet":
+            base_dist = dist.Dirichlet(torch.ones(shape) * 10.0)
+        elif base_dist_type == "normal":
+            base_dist = dist.Normal(torch.zeros(shape), torch.ones(shape))
+        else:
+            raise ValueError(f"Unknown base distribution type: {base_dist_type}")
         x_true = base_dist.sample(torch.Size([10]))
         y = transform._call(x_true)
         x_calculated = transform._inverse(y)
@@ -132,8 +137,13 @@
         J_2 = transform.log_abs_det_jacobian(x_calculated, y)
         assert (J_1 - J_2).abs().max().item() < self.delta
 
-    def _test_shape(self, base_shape, transform):
-        base_dist = dist.Normal(torch.zeros(base_shape), torch.ones(base_shape))
+    def _test_shape(self, base_shape, transform, base_dist_type="normal"):
+        if base_dist_type == "dirichlet":
+            base_dist = dist.Dirichlet(torch.ones(base_shape) * 10.0)
+        elif base_dist_type == "normal":
+            base_dist = dist.Normal(torch.zeros(base_shape), torch.ones(base_shape))
+        else:
+            raise ValueError(f"Unknown base distribution type: {base_dist_type}")
         sample = dist.TransformedDistribution(base_dist, [transform]).sample()
         assert sample.shape == base_shape
 
@@ -148,7 +158,9 @@
         assert transform.inverse_shape(output_event_shape) == input_event_shape
         assert transform.inverse_shape(output_shape) == base_shape
 
-    def _test_autodiff(self, input_dim, transform, inverse=False):
+    def _test_autodiff(
+        self, input_dim, transform, inverse=False, base_dist_type="normal"
+    ):
         """
         This method essentially tests whether autodiff will not throw any
         errors when
you're doing maximum-likelihood learning with the transform. Many @@ -159,7 +171,12 @@ def _test_autodiff(self, input_dim, transform, inverse=False): if inverse: transform = transform.inv - base_dist = dist.Normal(torch.zeros(input_dim), torch.ones(input_dim)) + if base_dist_type == "dirichlet": + base_dist = dist.Dirichlet(torch.ones(input_dim) * 10.0) + elif base_dist_type == "normal": + base_dist = dist.Normal(torch.zeros(input_dim), torch.ones(input_dim)) + else: + raise ValueError(f"Unknown base distribution type: {base_dist_type}") flow_dist = dist.TransformedDistribution(base_dist, [transform]) optimizer = torch.optim.Adam(temp_transform.parameters()) x = torch.rand(1, input_dim) @@ -177,6 +194,7 @@ def _test( inverse=True, autodiff=True, event_dim=1, + base_dist_type="normal", ): for event_shape in [(2,), (5,)]: if event_dim > 1: @@ -186,11 +204,15 @@ def _test( ) if inverse: - self._test_inverse(event_shape, transform) + self._test_inverse( + event_shape, transform, base_dist_type=base_dist_type + ) if shape: for shape in [(3,), (3, 4), (3, 4, 5)]: base_shape = shape + event_shape - self._test_shape(base_shape, transform) + self._test_shape( + base_shape, transform, base_dist_type=base_dist_type + ) if jacobian and transform.bijective: if event_dim > 1: transform = Flatten(transform, event_shape) @@ -198,7 +220,10 @@ def _test( if isinstance(transform, dist.TransformModule) and autodiff: # If the function doesn't have an explicit inverse, then use the forward op for autodiff self._test_autodiff( - reduce(operator.mul, event_shape, 1), transform, inverse=not inverse + reduce(operator.mul, event_shape, 1), + transform, + inverse=not inverse, + base_dist_type=base_dist_type, ) def _test_conditional( @@ -376,6 +401,19 @@ def test_polynomial(self): def test_radial(self): self._test(T.radial, inverse=False) + def test_simplex_to_ordered(self): + self._test( + lambda event_shape: T.SimplexToOrderedTransform(), + shape=False, + autodiff=False, + base_dist_type="dirichlet", + ) + # Unique shape behavior: + assert T.SimplexToOrderedTransform().forward_shape((4, 3, 3)) == (4, 3, 2) + assert T.SimplexToOrderedTransform().forward_shape((2,)) == (1,) + assert T.SimplexToOrderedTransform().inverse_shape((2,)) == (3,) + assert T.SimplexToOrderedTransform().inverse_shape((4, 3, 3)) == (4, 3, 4) + def test_spline(self): for order in ["linear", "quadratic"]: self._test(partial(T.spline, order=order))
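
As a usage sketch (not part of the patch itself): the new transform maps a length-K point on the simplex to K - 1 ordered cutpoints by taking the logit of the cumulative probabilities, so composing it with a Dirichlet base distribution induces a prior over ordered cutpoints, as in the ordinal regression case study cited in the docstring. The snippet below assumes the patch has been applied; the sizes and variable names are illustrative only.

# Usage sketch only: assumes the patch above has been applied;
# names and sizes here are illustrative.
import torch

import pyro.distributions as dist
from pyro.distributions.transforms import SimplexToOrderedTransform

# Dirichlet prior over K = 4 category probabilities (a point on the simplex).
simplex_prior = dist.Dirichlet(torch.ones(4) * 10.0)
transform = SimplexToOrderedTransform(anchor_point=torch.tensor(0.0))

# Induced prior over K - 1 = 3 ordered cutpoints.
cutpoint_prior = dist.TransformedDistribution(simplex_prior, [transform])
c = cutpoint_prior.sample()
assert c.shape == (3,)
assert bool((c[1:] > c[:-1]).all())  # cutpoints come out strictly increasing

# Round trip: the inverse maps the ordered cutpoints back to the simplex.
p = simplex_prior.sample()
assert torch.allclose(transform.inv(transform(p)), p, atol=1e-5)

Because log_abs_det_jacobian is implemented, cutpoint_prior.log_prob(c) is also well defined, so the induced prior over cutpoints can be used like any other Pyro distribution.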