Add example of sparsely observed SIR model #2457

Merged · 2 commits · May 1, 2020
3 changes: 2 additions & 1 deletion pyro/contrib/epidemiology/__init__.py
@@ -4,13 +4,14 @@
from .compartmental import CompartmentalModel
from .distributions import infection_dist
from .seir import OverdispersedSEIRModel, SimpleSEIRModel
from .sir import OverdispersedSIRModel, SimpleSIRModel
from .sir import OverdispersedSIRModel, SimpleSIRModel, SparseSIRModel

__all__ = [
"CompartmentalModel",
"OverdispersedSEIRModel",
"OverdispersedSIRModel",
"SimpleSEIRModel",
"SimpleSIRModel",
"SparseSIRModel",
"infection_dist",
]
110 changes: 108 additions & 2 deletions pyro/contrib/epidemiology/sir.py
@@ -22,7 +22,6 @@ class SimpleSIRModel(CompartmentalModel):
:param int population: Total ``population = S + I + R``.
:param float recovery_time: Mean recovery time (duration in state
``I``). Must be greater than 1.
:param iterable data: Time series of new observed infections.
:param iterable data: Time series of new observed infections. Each time
step is Binomial distributed between 0 and the number of ``S -> I``
transitions. This allows false negatives but no false positives.
@@ -136,7 +135,6 @@ class OverdispersedSIRModel(CompartmentalModel):
:param int population: Total ``population = S + I + R``.
:param float recovery_time: Mean recovery time (duration in state
``I``). Must be greater than 1.
:param iterable data: Time series of new observed infections.
:param iterable data: Time series of new observed infections. Each time
step is Binomial distributed between 0 and the number of ``S -> I``
transitions. This allows false negatives but no false positives.
@@ -212,3 +210,111 @@ def transition_bwd(self, params, prev, curr, t):
pyro.sample("obs_{}".format(t),
dist.ExtendedBinomial(S2I, rho),
obs=self.data[t])


class SparseSIRModel(CompartmentalModel):
"""
Susceptible-Infected-Recovered model with sparsely observed infections.

To customize this model we recommend forking and editing this class.

This is a stochastic discrete-time discrete-state model with four
compartments: "S" for susceptible, "I" for infected, "O" for the
cumulative number of observations (an auxiliary compartment described
below), and "R" for recovered individuals (the recovered individuals
are implicit: ``R = population - S - I``), with transitions
``S -> I -> R``.

This model allows observations of **cumulative** infections at uneven time
intervals. To preserve Markov structure (and hence tractable inference)
this model adds an auxiliary compartment ``O`` denoting the fully-observed
cumulative number of observations at each time point. At observed times
(when ``mask[t] == True``) ``O`` must exactly match the provided data;
between observed times, ``O`` stochastically imputes the latent cumulative count.

:param int population: Total ``population = S + I + R``.
:param float recovery_time: Mean recovery time (duration in state
``I``). Must be greater than 1.
:param iterable data: Time series of **cumulative** observed infections.
Whenever ``mask[t] == True``, ``data[t]`` corresponds to an
observation; otherwise ``data[t]`` can be arbitrary, e.g. NAN.
:param iterable mask: Boolean time series denoting whether an observation
is made at each time step. Should satisfy ``len(mask) == len(data)``.
"""

def __init__(self, population, recovery_time, data, mask):
assert len(data) == len(mask)
duration = len(data)
compartments = ("S", "I", "O") # O is auxiliary, R is implicit.
super().__init__(compartments, duration, population)

assert isinstance(recovery_time, float)
assert recovery_time > 1
self.recovery_time = recovery_time

self.data = data
self.mask = mask

series = ("S2I", "I2R", "S2O", "obs")
full_mass = [("R0", "rho")]

def global_model(self):
tau = self.recovery_time
R0 = pyro.sample("R0", dist.LogNormal(0., 1.))
rho = pyro.sample("rho", dist.Uniform(0, 1))
return R0, tau, rho

def initialize(self, params):
# Start with a single infection.
return {"S": self.population - 1, "I": 1, "O": 0}

def transition_fwd(self, params, state, t):
R0, tau, rho = params

# Sample flows between compartments.
S2I = pyro.sample("S2I_{}".format(t),
infection_dist(individual_rate=R0 / tau,
num_susceptible=state["S"],
num_infectious=state["I"],
population=self.population))
I2R = pyro.sample("I2R_{}".format(t),
dist.Binomial(state["I"], 1 / tau))
S2O = pyro.sample("S2O_{}".format(t),
dist.ExtendedBinomial(S2I, rho))

# Update compartments with flows.
state["S"] = state["S"] - S2I
state["I"] = state["I"] + S2I - I2R
state["O"] = state["O"] + S2O

# Condition on cumulative observations.
mask_t = self.mask[t] if t < self.duration else False
data_t = self.data[t] if t < self.duration else None
pyro.sample("obs_{}".format(t),
dist.Delta(state["O"]).mask(mask_t),

Collaborator:
why is the auxiliary necessary? or does this just play better with the structure of CompartmentalModel?

Member Author (@fritzo), May 1, 2020:
tl;dr The auxiliary variable is needed to preserve Markov structure.

The observations in this model are aggregated over intervals: obs = S2I[t_prev+1 : t_curr+1].sum(), where t_prev is the time of the last observation and t_curr is the time of the current observation. In our enumeration strategy this would couple all t_curr - t_prev enumeration variables in the gap, with cost growing exponentially in the number of coupled variables. While the non-parallel-scan enumeration strategy could handle this without erroring, it would be prohibitively expensive and would not allow large gaps in sensor data (e.g. when a government shuts down testing or runs out of tests). The trick we're using is to add an auxiliary variable for the entire cumulative observation trajectory (with the same likelihood as in the usual SIR models), and then Delta-clamp that auxiliary variable to the true observations at the few sparsely observed time steps. This makes more work for HMC, adds one enumeration variable per time step, and increases the complexity of variable elimination by a constant factor of Q**2, but crucially this factor is independent of gap size.

I had been struggling with this issue for a while, since Lucy's model simulates four time steps per day but is observed only once per day. The only alternative I could see was to do parallel-scan variable elimination where each DiscreteHMM state covered the joint distribution over an entire day (four time steps), resulting in complexity Q**(2 * 4 * 2) for an SIR model or Q**(3 * 4 * 2) for an SEIR model.
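
For concreteness, a minimal sketch of this trick with toy values (not from the PR), assuming a reporting probability of rho = 1 so that S2O equals S2I; here S2I, data, and mask are stand-ins for the quantities discussed above:

import torch

# Toy setting: four simulated time steps per day, observed once per day.
S2I = torch.tensor([1., 0., 2., 1., 3., 2., 0., 1.])  # latent new infections
mask = torch.arange(8) % 4 == 3                       # observations at t = 3, 7
data = torch.cumsum(S2I, 0)                           # cumulative counts
data[~mask] = float("nan")                            # unobserved entries are arbitrary

# Naive likelihood: each observation aggregates a whole gap, e.g.
# data[7] - data[3] == S2I[4:8].sum(), which couples the enumeration
# variables at t = 4, 5, 6, 7.

# Auxiliary formulation: carry a cumulative compartment O forward one
# step at a time and Delta-clamp it to data[t] only where mask[t] is True.
O = torch.tensor(0.)
for t in range(8):
    O = O + S2I[t]           # per-step update; Markov in (S, I, O)
    if mask[t]:
        assert O == data[t]  # the Delta factor clamps O to the data here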

Collaborator:
thanks for the explanation. to clarify though: if all you had was occasional missing data you wouldn't need this construction. this is really for the cumulative case

Member Author (@fritzo):
Correct. It appears the cumulative case is more common in epidemiology.
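
For contrast, here is a sketch of the simpler case mentioned above: with occasionally missing per-step (non-cumulative) counts one could keep the usual SimpleSIRModel likelihood and simply mask out the missing steps, with no auxiliary compartment. This is an illustration, not code from the PR; S2I, rho, data, and mask are stand-ins.

import torch
import pyro
import pyro.distributions as dist

def observe_step(t, S2I, rho, data, mask):
    # Per-step counts with occasional gaps: .mask(False) zeroes the
    # log-likelihood at missing steps, so the placeholder value there
    # never matters.
    obs = data[t] if mask[t] else torch.tensor(0.)
    pyro.sample("obs_{}".format(t),
                dist.ExtendedBinomial(S2I, rho).mask(mask[t]),
                obs=obs)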

obs=data_t)

def transition_bwd(self, params, prev, curr, t):
R0, tau, rho = params

# Reverse the flow computation.
S2I = prev["S"] - curr["S"]
I2R = prev["I"] - curr["I"] + S2I
S2O = curr["O"] - prev["O"]

# Condition on flows between compartments.
pyro.sample("S2I_{}".format(t),
infection_dist(individual_rate=R0 / tau,
num_susceptible=prev["S"],
num_infectious=prev["I"],
population=self.population),
obs=S2I)
pyro.sample("I2R_{}".format(t),
dist.ExtendedBinomial(prev["I"], 1 / tau),
obs=I2R)
pyro.sample("S2O_{}".format(t),
dist.ExtendedBinomial(S2I, rho),
obs=S2O)

# Condition on cumulative observations.
pyro.sample("obs_{}".format(t),
dist.Delta(curr["O"]).mask(self.mask[t]),
obs=self.data[t])
48 changes: 47 additions & 1 deletion tests/contrib/epidemiology/test_sir.py
@@ -1,9 +1,15 @@
# Copyright Contributors to the Pyro project.
# SPDX-License-Identifier: Apache-2.0

import logging
import math

import pytest
import torch

from pyro.contrib.epidemiology import OverdispersedSIRModel, SimpleSIRModel
from pyro.contrib.epidemiology import OverdispersedSIRModel, SimpleSIRModel, SparseSIRModel

logger = logging.getLogger(__name__)


@pytest.mark.parametrize("duration", [3, 7])
@@ -66,3 +72,43 @@ def test_overdispersed_smoke(duration, forecast, options):
samples = model.predict(forecast=forecast)
assert samples["S"].shape == (num_samples, duration + forecast)
assert samples["I"].shape == (num_samples, duration + forecast)


@pytest.mark.parametrize("duration", [4, 12])
@pytest.mark.parametrize("forecast", [7])
@pytest.mark.parametrize("options", [
{},
{"dct": 1.},
{"num_quant_bins": 8},
], ids=str)
def test_sparse_smoke(duration, forecast, options):
population = 100
recovery_time = 7.0

# Generate data.
data = [None] * duration
mask = torch.arange(duration) % 4 == 3
model = SparseSIRModel(population, recovery_time, data, mask)
for attempt in range(100):
data = model.generate({"R0": 1.5, "rho": 0.5})["obs"]
if data.sum():
break
assert data.sum() > 0, "failed to generate positive data"
assert (data[1:] >= data[:-1]).all()
data[~mask] = math.nan
logger.info("data:\n{}".format(data))

# Infer.
model = SparseSIRModel(population, recovery_time, data, mask)
num_samples = 5
model.fit(warmup_steps=1, num_samples=num_samples, max_tree_depth=2, **options)

# Predict and forecast.
samples = model.predict(forecast=forecast)
assert samples["S"].shape == (num_samples, duration + forecast)
assert samples["I"].shape == (num_samples, duration + forecast)
assert samples["O"].shape == (num_samples, duration + forecast)
assert (samples["O"][..., 1:] >= samples["O"][..., :-1]).all()
for O in samples["O"]:
logger.info("imputed:\n{}".format(O))
assert (O[:duration][mask] == data[mask]).all()
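
For reference, a minimal usage sketch of the new model, mirroring the smoke test above; the population, cumulative counts, and MCMC settings below are illustrative placeholders rather than recommendations:

import math

import torch

from pyro.contrib.epidemiology import SparseSIRModel

duration, forecast = 20, 7
population, recovery_time = 1000, 7.0

# Cumulative infection counts observed every fourth time step; unobserved
# entries are NAN and are imputed by the model.
mask = torch.arange(duration) % 4 == 3
data = torch.full((duration,), math.nan)
data[mask] = torch.tensor([2., 5., 9., 14., 20.])

model = SparseSIRModel(population, recovery_time, data, mask)
model.fit(warmup_steps=100, num_samples=100, max_tree_depth=5)
samples = model.predict(forecast=forecast)  # includes the imputed "O" series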