Update to PyTorch 1.0.0 #1376

Closed
wants to merge 143 commits
Changes from 52 commits

Commits (143)
4c6d9b7
Fix PyTorch 0.4.1 errors
fritzo Jul 27, 2018
cd945f3
fix tests for pytorch 0.4.1
neerajprad Jul 27, 2018
4499b9c
fix test_mask
neerajprad Jul 27, 2018
f707101
skip JIT tests
neerajprad Jul 27, 2018
4737e10
fix NUTS tests
neerajprad Jul 27, 2018
7dc2bc1
fix gaussian_scale_mixture
neerajprad Jul 27, 2018
b58ef93
Mark test_em.py failures as xfail
fritzo Jul 27, 2018
448a760
Allow inf values in assert_tensors_equal
fritzo Jul 27, 2018
62f21d5
update examples
jpchen Jul 27, 2018
385ef94
Merge branch 'dev' into fix-dist-0.4.1
neerajprad Jul 28, 2018
814e025
remove redundant xfail
neerajprad Jul 28, 2018
e1cd9b4
Update JIT usage to PyTorch 0.4.1 (#1276)
fritzo Jul 30, 2018
a302944
Merge branch 'dev' into fix-dist-0.4.1
neerajprad Jul 31, 2018
8cb89a3
use float in arange
neerajprad Jul 31, 2018
eac0b73
Merge branch 'dev' into fix-dist-0.4.1
fritzo Aug 7, 2018
660b8d1
Fix Categorical.enumerate_support to make JitTraceEnum_ELBO work
fritzo Aug 7, 2018
d583dd1
Refactor test_examples.py to allow xfailing examples
fritzo Aug 7, 2018
fc9a4a7
Add xfailing examples that use --jit
fritzo Aug 7, 2018
15fba47
Fix missing import in test_jit.py
fritzo Aug 7, 2018
91bf748
Enable jit in most SVI examples
fritzo Aug 7, 2018
afc0b70
Merge branch 'dev' into pytorch-0.4.1
fritzo Aug 8, 2018
71779ec
Revert changes to torch_patch.py
fritzo Aug 8, 2018
fcdddcc
Work around jit issues; bayesian_regressian example now jits
fritzo Aug 8, 2018
d260fe0
Fix doctests to pass on Python 2.7
fritzo Aug 8, 2018
7ee642b
Merge branch 'fix-doctest-2.7' into pytorch-0.4.1
fritzo Aug 8, 2018
9f7fd54
Fix arange usage
fritzo Aug 8, 2018
d3cafb1
Only patch Categorical if broadcast_tensors is defined
fritzo Aug 8, 2018
17cb636
Add patches to work around bugs in 0.4.1
fritzo Aug 8, 2018
ad2e391
Merge branch 'dev' into pytorch-0.4.1
fritzo Aug 8, 2018
a251d3d
Fix test failures
fritzo Aug 8, 2018
1e6fb98
flake8
fritzo Aug 8, 2018
80cf76f
Fix typo in skipif markers
fritzo Aug 9, 2018
0008725
Work around bugs in torch unwind backward
fritzo Aug 9, 2018
55003ec
Mark xfailing jit test
fritzo Aug 9, 2018
8ea0461
Update all uses of torch.arange
fritzo Aug 9, 2018
09cadfb
Remove obsolete logsumexp implementation
fritzo Aug 9, 2018
2a93a9a
Patch torch.distributions.Categorical.log_prob
fritzo Aug 9, 2018
e91aa86
Work around lack of jit support for torch.eye(_, out=_)
fritzo Aug 9, 2018
5d71162
Add test-jit target to Makefile
fritzo Aug 9, 2018
0fb6870
Fix bug in eye_like when m!=n
fritzo Aug 9, 2018
6f88bbc
Fix jit errors: torch_scale and variable len(args)
fritzo Aug 9, 2018
2f190e5
Patch multivariate normal __init__ methods to be jittable
fritzo Aug 9, 2018
f7ef56e
Patch torch.log
fritzo Aug 9, 2018
b5ba5f1
Patch torch.Tensor.log
fritzo Aug 9, 2018
3f64101
Patch torch.exp and torch.Tensor.exp
fritzo Aug 9, 2018
59c48e1
Merge branch 'dev' into pytorch-0.4.1
neerajprad Aug 10, 2018
894fa58
Use JIT traced potential energy computation in HMC (#1299)
neerajprad Aug 14, 2018
f303855
Merge branch 'dev' into pytorch-0.4.1
neerajprad Aug 28, 2018
bf98735
Merge branch 'dev' into pytorch-0.4.1
neerajprad Aug 31, 2018
9313b0f
add xfailing test
neerajprad Sep 4, 2018
a5bc9bb
Merge branch 'dev' into pytorch-0.4.1
fritzo Sep 11, 2018
3cfba13
Remove obsolete PyTorch patches
fritzo Sep 11, 2018
a677512
Remove patch for Tensor._standard_gamma
fritzo Sep 11, 2018
8f665ba
Fix some jit errors
fritzo Sep 11, 2018
06b0e63
Convert to valid einsum chars in torch_log backend
fritzo Sep 11, 2018
1785b6c
Updating distributions module with PyTorch master (#1377)
neerajprad Sep 11, 2018
29bb3ed
Use native torch.tensordot
fritzo Sep 11, 2018
8c914b8
Remove duplicate implementation of logsumexp
fritzo Sep 11, 2018
1c708b4
Merge branch 'dev' into pytorch-0.5.0
fritzo Sep 11, 2018
35a6965
Ignore jit warnings
fritzo Sep 11, 2018
692b883
Ignore a couple TracerWarnings in pyro.ops.jit.trace
fritzo Sep 11, 2018
0c50243
Fix a tiny test_jit error
fritzo Sep 11, 2018
850432f
Add jit test for OneHotCategorical
fritzo Sep 11, 2018
a03164a
fix JIT errors for HMC
neerajprad Sep 11, 2018
effada7
Merge branch 'dev' into pytorch-0.5.0
neerajprad Sep 11, 2018
c36a9aa
change assert in torch_log
neerajprad Sep 11, 2018
ecfc995
Work around more jit missing coverage
fritzo Sep 11, 2018
c47e294
Strengthen masked_fill test
fritzo Sep 12, 2018
9ea7fd2
Merge branch 'dev' into pytorch-0.5.0
neerajprad Sep 12, 2018
c19830d
fix hmc enum test
neerajprad Sep 12, 2018
0c233d3
Fix failing jit tests
fritzo Sep 13, 2018
dceaf9a
Add test for .scatter_() workaround
fritzo Sep 13, 2018
ac122b0
Merge branch 'dev' into pytorch-0.5.0
neerajprad Sep 13, 2018
e5cd034
add expand for MaskedDistribution
neerajprad Sep 13, 2018
6ce9925
remove binomial and half cauchy
neerajprad Sep 13, 2018
e80ef20
Merge branch 'dev' into pytorch-0.5.0
neerajprad Sep 14, 2018
68c1168
reinstate Independent constraint
neerajprad Sep 14, 2018
657fc56
add expand methods to more distributions
neerajprad Sep 14, 2018
d44f90b
Fix CUDA tests in test_eig.py
neerajprad Sep 14, 2018
c2c4b72
remove standard gamma patch
neerajprad Sep 14, 2018
62bf019
Merge branch 'dev' into pytorch-0.5.0
neerajprad Sep 17, 2018
5532c3f
Work-around to allow JIT compiler to infer batch size in iarange (#1392)
neerajprad Sep 19, 2018
021707a
Remove deprecated new_tensor invocation
neerajprad Sep 20, 2018
981d64b
Merge branch 'dev' into pytorch-0.5.0
neerajprad Sep 20, 2018
a36f25a
Remove deprecated new_tensor invocation
neerajprad Sep 20, 2018
693fcd9
remove .new
neerajprad Sep 20, 2018
3129059
address comments
neerajprad Sep 20, 2018
b48f108
Merge branch 'new-tensor' into pytorch-0.5.0
neerajprad Sep 20, 2018
a648968
fix test_hessian
neerajprad Sep 20, 2018
c1c9e82
fix more tests
neerajprad Sep 20, 2018
edf000e
remove redundant parens
neerajprad Sep 20, 2018
53c3e07
Merge branch 'new-tensor' into pytorch-0.5.0
neerajprad Sep 20, 2018
c2d3de8
fix test_elbo_mapdata
neerajprad Sep 20, 2018
bf85894
fix test_conj_gaussian
neerajprad Sep 20, 2018
c6dd8d7
fix test_valid_models
neerajprad Sep 20, 2018
b54081c
Merge branch 'new-tensor' into pytorch-0.5.0
neerajprad Sep 20, 2018
d4ff53c
fix dist tests
neerajprad Sep 20, 2018
0064b6a
fix test_gaussian_mixtures
neerajprad Sep 20, 2018
94cb96c
Merge branch 'new-tensor' into pytorch-0.5.0
neerajprad Sep 20, 2018
8ceaaa0
Merge branch 'dev' into pytorch-0.5.0
neerajprad Sep 25, 2018
710406d
Test fixes for compatibility with PyTorch master
neerajprad Sep 26, 2018
eef40eb
address comments; more fixes
neerajprad Sep 26, 2018
2325b14
more test fixes
neerajprad Sep 26, 2018
a5a457f
uncomment torch_patch
neerajprad Sep 26, 2018
c9078dd
Merge branch 'dev' into pytorch-0.5.0
neerajprad Sep 26, 2018
6078bc7
Merge branch 'test-fixes' into pytorch-0.5.0
neerajprad Sep 26, 2018
41164bb
ignore jit warnings in hmc
neerajprad Sep 26, 2018
21b32c0
remove default jit compilation in air
neerajprad Sep 26, 2018
865ddab
set args.jit default to false
neerajprad Sep 26, 2018
6ddeb38
ignore jit warnings in hmc tests
neerajprad Sep 26, 2018
43c5e4e
mark failing hmc tests
neerajprad Sep 27, 2018
b5ae590
Merge branch 'dev' into pytorch-0.5.0
neerajprad Sep 27, 2018
68171b9
test against nightly build
neerajprad Sep 27, 2018
f4db712
fix channel name
neerajprad Sep 27, 2018
fb290e8
downgrade ipython
neerajprad Sep 27, 2018
ed4b360
fix lapack issue
neerajprad Sep 27, 2018
2142589
include mkl
neerajprad Sep 27, 2018
1efd552
addons to .travis
neerajprad Sep 27, 2018
b84122f
add pytorch channel
neerajprad Sep 27, 2018
85342ef
remove pythonpath
neerajprad Sep 27, 2018
464fe68
editable install
neerajprad Sep 27, 2018
475f483
add ld_library_path
neerajprad Sep 27, 2018
07e20d4
conda install pip
neerajprad Sep 27, 2018
a624b57
debug build
neerajprad Sep 27, 2018
053163f
debug - revert to pytorch release
neerajprad Sep 27, 2018
fb62e69
add before install
neerajprad Sep 27, 2018
13be21f
use nightly wheel
neerajprad Sep 27, 2018
31b5d63
Fix incompatible dependency between jupyter-console and ipython
neerajprad Sep 27, 2018
83b3eb4
Merge branch 'fix-ipython-dep' into pytorch-0.5.0
neerajprad Sep 27, 2018
f904d0c
remove torch==0.4.1 from setup
neerajprad Sep 27, 2018
13d7b51
remove torchvision temporarily
neerajprad Sep 27, 2018
0e17d3c
install torchvision without deps
neerajprad Sep 28, 2018
92d6d4d
remove torchvision from setup
neerajprad Sep 28, 2018
72c76fd
update to contextlib2
neerajprad Sep 28, 2018
0e60ce2
fix benchmark tests
neerajprad Sep 28, 2018
b328d12
add xfail markers for failing tests
neerajprad Sep 28, 2018
3161deb
temporarily xfail ubersum_sizes test
neerajprad Sep 28, 2018
e298cb4
fix xfail marker
neerajprad Sep 28, 2018
d13039b
Merge branch 'dev' into pytorch-0.5.0
neerajprad Sep 28, 2018
feca15d
remove xfail marker from test_enum
neerajprad Sep 28, 2018
a7300e8
add xfail for mixture of diag normals
neerajprad Oct 1, 2018
17f2033
Merge branch 'dev' into pytorch-0.5.0
neerajprad Oct 1, 2018
379ffef
fix mask fill on non contiguous tensor
neerajprad Oct 2, 2018
Files changed
4 changes: 2 additions & 2 deletions .travis.yml
@@ -9,9 +9,9 @@ env:
install:
- pip install -U pip
- if [[ $TRAVIS_PYTHON_VERSION == 2.7 ]]; then
pip install http://download.pytorch.org/whl/cpu/torch-0.4.0-cp27-cp27mu-linux_x86_64.whl;
pip install http://download.pytorch.org/whl/cpu/torch-0.4.1-cp27-cp27mu-linux_x86_64.whl;
else
pip install http://download.pytorch.org/whl/cpu/torch-0.4.0-cp35-cp35m-linux_x86_64.whl;
pip install http://download.pytorch.org/whl/cpu/torch-0.4.1-cp35-cp35m-linux_x86_64.whl;
fi
- pip install .[test]
- pip freeze
6 changes: 6 additions & 0 deletions Makefile
@@ -57,6 +57,12 @@ test-cuda: lint FORCE
CUDA_TEST=1 PYRO_TENSOR_TYPE=torch.cuda.DoubleTensor pytest -vx -n 4 --stage unit
CUDA_TEST=1 pytest -vx -n 4 tests/test_examples.py::test_cuda

test-jit: FORCE
@echo See jit.log
pytest -v -n auto --tb=short --runxfail tests/infer/test_jit.py tests/test_examples.py::test_jit | tee jit.log
pytest -v -n auto --tb=short --runxfail tests/infer/mcmc/test_hmc.py tests/infer/mcmc/test_nuts.py \
-k JIT=True | tee -a jit.log

clean: FORCE
git clean -dfx -e pyro-egg.info

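As the recipe shows, `make test-jit` runs the JIT-focused suites (tests/infer/test_jit.py, tests/test_examples.py::test_jit, and the HMC/NUTS tests filtered with -k JIT=True) and tees all output into jit.log for later inspection.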
2 changes: 1 addition & 1 deletion docs/source/primitives.rst
@@ -20,4 +20,4 @@ Primitives
.. autofunction:: pyro.validation_enabled
.. autofunction:: pyro.enable_validation

.. autofunction:: pyro.ops.jit.compile
.. autofunction:: pyro.ops.jit.trace
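(This tracks the rename of the JIT helper: the commit log above includes "Ignore a couple TracerWarnings in pyro.ops.jit.trace", so the autodoc entry now points at pyro.ops.jit.trace rather than the old pyro.ops.jit.compile.)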
4 changes: 3 additions & 1 deletion examples/baseball.py
@@ -218,7 +218,7 @@ def main(args):
baseball_dataset = pd.read_csv(DATA_URL, "\t")
train, _, player_names = train_test_split(baseball_dataset)
at_bats, hits = train[:, 0], train[:, 1]
nuts_kernel = NUTS(conditioned_model, adapt_step_size=True)
nuts_kernel = NUTS(conditioned_model, adapt_step_size=True, jit_compile=args.jit)
logging.info("Original Dataset:")
logging.info(baseball_dataset)

@@ -270,5 +270,7 @@ def main(args):
parser.add_argument("-n", "--num-samples", nargs="?", default=1200, type=int)
parser.add_argument("--warmup-steps", nargs='?', default=300, type=int)
parser.add_argument("--rng_seed", nargs='?', default=0, type=int)
parser.add_argument('--jit', action='store_true', default=False,
help='use PyTorch jit')
args = parser.parse_args()
main(args)
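With this flag wired through, `python examples/baseball.py --jit` hands jit_compile=True to the NUTS kernel, while the store_true default of False keeps the non-jitted path.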
3 changes: 2 additions & 1 deletion examples/eight_schools/mcmc.py
@@ -34,7 +34,7 @@ def conditioned_model(model, sigma, y):


def main(args):
nuts_kernel = NUTS(conditioned_model, adapt_step_size=True)
nuts_kernel = NUTS(conditioned_model, adapt_step_size=True, jit_compile=args.jit)
posterior = MCMC(nuts_kernel, num_samples=args.num_samples, warmup_steps=args.warmup_steps)\
.run(model, data.sigma, data.y)
marginal_mu_tau = EmpiricalMarginal(posterior, sites=["mu", "tau"])\
@@ -54,6 +54,7 @@ def main(args):
help='number of MCMC samples (default: 1000)')
parser.add_argument('--warmup-steps', type=int, default=1000,
help='number of MCMC samples for warmup (default: 1000)')
parser.add_argument('--jit', action='store_true', default=False)
args = parser.parse_args()

main(args)
8 changes: 4 additions & 4 deletions pyro/contrib/gp/models/gplvm.py
@@ -1,14 +1,14 @@
from __future__ import absolute_import, division, print_function

import torch
from torch.distributions import constraints
from torch.nn import Parameter

import pyro
from pyro.contrib.gp.util import Parameterized
import pyro.distributions as dist
import pyro.infer as infer
import pyro.optim as optim
from pyro.contrib.gp.util import Parameterized
from pyro.distributions.util import eye_like
from pyro.params import param_with_module_name


@@ -74,7 +74,7 @@ def __init__(self, base_model, name="GPLVM"):

C = self.X_loc.shape[1]
X_scale_tril_shape = self.X_loc.shape + (C,)
Id = torch.eye(C, out=self.X_loc.new_empty(C, C))
Id = eye_like(self.X_loc, C)
X_scale_tril = Id.expand(X_scale_tril_shape)
self.X_scale_tril = Parameter(X_scale_tril)
self.set_constraint("X_scale_tril", constraints.lower_cholesky)
@@ -87,7 +87,7 @@ def model(self):
# sample X from unit multivariate normal distribution
zero_loc = self.X_loc.new_zeros(self.X_loc.shape)
C = self.X_loc.shape[1]
Id = torch.eye(C, out=self.X_loc.new_empty(C, C))
Id = eye_like(self.X_loc, C)
X_name = param_with_module_name(self.name, "X")
X = pyro.sample(X_name, dist.MultivariateNormal(zero_loc, scale_tril=Id)
.independent(zero_loc.dim()-1))
5 changes: 3 additions & 2 deletions pyro/contrib/gp/models/vgp.py
@@ -8,6 +8,7 @@
import pyro.distributions as dist
from pyro.contrib.gp.models.model import GPModel
from pyro.contrib.gp.util import conditional
from pyro.distributions.util import eye_like
from pyro.params import param_with_module_name


@@ -74,7 +75,7 @@ def __init__(self, X, y, kernel, likelihood, mean_function=None,
self.f_loc = Parameter(f_loc)

f_scale_tril_shape = self.latent_shape + (N, N)
Id = torch.eye(N, out=self.X.new_empty(N, N))
Id = eye_like(self.X, N)
f_scale_tril = Id.expand(f_scale_tril_shape)
self.f_scale_tril = Parameter(f_scale_tril)
self.set_constraint("f_scale_tril", constraints.lower_cholesky)
@@ -96,7 +97,7 @@ def model(self):
f_name = param_with_module_name(self.name, "f")

if self.whiten:
Id = torch.eye(N, out=self.X.new_empty(N, N))
Id = eye_like(self.X, N)
pyro.sample(f_name,
dist.MultivariateNormal(zero_loc, scale_tril=Id)
.independent(zero_loc.dim() - 1))
5 changes: 3 additions & 2 deletions pyro/contrib/gp/models/vsgp.py
@@ -9,6 +9,7 @@
import pyro.poutine as poutine
from pyro.contrib.gp.models.model import GPModel
from pyro.contrib.gp.util import conditional
from pyro.distributions.util import eye_like
from pyro.params import param_with_module_name


@@ -98,7 +99,7 @@ def __init__(self, X, y, kernel, Xu, likelihood, mean_function=None,
self.u_loc = Parameter(u_loc)

u_scale_tril_shape = self.latent_shape + (M, M)
Id = torch.eye(M, out=self.Xu.new_empty(M, M))
Id = eye_like(self.Xu, M)
u_scale_tril = Id.expand(u_scale_tril_shape)
self.u_scale_tril = Parameter(u_scale_tril)
self.set_constraint("u_scale_tril", constraints.lower_cholesky)
@@ -120,7 +121,7 @@ def model(self):
zero_loc = Xu.new_zeros(u_loc.shape)
u_name = param_with_module_name(self.name, "u")
if self.whiten:
Id = torch.eye(M, out=Xu.new_empty(M, M))
Id = eye_like(Xu, M)
pyro.sample(u_name,
dist.MultivariateNormal(zero_loc, scale_tril=Id)
.independent(zero_loc.dim() - 1))
5 changes: 3 additions & 2 deletions pyro/distributions/lowrank_mvn.py
@@ -7,6 +7,7 @@
from torch.distributions.utils import lazy_property

from pyro.distributions.torch_distribution import IndependentConstraint, TorchDistribution
from pyro.distributions.util import eye_like


def _matrix_triangular_solve_compat(b, A, upper=True):
@@ -84,7 +85,7 @@ def scale_tril(self):
A = self.covariance_matrix_W_term / Dsqrt
At_A = A.t().matmul(A)
N = A.shape[1]
Id = torch.eye(N, N, out=A.new_empty(N, N))
Id = eye_like(A, N)
K = Id + At_A
L = K.potrf(upper=False)
return Dsqrt.unsqueeze(1) * L
@@ -111,7 +112,7 @@ def _compute_logdet_and_mahalanobis(self, D, W, y, trace_term=0):
"""
W_Dinv = W / D
M = W.shape[0]
Id = torch.eye(M, M, out=W.new_empty(M, M))
Id = eye_like(W, M)
K = Id + W_Dinv.matmul(W.t())
L = K.potrf(upper=False)
if y.dim() == 1:
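For context on why only a small Cholesky is needed here: with low-rank term W of shape (M, N) and diagonal D, the covariance is D + WᵀW, and Sylvester's determinant identity gives det(D + WᵀW) = det(I_M + W D⁻¹ Wᵀ) · det(D), so the K = Id + W_Dinv.matmul(W.t()) above is exactly the M×M matrix that gets factored with potrf instead of the full N×N covariance.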
4 changes: 2 additions & 2 deletions pyro/distributions/omt_mvn.py
@@ -6,7 +6,7 @@
from torch.distributions import constraints

from pyro.distributions.torch import MultivariateNormal
from pyro.distributions.util import sum_leftmost
from pyro.distributions.util import eye_like, sum_leftmost


class OMTMultivariateNormal(MultivariateNormal):
@@ -51,7 +51,7 @@ def backward(ctx, grad_output):
g = grad_output
loc_grad = sum_leftmost(grad_output, -1)

identity = torch.eye(dim, out=torch.tensor(g.new_empty(dim, dim)))
identity = eye_like(g, dim)
R_inv = torch.trtrs(identity, L.t(), transpose=False, upper=True)[0]

z_ja = z.unsqueeze(-1)
5 changes: 3 additions & 2 deletions pyro/distributions/torch.py
@@ -4,6 +4,7 @@
from torch.distributions import constraints

from pyro.distributions.torch_distribution import IndependentConstraint, TorchDistributionMixin
from pyro.distributions.util import eye_like


class Bernoulli(torch.distributions.Bernoulli, TorchDistributionMixin):
@@ -39,6 +40,7 @@ def expand(self, batch_shape):


class Categorical(torch.distributions.Categorical, TorchDistributionMixin):

def expand(self, batch_shape):
try:
return super(Categorical, self).expand(batch_shape)
@@ -252,8 +254,7 @@ def expand(self, batch_shape):

def enumerate_support(self, expand=True):
n = self.event_shape[0]
values = self._new((n, n))
torch.eye(n, out=values)
values = eye_like(self._categorical._param, n)
values = values.view((n,) + (1,) * len(self.batch_shape) + (n,))
if expand:
values = values.expand((n,) + self.batch_shape + (n,))
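A small shape-check sketch of the new enumerate_support (the probabilities below are made up; the shapes follow the code above):

    import torch
    import pyro.distributions as dist

    d = dist.OneHotCategorical(torch.tensor([0.2, 0.3, 0.5]))
    values = d.enumerate_support()      # identity-like: one one-hot row per category
    assert values.shape == (3, 3)       # (n,) + batch_shape + (n,), batch_shape = ()
    assert (values.sum(-1) == 1).all()  # every enumerated value is one-hot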
10 changes: 0 additions & 10 deletions pyro/distributions/torch_patch.py
@@ -45,7 +45,6 @@ def _torch_dirichlet_grad(x, concentration, total):
return unpatched_fn(x, concentration, total)


@_patch('torch.einsum')
def _einsum(equation, operands):
# work around torch.einsum performance issues
# see https://github.com/pytorch/pytorch/issues/10661
@@ -56,15 +55,6 @@ def _einsum(equation, operands):
y, x = operands
return (x.unsqueeze(1) * y).sum(0).transpose(0, 1)

# work around torch.einsum's limitation to 26 letters
symbols = sorted(set(equation) - set(',->'))
rename = dict(zip(symbols, 'abcdefghijklmnopqrstuvwxyz'))
equation = ''.join(rename.get(s, s) for s in equation)

# this workaround can be deleted after this issue is fixed in release:
# https://github.com/pytorch/pytorch/issues/7763
operands = [t.clone() for t in operands]

return _einsum._pyro_unpatched(equation, operands)


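For orientation, a minimal sketch of the `_patch` helper this module presumably defines above this hunk (its body is not shown here; the shape is inferred from the `@_patch('torch.einsum')` decorator and the `_einsum._pyro_unpatched` call):

    import torch

    def _patch(target):
        # e.g. target = 'torch.einsum': resolve the owning module and attribute name
        parts = target.split('.')
        assert parts[0] == 'torch'
        module = torch
        for part in parts[1:-1]:
            module = getattr(module, part)
        name = parts[-1]

        def decorator(new_fn):
            # stash the original so the wrapper can delegate via fn._pyro_unpatched
            new_fn._pyro_unpatched = getattr(module, name)
            setattr(module, name, new_fn)
            return new_fn

        return decorator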
37 changes: 14 additions & 23 deletions pyro/distributions/util.py
@@ -3,11 +3,15 @@
import numbers
from contextlib import contextmanager

import torch
import torch.distributions as torch_dist
from torch import logsumexp
from torch.distributions.utils import broadcast_all

_VALIDATION_ENABLED = False

log_sum_exp = logsumexp # DEPRECATED


def copy_docs_from(source_class, full_text=False):
"""
@@ -52,15 +56,23 @@ def is_identically_zero(x):
Check if argument is exactly the number zero. True for the number zero;
false for other numbers; false for :class:`~torch.Tensor`s.
"""
return isinstance(x, numbers.Number) and x == 0
if isinstance(x, numbers.Number):
return x == 0
elif isinstance(x, torch.Tensor) and x.dtype == torch.int64 and not x.shape:
return x.item() == 0
return False


def is_identically_one(x):
"""
Check if argument is exactly the number one. True for the number one;
false for other numbers; false for :class:`~torch.Tensor`s.
"""
return isinstance(x, numbers.Number) and x == 1
if isinstance(x, numbers.Number):
return x == 1
elif isinstance(x, torch.Tensor) and x.dtype == torch.int64 and not x.shape:
return x.item() == 1
return False


def broadcast_shape(*shapes, **kwargs):
@@ -178,27 +190,6 @@ def eye_like(value, m, n=None):
return eye


try:
from torch import logsumexp # for pytorch 0.4.1 and later
except ImportError:
def logsumexp(tensor, dim=-1, keepdim=False):
"""
Numerically stable implementation for the `LogSumExp` operation. The
summing is done along the dimension specified by ``dim``.

:param torch.Tensor tensor: Input tensor.
:param dim: Dimension to be summed out.
:param keepdim: Whether to retain the dimension
that is summed out.
"""
max_val = tensor.max(dim, keepdim=True)[0]
log_sum_exp = max_val + (tensor - max_val).exp().sum(dim=dim, keepdim=True).log()
return log_sum_exp if keepdim else log_sum_exp.squeeze(dim)


log_sum_exp = logsumexp # DEPRECATED


def enable_validation(is_validate):
global _VALIDATION_ENABLED
_VALIDATION_ENABLED = is_validate
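The body of eye_like is collapsed in this view; here is a plausible sketch (a hedged reconstruction, with the rectangular case handled per the "Fix bug in eye_like when m!=n" commit above):

    import torch

    def eye_like(value, m, n=None):
        # identity-like (m, n) matrix matching the dtype and device of `value`,
        # written without torch.eye(..., out=...), which the jit cannot trace
        n = m if n is None else n
        eye = value.new_zeros(m, n)
        # the flat index of element (i, i) is i * (n + 1)
        eye.view(-1)[:min(m, n) * n:n + 1] = 1
        return eye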
34 changes: 34 additions & 0 deletions pyro/infer/mcmc/hmc.py
@@ -50,6 +50,9 @@ class HMC(TraceKernel):
:param int max_iarange_nesting: Optional bound on max number of nested
:func:`pyro.iarange` contexts. This is required if model contains
discrete sample sites that can be enumerated over in parallel.
:param bool jit_compile: Optional parameter denoting whether to use
the PyTorch JIT to trace the log density computation, and use this
optimized executable trace in the integrator.
:param bool experimental_use_einsum: Whether to use an einsum operation
to evaluate log pdf for the model trace. No-op unless the trace has
discrete sample sites. This flag is experimental and will most likely
@@ -83,6 +86,7 @@ def __init__(self,
adapt_step_size=False,
transforms=None,
max_iarange_nesting=float("inf"),
jit_compile=False,
experimental_use_einsum=False):
# Wrap model in `poutine.enum` to enumerate over discrete latent sites.
# No-op if model does not have any discrete latents.
@@ -99,6 +103,7 @@ def __init__(self,
self.trajectory_length = 2 * math.pi # from Stan
self.num_steps = max(1, int(self.trajectory_length / self.step_size))
self.adapt_step_size = adapt_step_size
self._jit_compile = jit_compile
self.use_einsum = experimental_use_einsum
self._target_accept_prob = 0.8 # from Stan

@@ -129,6 +134,8 @@ def _kinetic_energy(self, r):
return 0.5 * sum(x.pow(2).sum() for x in r.values())

def _potential_energy(self, z):
if self._jit_compile:
return self._potential_energy_jit(z)
# Since the model is specified in the constrained space, transform the
# unconstrained R.V.s `z` to the constrained space.
z_constrained = z.copy()
@@ -141,6 +148,32 @@ def _potential_energy(self, z):
potential_energy += transform.log_abs_det_jacobian(z_constrained[name], z[name]).sum()
return potential_energy

def _potential_energy_jit(self, z):
names, vals = zip(*sorted(z.items()))
if self._compiled_potential_fn:
return self._compiled_potential_fn(*vals)

@torch.jit.trace(*vals, optimize=True)
def wrapped(*zi):
z_constrained = list(zi)
# transform to constrained space.
for i, name in enumerate(names):
if name in self.transforms:
transform = self.transforms[name]
z_constrained[i] = transform.inv(z_constrained[i])
z_constrained = dict(zip(names, z_constrained))
trace = self._get_trace(z_constrained)
potential_energy = -self._compute_trace_log_prob(trace)
# adjust by the jacobian for this transformation.
for i, name in enumerate(names):
if name in self.transforms:
transform = self.transforms[name]
potential_energy += transform.log_abs_det_jacobian(z_constrained[name], zi[i]).sum()
return potential_energy

self._compiled_potential_fn = wrapped
return self._compiled_potential_fn(*vals)

def _energy(self, z, r):
return self._kinetic_energy(r) + self._potential_energy(z)

@@ -149,6 +182,7 @@ def _reset(self):
self._accept_cnt = 0
self._r_dist = OrderedDict()
self._args = None
self._compiled_potential_fn = None
self._kwargs = None
self._prototype_trace = None
self._adapt_phase = False
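Finally, a minimal usage sketch of the new flag (the toy model and settings here are illustrative; the HMC and MCMC signatures follow this diff and the examples above):

    import torch
    import pyro
    import pyro.distributions as dist
    from pyro.infer.mcmc import HMC, MCMC

    def model(data):
        loc = pyro.sample("loc", dist.Normal(0., 10.))
        with pyro.iarange("data", data.shape[0]):
            pyro.sample("obs", dist.Normal(loc, 1.), obs=data)

    data = torch.randn(100) + 3.
    # jit_compile=True makes _potential_energy defer to the traced
    # _potential_energy_jit, compiling the log density once and reusing it
    kernel = HMC(model, step_size=0.5, num_steps=4, jit_compile=True)
    posterior = MCMC(kernel, num_samples=500, warmup_steps=100).run(data)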