Add support for the parameter-shift hessian to the Torch interface #1129

Merged 17 commits on Mar 26, 2021 (changes shown from 11 commits)
33 changes: 33 additions & 0 deletions .github/CHANGELOG.md
@@ -2,6 +2,39 @@

<h3>New features since last release</h3>

* Computing second derivatives and Hessians of QNodes is now supported when
using the PyTorch interface.
Contributor:
So for my understanding, we'd also like to add this for the other interfaces, but Torch is the first one we're doing (maybe because it's easier)? So this is a continuation of #961?

Member Author:
We've got #1131 (autograd) and #1110 (tf) also open :) We just decided to split the work into three PRs to help with code review, but feel free to review the others as well if interested!

[(#1129)](https://github.com/PennyLaneAI/pennylane/pull/1129/files)

Hessians are computed using the parameter-shift rule, and can be
evaluated on both hardware and simulator devices.

```python
import torch
import pennylane as qml
from torch.autograd.functional import jacobian, hessian

dev = qml.device('default.qubit', wires=1)

@qml.qnode(dev, interface='torch', diff_method="parameter-shift")
def circuit(p):
    qml.RY(p[0], wires=0)
    qml.RX(p[1], wires=0)
    return qml.expval(qml.PauliZ(0))

x = torch.tensor([1.0, 2.0], requires_grad=True)
```

```python
>>> circuit(x)
tensor([0.3876, 0.6124], dtype=torch.float64, grad_fn=<SqueezeBackward0>)
>>> jacobian(circuit, x)
tensor([[ 0.1751, -0.2456],
        [-0.1751,  0.2456]], grad_fn=<ViewBackward>)
>>> hessian(circuit, x)
tensor([[[ 0.1124,  0.3826],
         [ 0.3826,  0.1124]],

        [[-0.1124, -0.3826],
         [-0.3826, -0.1124]]])
```

Contributor:
This is really cool! So when it's the Hessian of a vector output, are we just lining up the individual Hessian matrices? E.g. like here. I guess this is pretty standard.

Member Author:
Yep! With the disclaimer that I'm not entirely sure how PyTorch is ordering the dimensions here.

Member Author:
The output above agrees with TF and Autograd, but we should double-check this.
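For context on the thread above: `torch.autograd.functional.hessian` only accepts scalar-valued functions, so the stacked per-output Hessian shown here is what you get by nesting `jacobian` calls, which is also how the new tests in this PR exercise the feature. A minimal sketch, reusing `dev` and the imports from the example above (`vector_circuit` and `jac_fn` are illustrative names, not part of the PR):

```python
@qml.qnode(dev, interface='torch', diff_method="parameter-shift")
def vector_circuit(p):
    qml.RY(p[0], wires=0)
    qml.RX(p[1], wires=0)
    return qml.probs(wires=0)

x = torch.tensor([1.0, 2.0], requires_grad=True)

# keep the first derivative on the graph so it can be differentiated again
jac_fn = lambda p: jacobian(vector_circuit, p, create_graph=True)

# shape (2, 2, 2): one 2x2 parameter Hessian per output element
hess = jacobian(jac_fn, x)
```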

* Adds a new optimizer `qml.ShotAdaptiveOptimizer`, a gradient-descent optimizer where
the shot rate is adaptively calculated using the variances of the parameter-shift gradient.
[(#1139)](https://github.com/PennyLaneAI/pennylane/pull/1139)
96 changes: 73 additions & 23 deletions pennylane/interfaces/torch.py
@@ -76,6 +76,74 @@ def forward(ctx, input_kwargs, *input_):
if hasattr(res, "numpy"):
res = res.numpy()

ctx.saved_grad_matrices = {}

def _evaluate_grad_matrix(grad_matrix_fn):
"""Convenience function for generating gradient matrices
for the given parameter values.

This function serves two purposes:

* Avoids duplicating logic surrounding parameter unwrapping/wrapping

* Takes advantage of closure, to cache computed gradient matrices via
the ctx.saved_grad_matrices attribute, to avoid gradient matrices being
computed multiple redundant times.

This is particularly useful when differentiating vector-valued QNodes.
Because PyTorch requests the vector-GradMatrix product,
and *not* the full GradMatrix, differentiating vector-valued
functions will result in multiple backward passes.
"""
if grad_matrix_fn in ctx.saved_grad_matrices:
return ctx.saved_grad_matrices[grad_matrix_fn]

tape.set_parameters(ctx.all_params_unwrapped, trainable_only=False)
grad_matrix = getattr(tape, grad_matrix_fn)(
device, params=ctx.args, **tape.jacobian_options
)
tape.set_parameters(ctx.all_params, trainable_only=False)

grad_matrix = torch.as_tensor(torch.from_numpy(grad_matrix), dtype=tape.dtype)
ctx.saved_grad_matrices[grad_matrix_fn] = grad_matrix

return grad_matrix

class _Jacobian(torch.autograd.Function):
@staticmethod
def forward(ctx_, parent_ctx, *input_):
"""Implements the forward pass QNode Jacobian evaluation"""
ctx_.dy = parent_ctx.dy
ctx_.save_for_backward(*input_)
jacobian = _evaluate_grad_matrix("jacobian")
return jacobian

@staticmethod
def backward(ctx_, ddy): # pragma: no cover
"""Implements the backward pass QNode vector-Hessian product"""
hessian = _evaluate_grad_matrix("hessian")

if torch.squeeze(ddy).ndim > 1:
vhp = ctx_.dy.view(1, -1) @ ddy @ hessian @ ctx_.dy.view(-1, 1)
else:
vhp = ddy @ hessian

vhp = torch.unbind(vhp.view(-1))

grad_input = []

# match the type and device of the input tensors
for i, j in zip(vhp, ctx_.saved_tensors):
res = torch.as_tensor(i, dtype=tape.dtype)
if j.is_cuda: # pragma: no cover
cuda_device = j.get_device()
res = torch.as_tensor(res, device=cuda_device)
grad_input.append(res)

return (None,) + tuple(grad_input)

ctx.jacobian = _Jacobian

# if any input tensor uses the GPU, the output should as well
for i in input_:
if isinstance(i, torch.Tensor):
Expand All @@ -94,30 +162,12 @@ def forward(ctx, input_kwargs, *input_):
return torch.as_tensor(torch.from_numpy(res), dtype=tape.dtype)

@staticmethod
def backward(ctx, grad_output): # pragma: no cover
def backward(ctx, dy): # pragma: no cover
"""Implements the backwards pass QNode vector-Jacobian product"""
tape = ctx.kwargs["tape"]
device = ctx.kwargs["device"]

tape.set_parameters(ctx.all_params_unwrapped, trainable_only=False)
jacobian = tape.jacobian(device, params=ctx.args, **tape.jacobian_options)
tape.set_parameters(ctx.all_params, trainable_only=False)

jacobian = torch.as_tensor(jacobian, dtype=grad_output.dtype).to(grad_output)

vjp = grad_output.view(1, -1) @ jacobian
grad_input_list = torch.unbind(vjp.flatten())
grad_input = []

# match the type and device of the input tensors
for i, j in zip(grad_input_list, ctx.saved_tensors):
res = torch.as_tensor(i, dtype=tape.dtype)
if j.is_cuda: # pragma: no cover
cuda_device = j.get_device()
res = torch.as_tensor(res, device=cuda_device)
grad_input.append(res)

return (None,) + tuple(grad_input)
ctx.dy = dy
vjp = dy.view(1, -1) @ ctx.jacobian.apply(ctx, *ctx.saved_tensors)
vjp = torch.unbind(vjp.view(-1))
return (None,) + tuple(vjp)


class TorchInterface(AnnotatedQueue):
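A side note on the caching above: the `_evaluate_grad_matrix` docstring points out that PyTorch requests vector-Jacobian products rather than full Jacobians, so differentiating a vector-valued QNode triggers one backward pass per output element. A minimal, self-contained sketch of that behaviour (a toy `Square` function with an instrumented backward pass; the names are illustrative and not part of the PR):

```python
import torch
from torch.autograd.functional import jacobian

calls = 0

class Square(torch.autograd.Function):
    """Toy element-wise square with an instrumented backward pass."""

    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        return x ** 2

    @staticmethod
    def backward(ctx, dy):
        # count how many vector-Jacobian products PyTorch requests
        global calls
        calls += 1
        (x,) = ctx.saved_tensors
        return dy * 2 * x

x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
jacobian(Square.apply, x)
print(calls)  # 3 -- one backward pass per output element
```

Each of those passes would re-run the parameter-shift evaluation if the gradient matrix were not cached, which is what `ctx.saved_grad_matrices` avoids.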
11 changes: 10 additions & 1 deletion pennylane/tape/jacobian_tape.py
@@ -91,6 +91,7 @@ class JacobianTape(QuantumTape):
def __init__(self, name=None, do_queue=True):
super().__init__(name=name, do_queue=do_queue)
self.jacobian_options = {}
self.hessian_options = {}

def _grad_method(self, idx, use_graph=True, default_method="F"):
"""Determine the correct partial derivative computation method for each gate parameter.
@@ -730,11 +731,19 @@ def hessian(self, device, params=None, **options):

if hessian is None:
# create the Hessian matrix
hessian = np.zeros((len(params), len(params)), dtype=float)
if self.output_dim is not None:
hessian = np.zeros(
(len(params), len(params), np.prod(self.output_dim)), dtype=float
)
else:
hessian = np.zeros((len(params), len(params)), dtype=float)

if i == j:
hessian[i, i] = g
else:
hessian[i, j] = hessian[j, i] = g

if self.output_dim == 1:
hessian = np.squeeze(hessian, axis=-1)

return hessian
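As a quick shape sanity check on the logic above (a sketch with hypothetical sizes, not part of the PR): the tape stores the Hessian as `(n_params, n_params, output_dim)` for vector-valued output and squeezes the trailing axis away when the output dimension is 1.

```python
import numpy as np

n_params, output_dim = 2, 3  # hypothetical sizes

# vector-valued tape: one length-output_dim entry per parameter pair
hessian = np.zeros((n_params, n_params, np.prod(output_dim)), dtype=float)
assert hessian.shape == (2, 2, 3)

# scalar-valued tape (output_dim == 1): trailing axis is squeezed away
scalar_hessian = np.squeeze(np.zeros((n_params, n_params, 1)), axis=-1)
assert scalar_hessian.shape == (2, 2)
```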
144 changes: 144 additions & 0 deletions tests/interfaces/test_qnode_torch.py
@@ -21,6 +21,7 @@
import pennylane as qml
from pennylane import qnode, QNode
from pennylane.tape import JacobianTape
from torch.autograd.functional import hessian, jacobian


@pytest.mark.parametrize(
@@ -527,6 +528,149 @@ def circuit():
assert isinstance(res[0], torch.Tensor)
assert isinstance(res[1], torch.Tensor)

def test_hessian(self, dev_name, diff_method, mocker, tol):
"""Test hessian calculation of a scalar valued QNode"""
if diff_method not in {"parameter-shift", "backprop"}:
pytest.skip("Test only supports parameter-shift or backprop")

dev = qml.device(dev_name, wires=1)

@qnode(dev, diff_method=diff_method, interface="torch")
def circuit(x):
qml.RY(x[0], wires=0)
qml.RX(x[1], wires=0)
return qml.expval(qml.PauliZ(0))

x = torch.tensor([1.0, 2.0], requires_grad=True)
res = circuit(x)

res.backward()
g = x.grad

spy = mocker.spy(JacobianTape, "hessian")
hess = hessian(circuit, x)
spy.assert_called_once()

a, b = x.detach().numpy()

expected_res = np.cos(a) * np.cos(b)
assert np.allclose(res.detach(), expected_res, atol=tol, rtol=0)

expected_g = [-np.sin(a) * np.cos(b), -np.cos(a) * np.sin(b)]
assert np.allclose(g.detach(), expected_g, atol=tol, rtol=0)

expected_hess = [
[-np.cos(a) * np.cos(b), np.sin(a) * np.sin(b)],
[np.sin(a) * np.sin(b), -np.cos(a) * np.cos(b)]
]
assert np.allclose(hess.detach(), expected_hess, atol=tol, rtol=0)

def test_hessian_vector_valued(self, dev_name, diff_method, mocker, tol):
"""Test hessian calculation of a vector valued QNode"""
if diff_method not in {"parameter-shift", "backprop"}:
pytest.skip("Test only supports parameter-shift or backprop")

dev = qml.device(dev_name, wires=1)

@qnode(dev, diff_method=diff_method, interface="torch")
def circuit(x):
qml.RY(x[0], wires=0)
qml.RX(x[1], wires=0)
return qml.probs(wires=0)

x = torch.tensor([1.0, 2.0], requires_grad=True)
res = circuit(x)
jac_fn = lambda x: jacobian(circuit, x, create_graph=True)

g = jac_fn(x)

spy = mocker.spy(JacobianTape, "hessian")
hess = jacobian(jac_fn, x)
spy.assert_called_once()

a, b = x.detach().numpy()

expected_res = [
0.5 + 0.5 * np.cos(a) * np.cos(b),
0.5 - 0.5 * np.cos(a) * np.cos(b)
]
assert np.allclose(res.detach(), expected_res, atol=tol, rtol=0)

expected_g = [
[-0.5 * np.sin(a) * np.cos(b), -0.5 * np.cos(a) * np.sin(b)],
[0.5 * np.sin(a) * np.cos(b), 0.5 * np.cos(a) * np.sin(b)]
]
assert np.allclose(g.detach(), expected_g, atol=tol, rtol=0)

expected_hess = [
[
[-0.5 * np.cos(a) * np.cos(b), 0.5 * np.sin(a) * np.sin(b)],
[0.5 * np.sin(a) * np.sin(b), -0.5 * np.cos(a) * np.cos(b)]
],
[
[0.5 * np.cos(a) * np.cos(b), -0.5 * np.sin(a) * np.sin(b)],
[-0.5 * np.sin(a) * np.sin(b), 0.5 * np.cos(a) * np.cos(b)]
]
]
assert np.allclose(hess.detach(), expected_hess, atol=tol, rtol=0)

def test_hessian_ragged(self, dev_name, diff_method, mocker, tol):
"""Test hessian calculation of a ragged QNode"""
if diff_method not in {"parameter-shift", "backprop"}:
pytest.skip("Test only supports parameter-shift or backprop")

dev = qml.device(dev_name, wires=2)

@qnode(dev, diff_method=diff_method, interface="torch")
def circuit(x):
qml.RY(x[0], wires=0)
qml.RX(x[1], wires=0)
qml.RY(x[0], wires=1)
qml.RX(x[1], wires=1)
return qml.expval(qml.PauliZ(0)), qml.probs(wires=1)

x = torch.tensor([1.0, 2.0], requires_grad=True)
res = circuit(x)
jac_fn = lambda x: jacobian(circuit, x, create_graph=True)

g = jac_fn(x)

spy = mocker.spy(JacobianTape, "hessian")
hess = jacobian(jac_fn, x)
spy.assert_called_once()

a, b = x.detach().numpy()

expected_res = [
np.cos(a) * np.cos(b),
0.5 + 0.5 * np.cos(a) * np.cos(b),
0.5 - 0.5 * np.cos(a) * np.cos(b)
]
assert np.allclose(res.detach(), expected_res, atol=tol, rtol=0)

expected_g = [
[-np.sin(a) * np.cos(b), -np.cos(a) * np.sin(b)],
[-0.5 * np.sin(a) * np.cos(b), -0.5 * np.cos(a) * np.sin(b)],
[0.5 * np.sin(a) * np.cos(b), 0.5 * np.cos(a) * np.sin(b)]
]
assert np.allclose(g.detach(), expected_g, atol=tol, rtol=0)

expected_hess = [
[
[-np.cos(a) * np.cos(b), np.sin(a) * np.sin(b)],
[np.sin(a) * np.sin(b), -np.cos(a) * np.cos(b)]
],
[
[-0.5 * np.cos(a) * np.cos(b), 0.5 * np.sin(a) * np.sin(b)],
[0.5 * np.sin(a) * np.sin(b), -0.5 * np.cos(a) * np.cos(b)]
],
[
[0.5 * np.cos(a) * np.cos(b), -0.5 * np.sin(a) * np.sin(b)],
[-0.5 * np.sin(a) * np.sin(b), 0.5 * np.cos(a) * np.cos(b)]
]
]
assert np.allclose(hess.detach(), expected_hess, atol=tol, rtol=0)


def qtransform(qnode, a, framework=torch):
"""Transforms every RY(y) gate in a circuit to RX(-a*cos(y))"""