Add support for the parameter-shift hessian to the Autograd interface #1131

Merged (26 commits, Mar 26, 2021)
Commits (26):
cf8e879  Add support for the parameter-shift hessian to the Torch interface (josh146, Mar 9, 2021)
0f9dec6  update changelog (josh146, Mar 9, 2021)
26f509a  fixes (josh146, Mar 9, 2021)
de830d1  fixes (josh146, Mar 9, 2021)
5be38ce  Merge branch 'master' into hessian_torch (josh146, Mar 9, 2021)
2eae4e8  fix (josh146, Mar 9, 2021)
7782493  Merge branch 'hessian_torch' of github.com:PennyLaneAI/pennylane into… (josh146, Mar 9, 2021)
01c2145  line break (josh146, Mar 9, 2021)
4b0ac4f  Add support for the parameter-shift hessian to the Autograd interface (josh146, Mar 9, 2021)
9fc0113  remove print statement (josh146, Mar 9, 2021)
c9efc77  fix (josh146, Mar 9, 2021)
fc39d41  suggested changes (josh146, Mar 10, 2021)
6300563  Update pennylane/tape/interfaces/autograd.py (josh146, Mar 11, 2021)
c6efa64  merge master (josh146, Mar 22, 2021)
1bde8af  merge master (josh146, Mar 22, 2021)
28a4ffe  merge master (josh146, Mar 22, 2021)
54401aa  Update pennylane/interfaces/autograd.py (josh146, Mar 23, 2021)
7b972ce  added new tests (josh146, Mar 23, 2021)
7a51081  Merge branch 'hessian_autograd' of github.com:PennyLaneAI/pennylane i… (josh146, Mar 23, 2021)
9af85b0  black (josh146, Mar 23, 2021)
4c825cd  suggested changes (josh146, Mar 23, 2021)
7bca112  Update pennylane/interfaces/autograd.py (josh146, Mar 24, 2021)
71e97b8  Merge branch 'master' into hessian_torch (josh146, Mar 24, 2021)
e3ce723  Merge branch 'hessian_torch' into hessian_autograd (josh146, Mar 24, 2021)
fedbdeb  Update pennylane/interfaces/autograd.py (josh146, Mar 24, 2021)
8745c6f  Merge branch 'master' into hessian_autograd (josh146, Mar 26, 2021)
26 changes: 26 additions & 0 deletions .github/CHANGELOG.md
@@ -2,6 +2,32 @@

<h3>New features since last release</h3>

* Computing second derivatives and Hessians of QNodes is now supported when
using the Autograd interface.
[(#1130)](https://github.com/PennyLaneAI/pennylane/pull/1130)

Hessians are computed using the parameter-shift rule, and can be
evaluated on both hardware and simulator devices.

```python
import pennylane as qml
from pennylane import numpy as np

dev = qml.device('default.qubit', wires=1)

@qml.qnode(dev, diff_method="parameter-shift")
def circuit(p):
    qml.RY(p[0], wires=0)
    qml.RX(p[1], wires=0)
    return qml.expval(qml.PauliZ(0))

x = np.array([1.0, 2.0], requires_grad=True)
```

```python
>>> hessian_fn = qml.jacobian(qml.grad(circuit))
>>> hessian_fn(x)
[[0.2248451 0.7651474]
[0.7651474 0.2248451]]
```
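
The Hessian values above can be reproduced by hand with the second-order parameter-shift rule for two-term gates such as `RX` and `RY`. The sketch below is only an illustration of that rule, not PennyLane's internal implementation; the helper name `hessian_entry` is made up for this example.

```python
import numpy as np

def hessian_entry(f, x, i, j, shift=np.pi / 2):
    """Second-order parameter-shift rule for two-term gates:
    d^2 f / dx_i dx_j = [f(x+s_i+s_j) - f(x-s_i+s_j) - f(x+s_i-s_j) + f(x-s_i-s_j)] / 4,
    where s_k = shift * e_k. For i == j this reduces to
    [f(x + pi*e_i) - 2*f(x) + f(x - pi*e_i)] / 4.
    """
    si = np.zeros_like(x)
    sj = np.zeros_like(x)
    si[i] = shift
    sj[j] = shift
    return (f(x + si + sj) - f(x - si + sj) - f(x + si - sj) + f(x - si - sj)) / 4

# Using the circuit and x defined above:
# hessian_entry(circuit, np.array([1.0, 2.0]), 0, 1) evaluates four shifted
# circuits and returns ~0.7651474, the off-diagonal entry shown above.
```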
Review discussion on this entry:

Contributor: Should we create a function qml.hessian that essentially just calculates qml.jacobian(qml.grad(input))?

Member Author: That's actually a really good idea!

Member Author: Something to be tackled in a new PR/issue.
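
If the suggested convenience function were added, a minimal sketch of the idea is simply to compose the two existing operators. This is only an illustration of the proposal above, not an actual or eventual `qml.hessian` implementation.

```python
import pennylane as qml

def hessian(func, argnum=None):
    """Sketch of the proposed wrapper: the Hessian of a scalar-valued QNode
    is the Jacobian of its gradient."""
    return qml.jacobian(qml.grad(func, argnum=argnum), argnum=argnum)

# With the circuit and x from the changelog example above:
# hessian(circuit)(x) would return the same 2x2 matrix as
# qml.jacobian(qml.grad(circuit))(x).
```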


* Computing second derivatives and Hessians of QNodes is now supported when
using the PyTorch interface.
[(#1129)](https://github.com/PennyLaneAI/pennylane/pull/1129/files)
24 changes: 13 additions & 11 deletions pennylane/_grad.py
@@ -14,8 +14,10 @@
"""
This module contains the autograd wrappers :class:`grad` and :func:`jacobian`
"""
import numpy as onp

from pennylane import numpy as np
from functools import partial
import numpy as _np

from autograd.core import make_vjp as _make_vjp
from autograd.wrap_util import unary_to_nary
@@ -153,7 +155,7 @@ def jacobian(func, argnum=None):
if isinstance(argnum, int):
return _jacobian(func, argnum)

return lambda *args, **kwargs: _np.stack(
return lambda *args, **kwargs: np.stack(
[_jacobian(func, arg)(*args, **kwargs) for arg in argnum]
).T

@@ -179,7 +181,7 @@ def _jacobian_function(*args, **kwargs):
if len(argnum) == 1:
return _jacobian(func, argnum[0])(*args, **kwargs)

return _np.stack([_jacobian(func, arg)(*args, **kwargs) for arg in argnum]).T
return np.stack([_jacobian(func, arg)(*args, **kwargs) for arg in argnum]).T

return _jacobian_function

@@ -209,8 +211,8 @@ def _fd_first_order_centered(f, argnum, delta, *args, idx=None, **kwargs):
"The value of 'argnum' has to be between 0 and {}; got {}".format(len(args) - 1, argnum)
)

x = _np.array(args[argnum])
gradient = _np.zeros_like(x, dtype="O")
x = onp.array(args[argnum])
gradient = onp.zeros_like(x, dtype="O")

if x.ndim == 0 and idx is not None:
raise ValueError(
@@ -219,10 +221,10 @@ def _fd_first_order_centered(f, argnum, delta, *args, idx=None, **kwargs):
)

if idx is None:
idx = list(_np.ndindex(*x.shape))
idx = list(onp.ndindex(*x.shape))

for i in idx:
shift = _np.zeros_like(x)
shift = onp.zeros_like(x)
shift[i] += 0.5 * delta
gradient[i] = (
f(*args[:argnum], x + shift, *args[argnum + 1 :], **kwargs)
@@ -257,7 +259,7 @@ def _fd_second_order_centered(f, argnum, delta, *args, idx=None, **kwargs):
"The value of 'argnum' has to be between 0 and {}; got {}".format(len(args) - 1, argnum)
)

x = _np.array(args[argnum])
x = onp.array(args[argnum])

if x.ndim == 0 and idx is not None:
raise ValueError(
@@ -284,7 +286,7 @@ def _fd_second_order_centered(f, argnum, delta, *args, idx=None, **kwargs):

# diagonal
if i == j:
shift = _np.zeros_like(x)
shift = onp.zeros_like(x)
shift[i] += delta
deriv2 = (
f(*args[:argnum], x + shift, *args[argnum + 1 :], **kwargs)
@@ -294,10 +296,10 @@ def _fd_second_order_centered(f, argnum, delta, *args, idx=None, **kwargs):

# off-diagonal
if i != j:
shift_i = _np.zeros_like(x)
shift_i = onp.zeros_like(x)
shift_i[i] += 0.5 * delta

shift_j = _np.zeros_like(x)
shift_j = onp.zeros_like(x)
shift_j[j] += 0.5 * delta

deriv2 = (
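
The `_fd_first_order_centered` and `_fd_second_order_centered` helpers shown (in part) above implement centered finite-difference stencils. The standalone sketch below illustrates the same diagonal and off-diagonal second-order stencils outside of PennyLane; the function name and structure here are not PennyLane's.

```python
import numpy as np

def fd_hessian(f, x, delta=1e-3):
    """Centered finite-difference Hessian of a scalar function f at x.
    Diagonal:     [f(x + d*e_i) - 2*f(x) + f(x - d*e_i)] / d**2
    Off-diagonal: four-point stencil with half-delta shifts, divided by d**2.
    """
    x = np.asarray(x, dtype=float)
    n = x.size
    hess = np.zeros((n, n))
    for i in range(n):
        for j in range(i, n):
            if i == j:
                shift = np.zeros_like(x)
                shift[i] = delta
                hess[i, i] = (f(x + shift) - 2 * f(x) + f(x - shift)) / delta ** 2
            else:
                si = np.zeros_like(x)
                sj = np.zeros_like(x)
                si[i] = 0.5 * delta
                sj[j] = 0.5 * delta
                hess[i, j] = hess[j, i] = (
                    f(x + si + sj) - f(x - si + sj) - f(x + si - sj) + f(x - si - sj)
                ) / delta ** 2
    return hess

# fd_hessian(lambda v: np.cos(v[0]) * np.cos(v[1]), [1.0, 2.0]) approximates
# [[0.2248, 0.7651], [0.7651, 0.2248]], matching the changelog example above
# up to O(delta**2) discretization error.
```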
80 changes: 72 additions & 8 deletions pennylane/interfaces/autograd.py
@@ -195,18 +195,82 @@ def vjp(ans, self, params, device): # pylint: disable=unused-argument
gradient output vector, and computes the vector-Jacobian product
"""

def gradient_product(g):
# In autograd, the forward pass is always performed prior to the backwards
# pass, so we do not need to re-unwrap the parameters.
# The following dictionary caches the Jacobian and Hessian matrices,
# so that they can be re-used for different vjp/vhp computations
# within the same backpropagation call.
# This dictionary will exist in memory when autograd.grad is called,
# via closure. Once autograd.grad has returned, this dictionary
# will no longer be in scope and the memory will be freed.
saved_grad_matrices = {}

def _evaluate_grad_matrix(p, grad_matrix_fn):
"""Convenience function for generating gradient matrices
for the given parameter values.

This function serves two purposes:

* Avoids duplicating logic surrounding parameter unwrapping/wrapping.

* Takes advantage of the closure over the ``saved_grad_matrices`` dictionary to
cache computed gradient matrices, avoiding redundant recomputation.

This is particularly useful when differentiating vector-valued QNodes.
Because Autograd requests the vector-grad matrix product,
and *not* the full grad matrix, differentiating vector-valued
functions will result in multiple backward passes.

Args:
p (Sequence): quantum tape parameter values used to evaluate the gradient matrix
grad_matrix_fn (str): Name of the gradient matrix function. Should correspond to an existing
tape method. Currently allowed values include ``"jacobian"`` and ``"hessian"``.

Returns:
array[float]: the gradient matrix
"""
if grad_matrix_fn in saved_grad_matrices:
return saved_grad_matrices[grad_matrix_fn]

self.set_parameters(self._all_params_unwrapped, trainable_only=False)
jac = self.jacobian(device, params=params, **self.jacobian_options)
grad_matrix = getattr(self, grad_matrix_fn)(device, params=p, **self.jacobian_options)
self.set_parameters(self._all_parameter_values, trainable_only=False)

# only flatten g if all parameters are single values
saved_grad_matrices[grad_matrix_fn] = grad_matrix
return grad_matrix

def gradient_product(dy):
"""Returns the vector-Jacobian product with given
parameter values p and output gradient dy"""

if all(np.ndim(p) == 0 for p in params):
vjp = g.flatten() @ jac
else:
vjp = g @ jac
# only flatten dy if all parameters are single values
dy = dy.flatten()

@autograd.extend.primitive
def jacobian(p):
"""Returns the Jacobian for parameters p"""
return _evaluate_grad_matrix(p, "jacobian")

def vhp(ans, p):
def hessian_product(ddy):
"""Returns the vector-Hessian product with given
parameter values p, output gradient dy, and output
second-order gradient ddy"""
hessian = _evaluate_grad_matrix(p, "hessian")

if dy.size > 1:
vhp = dy @ ddy @ hessian @ dy.T
else:
vhp = np.squeeze(ddy @ hessian)

return vhp

return hessian_product

# register vhp as the backward method of the jacobian function
autograd.extend.defvjp(jacobian, vhp, argnums=[0])

vjp = dy @ jacobian(params)
return vjp

return gradient_product
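
The pattern above, wrapping the Jacobian itself as an autograd primitive and registering the vector-Hessian product as its backward method, can be illustrated outside PennyLane with a toy function whose Jacobian and Hessian are known analytically. The sketch below mirrors the `gradient_product`/`hessian_product` structure; the toy function and its analytic derivatives stand in for the quantum tape's parameter-shift `jacobian` and `hessian` methods and are not PennyLane code.

```python
import autograd
import autograd.numpy as anp
from autograd.extend import primitive, defvjp

# Toy "tape": f(x) = sin(x0) * cos(x1), with analytic Jacobian and Hessian
# standing in for the parameter-shift tape.jacobian / tape.hessian methods.
def analytic_jacobian(x):
    return anp.array([anp.cos(x[0]) * anp.cos(x[1]), -anp.sin(x[0]) * anp.sin(x[1])])

def analytic_hessian(x):
    return anp.array([
        [-anp.sin(x[0]) * anp.cos(x[1]), -anp.cos(x[0]) * anp.sin(x[1])],
        [-anp.cos(x[0]) * anp.sin(x[1]), -anp.sin(x[0]) * anp.cos(x[1])],
    ])

@primitive
def f(x):
    return anp.sin(x[0]) * anp.cos(x[1])

def f_vjp(ans, x):
    def gradient_product(dy):
        # Wrap the Jacobian as a primitive so that autograd can differentiate
        # through it a second time on the outer trace.
        @primitive
        def jacobian(p):
            return analytic_jacobian(p)

        def jac_vjp(jac_ans, p):
            def hessian_product(ddy):
                # vector-Hessian product for the second backward pass
                return ddy @ analytic_hessian(p)
            return hessian_product

        # register the vector-Hessian product as the backward method of
        # the Jacobian, mirroring the code above
        defvjp(jacobian, jac_vjp, argnums=[0])
        return dy * jacobian(x)

    return gradient_product

defvjp(f, f_vjp, argnums=[0])

x = anp.array([0.5, 0.1])
print(autograd.jacobian(autograd.grad(f))(x))  # matches analytic_hessian(x)
```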
1 change: 1 addition & 0 deletions pennylane/tape/jacobian_tape.py
@@ -91,6 +91,7 @@ class JacobianTape(QuantumTape):
def __init__(self, name=None, do_queue=True):
super().__init__(name=name, do_queue=do_queue)
self.jacobian_options = {}
self.hessian_options = {}

def _grad_method(self, idx, use_graph=True, default_method="F"):
"""Determine the correct partial derivative computation method for each gate parameter.