Add support for the parameter-shift hessian to the TF interface (#1110)
* prototyping for hessian computation in tensorflow interface

* add changelog

* more changes

* more stuff

* qnode test

* Add support for computing the Hessian for vector-valued QNodes (#1122)

* fix

* fix

* add additional test

* fix

* fix

* Fix interface

* fix

* fix test

* fix

* fix

* Update .github/CHANGELOG.md

* Update pennylane/interfaces/tf.py

* Update pennylane/interfaces/tf.py

Co-authored-by: Theodor <theodor@xanadu.ai>

* suggested changes

* minor updates

* comment updates

* comment indentation

* minor fix

* Update tests/tape/test_jacobian_tape.py

* add test for vector function hessian tape

* fix test

Co-authored-by: Josh Izaac <josh146@gmail.com>
Co-authored-by: Theodor <theodor@xanadu.ai>
3 people authored Mar 26, 2021
1 parent 6e22257 commit 2069021
Showing 5 changed files with 338 additions and 18 deletions.
29 changes: 29 additions & 0 deletions .github/CHANGELOG.md
@@ -2,6 +2,35 @@

<h3>New features since last release</h3>

- The TensorFlow interface now supports computing second derivatives and Hessians of hybrid quantum models.
Second derivatives are supported on both hardware and simulators.
[(#1110)](https://github.com/PennyLaneAI/pennylane/pull/1110)

```python
import tensorflow as tf
import pennylane as qml

dev = qml.device('default.qubit', wires=1)

@qml.qnode(dev, interface='tf', diff_method='parameter-shift')
def circuit(x):
    qml.RX(x[0], wires=0)
    qml.RY(x[1], wires=0)
    return qml.expval(qml.PauliZ(0))

x = tf.Variable([0.1, 0.2], dtype=tf.float64)

with tf.GradientTape() as tape1:
    with tf.GradientTape() as tape2:
        y = circuit(x)
    grad = tape2.gradient(y, x)

hessian = tape1.jacobian(grad, x)
```

To compute just the diagonal of the Hessian, the gradient of the
first derivatives can be taken:

```python
hessian_diagonals = tape1.gradient(grad, x)
```
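For this circuit, ⟨Z⟩ = cos(x₀)cos(x₁), so the result can be checked against the analytic Hessian. A quick verification sketch (not part of the changelog; it reuses `hessian` and the parameter values from the snippet above):

```python
import numpy as np

a, b = 0.1, 0.2  # the values stored in x above

# Analytic Hessian of f(a, b) = cos(a) * cos(b)
expected_hessian = np.array([
    [-np.cos(a) * np.cos(b), np.sin(a) * np.sin(b)],
    [np.sin(a) * np.sin(b), -np.cos(a) * np.cos(b)],
])

assert np.allclose(hessian, expected_hessian)
```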

* Adds a new transform `qml.ctrl` that adds control wires to subroutines.
[(#1157)](https://github.com/PennyLaneAI/pennylane/pull/1157)

82 changes: 68 additions & 14 deletions pennylane/interfaces/tf.py
@@ -138,34 +138,88 @@ def _execute(self, params, **input_kwargs):
        res = self.execute_device(args, input_kwargs["device"])
        self.set_parameters(all_params, trainable_only=False)

-        def grad(grad_output, **tfkwargs):
-            variables = tfkwargs.get("variables", None)
+        # The following dictionary caches the Jacobian and Hessian matrices,
+        # so that they can be re-used for different vjp/vhp computations
+        # within the same backpropagation call.
+        # This dictionary is tied to an instance of the inner function jacobian_product
+        # called within tf_tape.gradient or tf_tape.jacobian, via closure.
+        # Once tf_tape.gradient/jacobian has returned, the jacobian_product instance
+        # will no longer be in scope and the memory will be freed.
+        saved_grad_matrices = {}

+        def _evaluate_grad_matrix(grad_matrix_fn):
+            """Convenience function for generating gradient matrices
+            for the given parameter values.
+
+            This function serves two purposes:
+
+            * Avoids duplicating logic surrounding parameter unwrapping/wrapping
+
+            * Takes advantage of closure, to cache computed gradient matrices via
+              the ``saved_grad_matrices`` dictionary, to avoid gradient matrices being
+              computed multiple redundant times.
+
+              This is particularly useful when differentiating vector-valued QNodes.
+              Because TensorFlow requests the vector-grad matrix product,
+              and *not* the full grad matrix, differentiating vector-valued
+              functions will result in multiple backward passes.
+
+            Args:
+                grad_matrix_fn (str): Name of the gradient matrix function. Should
+                    correspond to an existing tape method. Currently allowed values
+                    include ``"jacobian"`` and ``"hessian"``.
+
+            Returns:
+                array[float]: the gradient matrix
+            """
+            if grad_matrix_fn in saved_grad_matrices:
+                return saved_grad_matrices[grad_matrix_fn]

            self.set_parameters(all_params_unwrapped, trainable_only=False)
-            jacobian = self.jacobian(input_kwargs["device"], params=args, **self.jacobian_options)
+            grad_matrix = getattr(self, grad_matrix_fn)(
+                input_kwargs["device"], params=args, **self.jacobian_options
+            )
            self.set_parameters(all_params, trainable_only=False)

-            jacobian = tf.constant(jacobian, dtype=self.dtype)
+            grad_matrix = tf.constant(grad_matrix, dtype=self.dtype)
+            saved_grad_matrices[grad_matrix_fn] = grad_matrix
+
+            return grad_matrix
+
+        def jacobian_product(dy, **tfkwargs):
+            variables = tfkwargs.get("variables", None)
+            dy_row = tf.reshape(dy, [1, -1])
+
+            @tf.custom_gradient
+            def jacobian(p):
+                def hessian_product(ddy, **tfkwargs):
+                    variables = tfkwargs.get("variables", None)
+                    hessian = _evaluate_grad_matrix("hessian")
+
+                    if self.output_dim == 1:
+                        hessian = tf.expand_dims(hessian, -1)

-            # Reshape gradient output array as a 2D row-vector.
-            grad_output_row = tf.reshape(grad_output, [1, -1])
+                    vhp = tf.cond(
+                        tf.rank(hessian) > 2,
+                        lambda: dy_row @ ddy @ hessian @ tf.transpose(dy_row),
+                        lambda: ddy @ hessian,
+                    )

-            # Calculate the vector-Jacobian matrix product, and unstack the output.
-            grad_input = tf.matmul(grad_output_row, jacobian)
-            grad_input = tf.unstack(tf.reshape(grad_input, [-1]))
+                    vhp = tf.unstack(tf.reshape(vhp, [-1]))
+                    return (vhp, variables) if variables is not None else vhp

-            if variables is not None:
-                return grad_input, variables
+                return _evaluate_grad_matrix("jacobian"), hessian_product

-            return grad_input
+            vjp = tf.matmul(dy_row, jacobian(params))
+            vjp = tf.unstack(tf.reshape(vjp, [-1]))
+            return (vjp, variables) if variables is not None else vjp

        if self.is_sampled:
-            return res, grad
+            return res, jacobian_product

        if res.dtype == np.dtype("object"):
            res = np.hstack(res)

-        return tf.convert_to_tensor(res, dtype=self.dtype), grad
+        return tf.convert_to_tensor(res, dtype=self.dtype), jacobian_product

    @classmethod
    def apply(cls, tape, dtype=tf.float64):
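The caching above matters most for vector-valued QNodes: TensorFlow requests one vector-Hessian product per output component, but the parameter-shift Hessian itself only needs to be evaluated once per backpropagation call. A usage sketch, mirroring the new tests added below (`default.qubit` in eager mode assumed):

```python
import tensorflow as tf
import pennylane as qml

dev = qml.device("default.qubit", wires=1)

@qml.qnode(dev, interface="tf", diff_method="parameter-shift")
def circuit(x):
    qml.RY(x[0], wires=0)
    qml.RX(x[1], wires=0)
    return qml.probs(wires=0)  # vector-valued output

x = tf.Variable([1.0, 2.0], dtype=tf.float64)

with tf.GradientTape(persistent=True) as tape1:
    with tf.GradientTape(persistent=True) as tape2:
        res = circuit(x)
    # One backward pass runs per output element; the quantum Hessian is
    # computed once and re-used via the saved_grad_matrices cache.
    g = tape2.jacobian(res, x, experimental_use_pfor=False)

hess = tape1.jacobian(g, x, experimental_use_pfor=False)  # shape (2, 2, 2)
```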
14 changes: 11 additions & 3 deletions pennylane/tape/jacobian_tape.py
@@ -489,7 +489,7 @@ def jacobian(self, device, params=None, **options):
        >>> tape.jacobian(dev)
        array([], shape=(4, 0), dtype=float64)
        """
-        if any([m.return_type is State for m in self.measurements]):
+        if any(m.return_type is State for m in self.measurements):
            raise ValueError("The jacobian method does not support circuits that return the state")

        if self.is_sampled:
@@ -663,7 +663,7 @@ def hessian(self, device, params=None, **options):
        >>> tape.hessian(dev)
        array([], shape=(0, 0), dtype=float64)
        """
-        if any([m.return_type is State for m in self.measurements]):
+        if any(m.return_type is State for m in self.measurements):
            raise ValueError("The Hessian method does not support circuits that return the state")

        method = options.get("method", "analytic")
@@ -730,11 +730,19 @@ def hessian(self, device, params=None, **options):

            if hessian is None:
                # create the Hessian matrix
-                hessian = np.zeros((len(params), len(params)), dtype=float)
+                if self.output_dim is not None:
+                    hessian = np.zeros(
+                        (len(params), len(params), np.prod(self.output_dim)), dtype=float
+                    )
+                else:
+                    hessian = np.zeros((len(params), len(params)), dtype=float)

            if i == j:
                hessian[i, i] = g
            else:
                hessian[i, j] = hessian[j, i] = g

+        if self.output_dim == 1:
+            hessian = np.squeeze(hessian, axis=-1)
+
        return hessian
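To make the new shape handling concrete: for `p` trainable parameters and a tape with output dimension `m`, the Hessian is now built with shape `(p, p, m)` and squeezed back to `(p, p)` in the scalar case. A minimal NumPy sketch of just this bookkeeping (values hypothetical, not part of the diff):

```python
import numpy as np

p, m = 2, 3  # hypothetical: 2 trainable parameters, output dimension 3

# Vector-valued tape: one (p, p) Hessian slice per output component.
hessian = np.zeros((p, p, np.prod(m)), dtype=float)
assert hessian.shape == (2, 2, 3)

# Scalar-valued tape (output_dim == 1): the trailing axis is squeezed away.
hessian_scalar = np.squeeze(np.zeros((p, p, 1), dtype=float), axis=-1)
assert hessian_scalar.shape == (2, 2)
```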
204 changes: 203 additions & 1 deletion tests/interfaces/test_qnode_tf.py
@@ -32,7 +32,7 @@
    ],
)
class TestQNode:
-    """Same tests as above, but this time via the QNode interface!"""
+    """Tests the tensorflow interface used with a QNode."""

    def test_execution_no_interface(self, dev_name, diff_method):
        """Test execution works without an interface, and that trainable parameters
@@ -525,6 +525,208 @@ def circuit():
        assert res.shape == (2, 10)
        assert isinstance(res, tf.Tensor)

    def test_second_derivative(self, dev_name, diff_method, mocker, tol):
        """Test second derivative calculation of a scalar-valued QNode"""
        if diff_method not in {"parameter-shift", "backprop"}:
            pytest.skip("Test only supports parameter-shift or backprop")

        dev = qml.device(dev_name, wires=1)

        @qnode(dev, diff_method=diff_method, interface="tf")
        def circuit(x):
            qml.RY(x[0], wires=0)
            qml.RX(x[1], wires=0)
            return qml.expval(qml.PauliZ(0))

        x = tf.Variable([1.0, 2.0], dtype=tf.float64)

        with tf.GradientTape() as tape1:
            with tf.GradientTape() as tape2:
                res = circuit(x)
            g = tape2.gradient(res, x)
            res2 = tf.reduce_sum(g)

        spy = mocker.spy(JacobianTape, "hessian")
        g2 = tape1.gradient(res2, x)

        if diff_method == "parameter-shift":
            spy.assert_called_once()
        elif diff_method == "backprop":
            spy.assert_not_called()

        a, b = x * 1.0

        expected_res = tf.cos(a) * tf.cos(b)
        assert np.allclose(res, expected_res, atol=tol, rtol=0)

        expected_g = [-tf.sin(a) * tf.cos(b), -tf.cos(a) * tf.sin(b)]
        assert np.allclose(g, expected_g, atol=tol, rtol=0)

        expected_g2 = [
            -tf.cos(a) * tf.cos(b) + tf.sin(a) * tf.sin(b),
            tf.sin(a) * tf.sin(b) - tf.cos(a) * tf.cos(b),
        ]
        assert np.allclose(g2, expected_g2, atol=tol, rtol=0)

    def test_hessian(self, dev_name, diff_method, mocker, tol):
        """Test Hessian calculation of a scalar-valued QNode"""
        if diff_method not in {"parameter-shift", "backprop"}:
            pytest.skip("Test only supports parameter-shift or backprop")

        dev = qml.device(dev_name, wires=1)

        @qnode(dev, diff_method=diff_method, interface="tf")
        def circuit(x):
            qml.RY(x[0], wires=0)
            qml.RX(x[1], wires=0)
            return qml.expval(qml.PauliZ(0))

        x = tf.Variable([1.0, 2.0], dtype=tf.float64)

        with tf.GradientTape() as tape1:
            with tf.GradientTape() as tape2:
                res = circuit(x)
            g = tape2.gradient(res, x)

        spy = mocker.spy(JacobianTape, "hessian")
        hess = tape1.jacobian(g, x)

        if diff_method == "parameter-shift":
            spy.assert_called_once()
        elif diff_method == "backprop":
            spy.assert_not_called()

        a, b = x * 1.0

        expected_res = tf.cos(a) * tf.cos(b)
        assert np.allclose(res, expected_res, atol=tol, rtol=0)

        expected_g = [-tf.sin(a) * tf.cos(b), -tf.cos(a) * tf.sin(b)]
        assert np.allclose(g, expected_g, atol=tol, rtol=0)

        expected_hess = [
            [-tf.cos(a) * tf.cos(b), tf.sin(a) * tf.sin(b)],
            [tf.sin(a) * tf.sin(b), -tf.cos(a) * tf.cos(b)],
        ]
        assert np.allclose(hess, expected_hess, atol=tol, rtol=0)

    def test_hessian_vector_valued(self, dev_name, diff_method, mocker, tol):
        """Test Hessian calculation of a vector-valued QNode"""
        if diff_method not in {"parameter-shift", "backprop"}:
            pytest.skip("Test only supports parameter-shift or backprop")

        dev = qml.device(dev_name, wires=1)

        @qnode(dev, diff_method=diff_method, interface="tf")
        def circuit(x):
            qml.RY(x[0], wires=0)
            qml.RX(x[1], wires=0)
            return qml.probs(wires=0)

        x = tf.Variable([1.0, 2.0], dtype=tf.float64)

        with tf.GradientTape(persistent=True) as tape1:
            with tf.GradientTape(persistent=True) as tape2:
                res = circuit(x)

            spy = mocker.spy(JacobianTape, "hessian")
            g = tape2.jacobian(res, x, experimental_use_pfor=False)

        hess = tape1.jacobian(g, x, experimental_use_pfor=False)

        if diff_method == "parameter-shift":
            spy.assert_called_once()
        elif diff_method == "backprop":
            spy.assert_not_called()

        a, b = x * 1.0

        expected_res = [
            0.5 + 0.5 * tf.cos(a) * tf.cos(b),
            0.5 - 0.5 * tf.cos(a) * tf.cos(b),
        ]
        assert np.allclose(res, expected_res, atol=tol, rtol=0)

        expected_g = [
            [-0.5 * tf.sin(a) * tf.cos(b), -0.5 * tf.cos(a) * tf.sin(b)],
            [0.5 * tf.sin(a) * tf.cos(b), 0.5 * tf.cos(a) * tf.sin(b)],
        ]
        assert np.allclose(g, expected_g, atol=tol, rtol=0)

        expected_hess = [
            [
                [-0.5 * tf.cos(a) * tf.cos(b), 0.5 * tf.sin(a) * tf.sin(b)],
                [0.5 * tf.sin(a) * tf.sin(b), -0.5 * tf.cos(a) * tf.cos(b)],
            ],
            [
                [0.5 * tf.cos(a) * tf.cos(b), -0.5 * tf.sin(a) * tf.sin(b)],
                [-0.5 * tf.sin(a) * tf.sin(b), 0.5 * tf.cos(a) * tf.cos(b)],
            ],
        ]
        np.testing.assert_allclose(hess, expected_hess, atol=tol, rtol=0, verbose=True)

    def test_hessian_ragged(self, dev_name, diff_method, mocker, tol):
        """Test Hessian calculation of a ragged QNode"""
        if diff_method not in {"parameter-shift", "backprop"}:
            pytest.skip("Test only supports parameter-shift or backprop")

        dev = qml.device(dev_name, wires=2)

        @qnode(dev, diff_method=diff_method, interface="tf")
        def circuit(x):
            qml.RY(x[0], wires=0)
            qml.RX(x[1], wires=0)
            qml.RY(x[0], wires=1)
            qml.RX(x[1], wires=1)
            return qml.expval(qml.PauliZ(0)), qml.probs(wires=1)

        x = tf.Variable([1.0, 2.0], dtype=tf.float64)
        res = circuit(x)

        with tf.GradientTape(persistent=True) as tape1:
            with tf.GradientTape(persistent=True) as tape2:
                res = circuit(x)

            spy = mocker.spy(JacobianTape, "hessian")
            g = tape2.jacobian(res, x, experimental_use_pfor=False)

        hess = tape1.jacobian(g, x, experimental_use_pfor=False)

        if diff_method == "parameter-shift":
            spy.assert_called_once()
        elif diff_method == "backprop":
            spy.assert_not_called()

        a, b = x * 1.0

        expected_res = [
            tf.cos(a) * tf.cos(b),
            0.5 + 0.5 * tf.cos(a) * tf.cos(b),
            0.5 - 0.5 * tf.cos(a) * tf.cos(b),
        ]
        assert np.allclose(res, expected_res, atol=tol, rtol=0)

        expected_g = [
            [-tf.sin(a) * tf.cos(b), -tf.cos(a) * tf.sin(b)],
            [-0.5 * tf.sin(a) * tf.cos(b), -0.5 * tf.cos(a) * tf.sin(b)],
            [0.5 * tf.sin(a) * tf.cos(b), 0.5 * tf.cos(a) * tf.sin(b)],
        ]
        assert np.allclose(g, expected_g, atol=tol, rtol=0)

        expected_hess = [
            [
                [-tf.cos(a) * tf.cos(b), tf.sin(a) * tf.sin(b)],
                [tf.sin(a) * tf.sin(b), -tf.cos(a) * tf.cos(b)],
            ],
            [
                [-0.5 * tf.cos(a) * tf.cos(b), 0.5 * tf.sin(a) * tf.sin(b)],
                [0.5 * tf.sin(a) * tf.sin(b), -0.5 * tf.cos(a) * tf.cos(b)],
            ],
            [
                [0.5 * tf.cos(a) * tf.cos(b), -0.5 * tf.sin(a) * tf.sin(b)],
                [-0.5 * tf.sin(a) * tf.sin(b), 0.5 * tf.cos(a) * tf.cos(b)],
            ],
        ]
        np.testing.assert_allclose(hess, expected_hess, atol=tol, rtol=0, verbose=True)


def qtransform(qnode, a, framework=tf):
"""Transforms every RY(y) gate in a circuit to RX(-a*cos(y))"""
Expand Down