Add support for the parameter-shift hessian to the TF interface (#1110)
* prototyping for hessian computation in tensorflow interface

* add changelog

* more changes

* more stuff

* qnode test

* Add support for computing the Hessian for vector-valued QNodes (#1122)

* fix

* fix

* add additional test

* fix

* fix

* Fix interface

* fix

* fix test

* fix

* fix

* Update .github/CHANGELOG.md

* Update pennylane/interfaces/tf.py

* Update pennylane/interfaces/tf.py

Co-authored-by: Theodor <theodor@xanadu.ai>

* suggested changes

* minor updates

* comment updates

* comment indentation

* minor fix

* Update tests/tape/test_jacobian_tape.py

* add test for vector function hessian tape

* fix test

Co-authored-by: Josh Izaac <josh146@gmail.com>
Co-authored-by: Theodor <theodor@xanadu.ai>
3 people authored Mar 26, 2021
1 parent 6e22257 commit 2069021
Showing 5 changed files with 338 additions and 18 deletions.
29 changes: 29 additions & 0 deletions .github/CHANGELOG.md
@@ -2,6 +2,35 @@

<h3>New features since last release</h3>

- The TensorFlow interface now supports computing second derivatives and Hessians of hybrid quantum models.
Second derivatives are supported on both hardware and simulators.
[(#1110)](https://github.com/PennyLaneAI/pennylane/pull/1110)

```python
import tensorflow as tf
import pennylane as qml

dev = qml.device('default.qubit', wires=1)

@qml.qnode(dev, interface='tf', diff_method='parameter-shift')
def circuit(x):
    qml.RX(x[0], wires=0)
    qml.RY(x[1], wires=0)
    return qml.expval(qml.PauliZ(0))

x = tf.Variable([0.1, 0.2], dtype=tf.float64)

with tf.GradientTape() as tape1:
    with tf.GradientTape() as tape2:
        y = circuit(x)
    grad = tape2.gradient(y, x)

hessian = tape1.jacobian(grad, x)
```

To compute just the diagonal of the Hessian, the gradient of the
first derivatives can be taken:

```python
hessian_diagonals = tape1.gradient(grad, x)
```
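For this circuit, ⟨Z⟩ = cos(x₀)cos(x₁), so the result can be checked against the analytic Hessian. A quick verification sketch (not part of the changelog; it reuses `hessian` and the parameter values from the snippet above):

```python
import numpy as np

a, b = 0.1, 0.2  # the values stored in x above

# Analytic Hessian of f(a, b) = cos(a) * cos(b)
expected_hessian = np.array([
    [-np.cos(a) * np.cos(b), np.sin(a) * np.sin(b)],
    [np.sin(a) * np.sin(b), -np.cos(a) * np.cos(b)],
])

assert np.allclose(hessian, expected_hessian)
```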

* Adds a new transform `qml.ctrl` that adds control wires to subroutines.
[(#1157)](https://github.com/PennyLaneAI/pennylane/pull/1157)

82 changes: 68 additions & 14 deletions pennylane/interfaces/tf.py
@@ -138,34 +138,88 @@ def _execute(self, params, **input_kwargs):
        res = self.execute_device(args, input_kwargs["device"])
        self.set_parameters(all_params, trainable_only=False)

-        def grad(grad_output, **tfkwargs):
-            variables = tfkwargs.get("variables", None)
+        # The following dictionary caches the Jacobian and Hessian matrices,
+        # so that they can be re-used for different vjp/vhp computations
+        # within the same backpropagation call.
+        # This dictionary is tied to an instance of the inner function jacobian_product
+        # called within tf_tape.gradient or tf_tape.jacobian, via closure.
+        # Once tf_tape.gradient/jacobian has returned, the jacobian_product instance
+        # will no longer be in scope and the memory will be freed.
+        saved_grad_matrices = {}

+        def _evaluate_grad_matrix(grad_matrix_fn):
+            """Convenience function for generating gradient matrices
+            for the given parameter values.
+
+            This function serves two purposes:
+
+            * Avoids duplicating logic surrounding parameter unwrapping/wrapping
+
+            * Takes advantage of closure, to cache computed gradient matrices via
+              the ``saved_grad_matrices`` dictionary, to avoid gradient matrices being
+              computed multiple redundant times.
+
+              This is particularly useful when differentiating vector-valued QNodes.
+              Because TensorFlow requests the vector-grad matrix product,
+              and *not* the full grad matrix, differentiating vector-valued
+              functions will result in multiple backward passes.
+
+            Args:
+                grad_matrix_fn (str): Name of the gradient matrix function. Should
+                    correspond to an existing tape method. Currently allowed values
+                    include ``"jacobian"`` and ``"hessian"``.
+
+            Returns:
+                array[float]: the gradient matrix
+            """
+            if grad_matrix_fn in saved_grad_matrices:
+                return saved_grad_matrices[grad_matrix_fn]

            self.set_parameters(all_params_unwrapped, trainable_only=False)
-            jacobian = self.jacobian(input_kwargs["device"], params=args, **self.jacobian_options)
+            grad_matrix = getattr(self, grad_matrix_fn)(
+                input_kwargs["device"], params=args, **self.jacobian_options
+            )
            self.set_parameters(all_params, trainable_only=False)

-            jacobian = tf.constant(jacobian, dtype=self.dtype)
+            grad_matrix = tf.constant(grad_matrix, dtype=self.dtype)
+            saved_grad_matrices[grad_matrix_fn] = grad_matrix
+
+            return grad_matrix
+
+        def jacobian_product(dy, **tfkwargs):
+            variables = tfkwargs.get("variables", None)
+            dy_row = tf.reshape(dy, [1, -1])
+
+            @tf.custom_gradient
+            def jacobian(p):
+                def hessian_product(ddy, **tfkwargs):
+                    variables = tfkwargs.get("variables", None)
+                    hessian = _evaluate_grad_matrix("hessian")
+
+                    if self.output_dim == 1:
+                        hessian = tf.expand_dims(hessian, -1)

-            # Reshape gradient output array as a 2D row-vector.
-            grad_output_row = tf.reshape(grad_output, [1, -1])
+                    vhp = tf.cond(
+                        tf.rank(hessian) > 2,
+                        lambda: dy_row @ ddy @ hessian @ tf.transpose(dy_row),
+                        lambda: ddy @ hessian,
+                    )

-            # Calculate the vector-Jacobian matrix product, and unstack the output.
-            grad_input = tf.matmul(grad_output_row, jacobian)
-            grad_input = tf.unstack(tf.reshape(grad_input, [-1]))
+                    vhp = tf.unstack(tf.reshape(vhp, [-1]))
+                    return (vhp, variables) if variables is not None else vhp

-            if variables is not None:
-                return grad_input, variables
+                return _evaluate_grad_matrix("jacobian"), hessian_product

-            return grad_input
+            vjp = tf.matmul(dy_row, jacobian(params))
+            vjp = tf.unstack(tf.reshape(vjp, [-1]))
+            return (vjp, variables) if variables is not None else vjp

        if self.is_sampled:
-            return res, grad
+            return res, jacobian_product

        if res.dtype == np.dtype("object"):
            res = np.hstack(res)

-        return tf.convert_to_tensor(res, dtype=self.dtype), grad
+        return tf.convert_to_tensor(res, dtype=self.dtype), jacobian_product

    @classmethod
    def apply(cls, tape, dtype=tf.float64):
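The caching above matters most for vector-valued QNodes: TensorFlow requests one vector-Hessian product per output component, but the parameter-shift Hessian itself only needs to be evaluated once per backpropagation call. A usage sketch, mirroring the new tests added below (`default.qubit` in eager mode assumed):

```python
import tensorflow as tf
import pennylane as qml

dev = qml.device("default.qubit", wires=1)

@qml.qnode(dev, interface="tf", diff_method="parameter-shift")
def circuit(x):
    qml.RY(x[0], wires=0)
    qml.RX(x[1], wires=0)
    return qml.probs(wires=0)  # vector-valued output

x = tf.Variable([1.0, 2.0], dtype=tf.float64)

with tf.GradientTape(persistent=True) as tape1:
    with tf.GradientTape(persistent=True) as tape2:
        res = circuit(x)
    # One backward pass runs per output element; the quantum Hessian is
    # computed once and re-used via the saved_grad_matrices cache.
    g = tape2.jacobian(res, x, experimental_use_pfor=False)

hess = tape1.jacobian(g, x, experimental_use_pfor=False)  # shape (2, 2, 2)
```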
14 changes: 11 additions & 3 deletions pennylane/tape/jacobian_tape.py
@@ -489,7 +489,7 @@ def jacobian(self, device, params=None, **options):
        >>> tape.jacobian(dev)
        array([], shape=(4, 0), dtype=float64)
        """
-        if any([m.return_type is State for m in self.measurements]):
+        if any(m.return_type is State for m in self.measurements):
            raise ValueError("The jacobian method does not support circuits that return the state")

        if self.is_sampled:
@@ -663,7 +663,7 @@ def hessian(self, device, params=None, **options):
        >>> tape.hessian(dev)
        array([], shape=(0, 0), dtype=float64)
        """
-        if any([m.return_type is State for m in self.measurements]):
+        if any(m.return_type is State for m in self.measurements):
            raise ValueError("The Hessian method does not support circuits that return the state")

        method = options.get("method", "analytic")
@@ -730,11 +730,19 @@ def hessian(self, device, params=None, **options):

            if hessian is None:
                # create the Hessian matrix
-                hessian = np.zeros((len(params), len(params)), dtype=float)
+                if self.output_dim is not None:
+                    hessian = np.zeros(
+                        (len(params), len(params), np.prod(self.output_dim)), dtype=float
+                    )
+                else:
+                    hessian = np.zeros((len(params), len(params)), dtype=float)

            if i == j:
                hessian[i, i] = g
            else:
                hessian[i, j] = hessian[j, i] = g

+        if self.output_dim == 1:
+            hessian = np.squeeze(hessian, axis=-1)
+
        return hessian
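To make the new shape handling concrete: for `p` trainable parameters and a tape with output dimension `m`, the Hessian is now built with shape `(p, p, m)` and squeezed back to `(p, p)` in the scalar case. A minimal NumPy sketch of just this bookkeeping (values hypothetical, not part of the diff):

```python
import numpy as np

p, m = 2, 3  # hypothetical: 2 trainable parameters, output dimension 3

# Vector-valued tape: one (p, p) Hessian slice per output component.
hessian = np.zeros((p, p, np.prod(m)), dtype=float)
assert hessian.shape == (2, 2, 3)

# Scalar-valued tape (output_dim == 1): the trailing axis is squeezed away.
hessian_scalar = np.squeeze(np.zeros((p, p, 1), dtype=float), axis=-1)
assert hessian_scalar.shape == (2, 2)
```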
204 changes: 203 additions & 1 deletion tests/interfaces/test_qnode_tf.py
@@ -32,7 +32,7 @@
    ],
)
class TestQNode:
-    """Same tests as above, but this time via the QNode interface!"""
+    """Tests the tensorflow interface used with a QNode."""

    def test_execution_no_interface(self, dev_name, diff_method):
        """Test execution works without an interface, and that trainable parameters
@@ -525,6 +525,208 @@ def circuit():
        assert res.shape == (2, 10)
        assert isinstance(res, tf.Tensor)

    def test_second_derivative(self, dev_name, diff_method, mocker, tol):
        """Test second derivative calculation of a scalar-valued QNode"""
        if diff_method not in {"parameter-shift", "backprop"}:
            pytest.skip("Test only supports parameter-shift or backprop")

        dev = qml.device(dev_name, wires=1)

        @qnode(dev, diff_method=diff_method, interface="tf")
        def circuit(x):
            qml.RY(x[0], wires=0)
            qml.RX(x[1], wires=0)
            return qml.expval(qml.PauliZ(0))

        x = tf.Variable([1.0, 2.0], dtype=tf.float64)

        with tf.GradientTape() as tape1:
            with tf.GradientTape() as tape2:
                res = circuit(x)
            g = tape2.gradient(res, x)
            res2 = tf.reduce_sum(g)

        spy = mocker.spy(JacobianTape, "hessian")
        g2 = tape1.gradient(res2, x)

        if diff_method == "parameter-shift":
            spy.assert_called_once()
        elif diff_method == "backprop":
            spy.assert_not_called()

        a, b = x * 1.0

        expected_res = tf.cos(a) * tf.cos(b)
        assert np.allclose(res, expected_res, atol=tol, rtol=0)

        expected_g = [-tf.sin(a) * tf.cos(b), -tf.cos(a) * tf.sin(b)]
        assert np.allclose(g, expected_g, atol=tol, rtol=0)

        expected_g2 = [
            -tf.cos(a) * tf.cos(b) + tf.sin(a) * tf.sin(b),
            tf.sin(a) * tf.sin(b) - tf.cos(a) * tf.cos(b),
        ]
        assert np.allclose(g2, expected_g2, atol=tol, rtol=0)

    def test_hessian(self, dev_name, diff_method, mocker, tol):
        """Test Hessian calculation of a scalar-valued QNode"""
        if diff_method not in {"parameter-shift", "backprop"}:
            pytest.skip("Test only supports parameter-shift or backprop")

        dev = qml.device(dev_name, wires=1)

        @qnode(dev, diff_method=diff_method, interface="tf")
        def circuit(x):
            qml.RY(x[0], wires=0)
            qml.RX(x[1], wires=0)
            return qml.expval(qml.PauliZ(0))

        x = tf.Variable([1.0, 2.0], dtype=tf.float64)

        with tf.GradientTape() as tape1:
            with tf.GradientTape() as tape2:
                res = circuit(x)
            g = tape2.gradient(res, x)

        spy = mocker.spy(JacobianTape, "hessian")
        hess = tape1.jacobian(g, x)

        if diff_method == "parameter-shift":
            spy.assert_called_once()
        elif diff_method == "backprop":
            spy.assert_not_called()

        a, b = x * 1.0

        expected_res = tf.cos(a) * tf.cos(b)
        assert np.allclose(res, expected_res, atol=tol, rtol=0)

        expected_g = [-tf.sin(a) * tf.cos(b), -tf.cos(a) * tf.sin(b)]
        assert np.allclose(g, expected_g, atol=tol, rtol=0)

        expected_hess = [
            [-tf.cos(a) * tf.cos(b), tf.sin(a) * tf.sin(b)],
            [tf.sin(a) * tf.sin(b), -tf.cos(a) * tf.cos(b)],
        ]
        assert np.allclose(hess, expected_hess, atol=tol, rtol=0)

    def test_hessian_vector_valued(self, dev_name, diff_method, mocker, tol):
        """Test Hessian calculation of a vector-valued QNode"""
        if diff_method not in {"parameter-shift", "backprop"}:
            pytest.skip("Test only supports parameter-shift or backprop")

        dev = qml.device(dev_name, wires=1)

        @qnode(dev, diff_method=diff_method, interface="tf")
        def circuit(x):
            qml.RY(x[0], wires=0)
            qml.RX(x[1], wires=0)
            return qml.probs(wires=0)

        x = tf.Variable([1.0, 2.0], dtype=tf.float64)

        with tf.GradientTape(persistent=True) as tape1:
            with tf.GradientTape(persistent=True) as tape2:
                res = circuit(x)

            spy = mocker.spy(JacobianTape, "hessian")
            g = tape2.jacobian(res, x, experimental_use_pfor=False)

        hess = tape1.jacobian(g, x, experimental_use_pfor=False)

        if diff_method == "parameter-shift":
            spy.assert_called_once()
        elif diff_method == "backprop":
            spy.assert_not_called()

        a, b = x * 1.0

        expected_res = [
            0.5 + 0.5 * tf.cos(a) * tf.cos(b),
            0.5 - 0.5 * tf.cos(a) * tf.cos(b),
        ]
        assert np.allclose(res, expected_res, atol=tol, rtol=0)

        expected_g = [
            [-0.5 * tf.sin(a) * tf.cos(b), -0.5 * tf.cos(a) * tf.sin(b)],
            [0.5 * tf.sin(a) * tf.cos(b), 0.5 * tf.cos(a) * tf.sin(b)],
        ]
        assert np.allclose(g, expected_g, atol=tol, rtol=0)

        expected_hess = [
            [
                [-0.5 * tf.cos(a) * tf.cos(b), 0.5 * tf.sin(a) * tf.sin(b)],
                [0.5 * tf.sin(a) * tf.sin(b), -0.5 * tf.cos(a) * tf.cos(b)],
            ],
            [
                [0.5 * tf.cos(a) * tf.cos(b), -0.5 * tf.sin(a) * tf.sin(b)],
                [-0.5 * tf.sin(a) * tf.sin(b), 0.5 * tf.cos(a) * tf.cos(b)],
            ],
        ]
        np.testing.assert_allclose(hess, expected_hess, atol=tol, rtol=0, verbose=True)

    def test_hessian_ragged(self, dev_name, diff_method, mocker, tol):
        """Test Hessian calculation of a ragged QNode"""
        if diff_method not in {"parameter-shift", "backprop"}:
            pytest.skip("Test only supports parameter-shift or backprop")

        dev = qml.device(dev_name, wires=2)

        @qnode(dev, diff_method=diff_method, interface="tf")
        def circuit(x):
            qml.RY(x[0], wires=0)
            qml.RX(x[1], wires=0)
            qml.RY(x[0], wires=1)
            qml.RX(x[1], wires=1)
            return qml.expval(qml.PauliZ(0)), qml.probs(wires=1)

        x = tf.Variable([1.0, 2.0], dtype=tf.float64)
        res = circuit(x)

        with tf.GradientTape(persistent=True) as tape1:
            with tf.GradientTape(persistent=True) as tape2:
                res = circuit(x)

            spy = mocker.spy(JacobianTape, "hessian")
            g = tape2.jacobian(res, x, experimental_use_pfor=False)

        hess = tape1.jacobian(g, x, experimental_use_pfor=False)

        if diff_method == "parameter-shift":
            spy.assert_called_once()
        elif diff_method == "backprop":
            spy.assert_not_called()

        a, b = x * 1.0

        expected_res = [
            tf.cos(a) * tf.cos(b),
            0.5 + 0.5 * tf.cos(a) * tf.cos(b),
            0.5 - 0.5 * tf.cos(a) * tf.cos(b),
        ]
        assert np.allclose(res, expected_res, atol=tol, rtol=0)

        expected_g = [
            [-tf.sin(a) * tf.cos(b), -tf.cos(a) * tf.sin(b)],
            [-0.5 * tf.sin(a) * tf.cos(b), -0.5 * tf.cos(a) * tf.sin(b)],
            [0.5 * tf.sin(a) * tf.cos(b), 0.5 * tf.cos(a) * tf.sin(b)],
        ]
        assert np.allclose(g, expected_g, atol=tol, rtol=0)

        expected_hess = [
            [
                [-tf.cos(a) * tf.cos(b), tf.sin(a) * tf.sin(b)],
                [tf.sin(a) * tf.sin(b), -tf.cos(a) * tf.cos(b)],
            ],
            [
                [-0.5 * tf.cos(a) * tf.cos(b), 0.5 * tf.sin(a) * tf.sin(b)],
                [0.5 * tf.sin(a) * tf.sin(b), -0.5 * tf.cos(a) * tf.cos(b)],
            ],
            [
                [0.5 * tf.cos(a) * tf.cos(b), -0.5 * tf.sin(a) * tf.sin(b)],
                [-0.5 * tf.sin(a) * tf.sin(b), 0.5 * tf.cos(a) * tf.cos(b)],
            ],
        ]
        np.testing.assert_allclose(hess, expected_hess, atol=tol, rtol=0, verbose=True)


def qtransform(qnode, a, framework=tf):
"""Transforms every RY(y) gate in a circuit to RX(-a*cos(y))"""
Expand Down