
Add support for the parameter-shift hessian to the Torch interface (#1129)

* Add support for the parameter-shift hessian to the Torch interface

* update changelog

* fixes

* fixes

* fix

* line break

* Update pennylane/interfaces/torch.py

Co-authored-by: Christina Lee <christina@xanadu.ai>

* Update pennylane/tape/jacobian_tape.py

Co-authored-by: Christina Lee <christina@xanadu.ai>

Co-authored-by: Christina Lee <christina@xanadu.ai>
josh146 and albi3ro authored Mar 26, 2021
1 parent 2069021 commit ede35f4
Showing 3 changed files with 257 additions and 23 deletions.
33 changes: 33 additions & 0 deletions .github/CHANGELOG.md
@@ -2,6 +2,39 @@

<h3>New features since last release</h3>

* Computing second derivatives and Hessians of QNodes is now supported when
using the PyTorch interface.
[(#1129)](https://github.com/PennyLaneAI/pennylane/pull/1129/files)

Hessians are computed using the parameter-shift rule, and can be
evaluated on both hardware and simulator devices.

```python
import torch
import pennylane as qml
from torch.autograd.functional import jacobian, hessian

dev = qml.device('default.qubit', wires=1)

@qml.qnode(dev, interface='torch', diff_method="parameter-shift")
def circuit(p):
    qml.RY(p[0], wires=0)
    qml.RX(p[1], wires=0)
    return qml.probs(wires=0)

x = torch.tensor([1.0, 2.0], requires_grad=True)
```

```python
>>> circuit(x)
tensor([0.3876, 0.6124], dtype=torch.float64, grad_fn=<SqueezeBackward0>)
>>> jacobian(circuit, x)
tensor([[ 0.1751, -0.2456],
        [-0.1751,  0.2456]], grad_fn=<ViewBackward>)
>>> hessian(circuit, x)
tensor([[[ 0.1124,  0.3826],
         [ 0.3826,  0.1124]],
        [[-0.1124, -0.3826],
         [-0.3826, -0.1124]]])
```

* The TensorFlow interface now supports computing second derivatives and Hessians of hybrid quantum models.
Second derivatives are supported on both hardware and simulators.
[(#1110)](https://github.com/PennyLaneAI/pennylane/pull/1110)
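The new differentiability also composes with PyTorch's double-backward mechanism directly. As a minimal sketch (not part of this diff), assuming the `circuit` and `x` defined in the PyTorch changelog entry above, the same second derivatives can be assembled with `torch.autograd.grad` and `create_graph=True`:

```python
import torch

# Differentiate the probability of measuring |0> twice with respect to x.
prob0 = circuit(x)[0]                        # scalar output of the QNode
(grad,) = torch.autograd.grad(prob0, x, create_graph=True)

# Differentiating each gradient entry again yields one row of the Hessian.
rows = [torch.autograd.grad(g, x, retain_graph=True)[0] for g in grad]
hess_prob0 = torch.stack(rows)               # should match hessian(circuit, x)[0]
```

Each extra differentiation invokes the parameter-shift Hessian on the device; the caching added in `torch.py` below avoids recomputing the full matrix across the repeated backward passes.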
103 changes: 80 additions & 23 deletions pennylane/interfaces/torch.py
@@ -76,6 +76,81 @@ def forward(ctx, input_kwargs, *input_):
        if hasattr(res, "numpy"):
            res = res.numpy()

        ctx.saved_grad_matrices = {}

        def _evaluate_grad_matrix(grad_matrix_fn):
            """Convenience function for generating gradient matrices
            for the given parameter values.

            This function serves two purposes:

            * Avoids duplicating logic surrounding parameter unwrapping/wrapping

            * Takes advantage of closure, to cache computed gradient matrices via
              the ctx.saved_grad_matrices attribute, to avoid gradient matrices being
              computed multiple redundant times.

              This is particularly useful when differentiating vector-valued QNodes.
              Because PyTorch requests the vector-GradMatrix product,
              and *not* the full GradMatrix, differentiating vector-valued
              functions will result in multiple backward passes.

            Args:
                grad_matrix_fn (str): Name of the gradient matrix function. Should correspond to an existing
                    tape method. Currently allowed values include ``"jacobian"`` and ``"hessian"``.

            Returns:
                array[float]: the gradient matrix
            """
            if grad_matrix_fn in ctx.saved_grad_matrices:
                return ctx.saved_grad_matrices[grad_matrix_fn]

            tape.set_parameters(ctx.all_params_unwrapped, trainable_only=False)
            grad_matrix = getattr(tape, grad_matrix_fn)(
                device, params=ctx.args, **tape.jacobian_options
            )
            tape.set_parameters(ctx.all_params, trainable_only=False)

            grad_matrix = torch.as_tensor(torch.from_numpy(grad_matrix), dtype=tape.dtype)
            ctx.saved_grad_matrices[grad_matrix_fn] = grad_matrix

            return grad_matrix

        class _Jacobian(torch.autograd.Function):
            @staticmethod
            def forward(ctx_, parent_ctx, *input_):
                """Implements the forward pass QNode Jacobian evaluation"""
                ctx_.dy = parent_ctx.dy
                ctx_.save_for_backward(*input_)
                jacobian = _evaluate_grad_matrix("jacobian")
                return jacobian

            @staticmethod
            def backward(ctx_, ddy):  # pragma: no cover
                """Implements the backward pass QNode vector-Hessian product"""
                hessian = _evaluate_grad_matrix("hessian")

                if torch.squeeze(ddy).ndim > 1:
                    vhp = ctx_.dy.view(1, -1) @ ddy @ hessian @ ctx_.dy.view(-1, 1)
                else:
                    vhp = ddy @ hessian

                vhp = torch.unbind(vhp.view(-1))

                grad_input = []

                # match the type and device of the input tensors
                for i, j in zip(vhp, ctx_.saved_tensors):
                    res = torch.as_tensor(i, dtype=tape.dtype)
                    if j.is_cuda:  # pragma: no cover
                        cuda_device = j.get_device()
                        res = torch.as_tensor(res, device=cuda_device)
                    grad_input.append(res)

                return (None,) + tuple(grad_input)

        ctx.jacobian = _Jacobian

        # if any input tensor uses the GPU, the output should as well
        for i in input_:
            if isinstance(i, torch.Tensor):
@@ -94,30 +169,12 @@ def forward(ctx, input_kwargs, *input_):
         return torch.as_tensor(torch.from_numpy(res), dtype=tape.dtype)

     @staticmethod
-    def backward(ctx, grad_output):  # pragma: no cover
+    def backward(ctx, dy):  # pragma: no cover
         """Implements the backwards pass QNode vector-Jacobian product"""
         tape = ctx.kwargs["tape"]
         device = ctx.kwargs["device"]

-        tape.set_parameters(ctx.all_params_unwrapped, trainable_only=False)
-        jacobian = tape.jacobian(device, params=ctx.args, **tape.jacobian_options)
-        tape.set_parameters(ctx.all_params, trainable_only=False)
-
-        jacobian = torch.as_tensor(jacobian, dtype=grad_output.dtype).to(grad_output)
-
-        vjp = grad_output.view(1, -1) @ jacobian
-        grad_input_list = torch.unbind(vjp.flatten())
-        grad_input = []
-
-        # match the type and device of the input tensors
-        for i, j in zip(grad_input_list, ctx.saved_tensors):
-            res = torch.as_tensor(i, dtype=tape.dtype)
-            if j.is_cuda:  # pragma: no cover
-                cuda_device = j.get_device()
-                res = torch.as_tensor(res, device=cuda_device)
-            grad_input.append(res)
-
-        return (None,) + tuple(grad_input)
+        ctx.dy = dy
+        vjp = dy.view(1, -1) @ ctx.jacobian.apply(ctx, *ctx.saved_tensors)
+        vjp = torch.unbind(vjp.view(-1))
+        return (None,) + tuple(vjp)


class TorchInterface(AnnotatedQueue):
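For orientation: the change above routes the QNode's vector-Jacobian product through an inner `torch.autograd.Function` (`_Jacobian`) whose own backward pass supplies the vector-Hessian product. The toy example below is a sketch of the same nesting pattern in isolation, using an elementwise `sin` in place of a quantum tape; it is illustrative only, not PennyLane code:

```python
import torch

class Sin(torch.autograd.Function):
    """Toy analogue of the pattern above: backward() is built from another
    autograd Function, so it can itself be differentiated once more."""

    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        return torch.sin(x)

    @staticmethod
    def backward(ctx, dy):
        (x,) = ctx.saved_tensors
        # vector-Jacobian product; _SinJacobian.backward provides the second derivative
        return dy * _SinJacobian.apply(x)

class _SinJacobian(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        return torch.cos(x)            # d/dx sin(x)

    @staticmethod
    def backward(ctx, ddy):
        (x,) = ctx.saved_tensors
        return ddy * (-torch.sin(x))   # d^2/dx^2 sin(x)

x = torch.tensor([0.3, 1.2], requires_grad=True)
y = Sin.apply(x).sum()
(g,) = torch.autograd.grad(y, x, create_graph=True)   # cos(x)
(h,) = torch.autograd.grad(g.sum(), x)                # -sin(x), the Hessian diagonal
```

The second `torch.autograd.grad` call reaches `_SinJacobian.backward`, just as a second differentiation of a QNode reaches the tape's parameter-shift `hessian` method above.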
144 changes: 144 additions & 0 deletions tests/interfaces/test_qnode_torch.py
@@ -21,6 +21,7 @@
import pennylane as qml
from pennylane import qnode, QNode
from pennylane.tape import JacobianTape
from torch.autograd.functional import hessian, jacobian


@pytest.mark.parametrize(
@@ -527,6 +528,149 @@ def circuit():
        assert isinstance(res[0], torch.Tensor)
        assert isinstance(res[1], torch.Tensor)

    def test_hessian(self, dev_name, diff_method, mocker, tol):
        """Test hessian calculation of a scalar valued QNode"""
        if diff_method not in {"parameter-shift", "backprop"}:
            pytest.skip("Test only supports parameter-shift or backprop")

        dev = qml.device(dev_name, wires=1)

        @qnode(dev, diff_method=diff_method, interface="torch")
        def circuit(x):
            qml.RY(x[0], wires=0)
            qml.RX(x[1], wires=0)
            return qml.expval(qml.PauliZ(0))

        x = torch.tensor([1.0, 2.0], requires_grad=True)
        res = circuit(x)

        res.backward()
        g = x.grad

        spy = mocker.spy(JacobianTape, "hessian")
        hess = hessian(circuit, x)
        spy.assert_called_once()

        a, b = x.detach().numpy()

        expected_res = np.cos(a) * np.cos(b)
        assert np.allclose(res.detach(), expected_res, atol=tol, rtol=0)

        expected_g = [-np.sin(a) * np.cos(b), -np.cos(a) * np.sin(b)]
        assert np.allclose(g.detach(), expected_g, atol=tol, rtol=0)

        expected_hess = [
            [-np.cos(a) * np.cos(b), np.sin(a) * np.sin(b)],
            [np.sin(a) * np.sin(b), -np.cos(a) * np.cos(b)]
        ]
        assert np.allclose(hess.detach(), expected_hess, atol=tol, rtol=0)

    def test_hessian_vector_valued(self, dev_name, diff_method, mocker, tol):
        """Test hessian calculation of a vector valued QNode"""
        if diff_method not in {"parameter-shift", "backprop"}:
            pytest.skip("Test only supports parameter-shift or backprop")

        dev = qml.device(dev_name, wires=1)

        @qnode(dev, diff_method=diff_method, interface="torch")
        def circuit(x):
            qml.RY(x[0], wires=0)
            qml.RX(x[1], wires=0)
            return qml.probs(wires=0)

        x = torch.tensor([1.0, 2.0], requires_grad=True)
        res = circuit(x)
        jac_fn = lambda x: jacobian(circuit, x, create_graph=True)

        g = jac_fn(x)

        spy = mocker.spy(JacobianTape, "hessian")
        hess = jacobian(jac_fn, x)
        spy.assert_called_once()

        a, b = x.detach().numpy()

        expected_res = [
            0.5 + 0.5 * np.cos(a) * np.cos(b),
            0.5 - 0.5 * np.cos(a) * np.cos(b)
        ]
        assert np.allclose(res.detach(), expected_res, atol=tol, rtol=0)

        expected_g = [
            [-0.5 * np.sin(a) * np.cos(b), -0.5 * np.cos(a) * np.sin(b)],
            [0.5 * np.sin(a) * np.cos(b), 0.5 * np.cos(a) * np.sin(b)]
        ]
        assert np.allclose(g.detach(), expected_g, atol=tol, rtol=0)

        expected_hess = [
            [
                [-0.5 * np.cos(a) * np.cos(b), 0.5 * np.sin(a) * np.sin(b)],
                [0.5 * np.sin(a) * np.sin(b), -0.5 * np.cos(a) * np.cos(b)]
            ],
            [
                [0.5 * np.cos(a) * np.cos(b), -0.5 * np.sin(a) * np.sin(b)],
                [-0.5 * np.sin(a) * np.sin(b), 0.5 * np.cos(a) * np.cos(b)]
            ]
        ]
        assert np.allclose(hess.detach(), expected_hess, atol=tol, rtol=0)

    def test_hessian_ragged(self, dev_name, diff_method, mocker, tol):
        """Test hessian calculation of a ragged QNode"""
        if diff_method not in {"parameter-shift", "backprop"}:
            pytest.skip("Test only supports parameter-shift or backprop")

        dev = qml.device(dev_name, wires=2)

        @qnode(dev, diff_method=diff_method, interface="torch")
        def circuit(x):
            qml.RY(x[0], wires=0)
            qml.RX(x[1], wires=0)
            qml.RY(x[0], wires=1)
            qml.RX(x[1], wires=1)
            return qml.expval(qml.PauliZ(0)), qml.probs(wires=1)

        x = torch.tensor([1.0, 2.0], requires_grad=True)
        res = circuit(x)
        jac_fn = lambda x: jacobian(circuit, x, create_graph=True)

        g = jac_fn(x)

        spy = mocker.spy(JacobianTape, "hessian")
        hess = jacobian(jac_fn, x)
        spy.assert_called_once()

        a, b = x.detach().numpy()

        expected_res = [
            np.cos(a) * np.cos(b),
            0.5 + 0.5 * np.cos(a) * np.cos(b),
            0.5 - 0.5 * np.cos(a) * np.cos(b)
        ]
        assert np.allclose(res.detach(), expected_res, atol=tol, rtol=0)

        expected_g = [
            [-np.sin(a) * np.cos(b), -np.cos(a) * np.sin(b)],
            [-0.5 * np.sin(a) * np.cos(b), -0.5 * np.cos(a) * np.sin(b)],
            [0.5 * np.sin(a) * np.cos(b), 0.5 * np.cos(a) * np.sin(b)]
        ]
        assert np.allclose(g.detach(), expected_g, atol=tol, rtol=0)

        expected_hess = [
            [
                [-np.cos(a) * np.cos(b), np.sin(a) * np.sin(b)],
                [np.sin(a) * np.sin(b), -np.cos(a) * np.cos(b)]
            ],
            [
                [-0.5 * np.cos(a) * np.cos(b), 0.5 * np.sin(a) * np.sin(b)],
                [0.5 * np.sin(a) * np.sin(b), -0.5 * np.cos(a) * np.cos(b)]
            ],
            [
                [0.5 * np.cos(a) * np.cos(b), -0.5 * np.sin(a) * np.sin(b)],
                [-0.5 * np.sin(a) * np.sin(b), 0.5 * np.cos(a) * np.cos(b)]
            ]
        ]
        assert np.allclose(hess.detach(), expected_hess, atol=tol, rtol=0)


def qtransform(qnode, a, framework=torch):
    """Transforms every RY(y) gate in a circuit to RX(-a*cos(y))"""
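As a quick, self-contained sanity check (not part of the test suite), the closed-form second derivatives asserted in `test_hessian` can be confirmed with central finite differences:

```python
import numpy as np

def f(a, b):
    # analytic expectation value of the test circuit: <Z> = cos(a) * cos(b)
    return np.cos(a) * np.cos(b)

a, b, eps = 1.0, 2.0, 1e-4

# central finite-difference estimates of the second derivatives
d2_aa = (f(a + eps, b) - 2 * f(a, b) + f(a - eps, b)) / eps**2
d2_bb = (f(a, b + eps) - 2 * f(a, b) + f(a, b - eps)) / eps**2
d2_ab = (f(a + eps, b + eps) - f(a + eps, b - eps)
         - f(a - eps, b + eps) + f(a - eps, b - eps)) / (4 * eps**2)

numeric = np.array([[d2_aa, d2_ab], [d2_ab, d2_bb]])
expected = np.array([[-np.cos(a) * np.cos(b), np.sin(a) * np.sin(b)],
                     [np.sin(a) * np.sin(b), -np.cos(a) * np.cos(b)]])
assert np.allclose(numeric, expected, atol=1e-5)
```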
