Add caching to the autograd batch interface (#1508)
* Added differentiable VJP transform

* linting

* more tests

* linting

* add tests

* add comment

* fix

* more

* typos

* Apply suggestions from code review

Co-authored-by: Nathan Killoran <co9olguy@users.noreply.github.com>

* fixes

* add tests

* more tests

* renamed

* typo

* Add caching to the autograd backend

* more

* more

* more

* more

* caching

* fix

* fix

* fix tests

* final

* update changelog

* update

* more

* revert formatting

* more

* add tests

* linting

* merge master

* Apply suggestions from code review

Co-authored-by: Maria Schuld <mariaschuld@gmail.com>

* fix

* Apply suggestions from code review

Co-authored-by: Nathan Killoran <co9olguy@users.noreply.github.com>

* linting

* linting

* linting

* remove pass

* changelog

* Apply suggestions from code review

Co-authored-by: Tom Bromley <49409390+trbromley@users.noreply.github.com>

* Update pennylane/interfaces/batch/__init__.py

* Add hashing tests

* Apply suggestions from code review

Co-authored-by: Tom Bromley <49409390+trbromley@users.noreply.github.com>

Co-authored-by: Nathan Killoran <co9olguy@users.noreply.github.com>
Co-authored-by: Maria Schuld <mariaschuld@gmail.com>
Co-authored-by: Tom Bromley <49409390+trbromley@users.noreply.github.com>
4 people committed Aug 20, 2021
1 parent 29d1daa commit 117599e
Showing 10 changed files with 693 additions and 24 deletions.
11 changes: 9 additions & 2 deletions .github/CHANGELOG.md
@@ -57,12 +57,15 @@
```

* Support for differentiable execution of batches of circuits has been
added, via the beta `pennylane.batch` module.
added, via the beta `pennylane.interfaces.batch` module.
[(#1501)](https://github.com/PennyLaneAI/pennylane/pull/1501)
[(#1508)](https://github.com/PennyLaneAI/pennylane/pull/1508)

For example:

```python
from pennylane.interfaces.batch import execute

def cost_fn(x):
with qml.tape.JacobianTape() as tape1:
qml.RX(x[0], wires=[0])
@@ -76,7 +79,11 @@
qml.CNOT(wires=[0, 1])
qml.probs(wires=1)

result = execute([tape1, tape2], dev, gradient_fn=param_shift)
result = execute(
[tape1, tape2], dev,
gradient_fn=qml.gradients.param_shift,
interface="autograd"
)
return result[0] + result[1][0, 0]

res = qml.grad(cost_fn)(params)
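For reference, a self-contained version of the changelog snippet above. The middle of the snippet is collapsed in the diff, so the device, parameter values, and the gates filling that gap are assumptions chosen for illustration rather than taken from the PR:

```python
import pennylane as qml
from pennylane import numpy as np
from pennylane.interfaces.batch import execute

dev = qml.device("default.qubit", wires=2)  # assumed device
params = np.array([0.1, 0.2], requires_grad=True)  # assumed parameters

def cost_fn(x):
    with qml.tape.JacobianTape() as tape1:
        qml.RX(x[0], wires=[0])
        qml.RY(x[1], wires=[1])
        qml.CNOT(wires=[0, 1])
        qml.expval(qml.PauliZ(0))

    with qml.tape.JacobianTape() as tape2:
        qml.RX(x[0], wires=[0])
        qml.CNOT(wires=[0, 1])
        qml.probs(wires=1)

    # Both tapes are executed in a single differentiable batch.
    result = execute(
        [tape1, tape2], dev,
        gradient_fn=qml.gradients.param_shift,
        interface="autograd",
    )
    return result[0] + result[1][0, 0]

res = qml.grad(cost_fn)(params)
```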
160 changes: 152 additions & 8 deletions pennylane/interfaces/batch/__init__.py
@@ -15,13 +15,137 @@
This subpackage defines functions for interfacing devices' batch execution
capabilities with different machine learning libraries.
"""
# pylint: disable=import-outside-toplevel,too-many-arguments
# pylint: disable=import-outside-toplevel,too-many-arguments,too-many-branches
from functools import wraps

from cachetools import LRUCache
import numpy as np

import pennylane as qml

from .autograd import execute as execute_autograd


def execute(tapes, device, gradient_fn, interface="autograd", mode="best", gradient_kwargs=None):
def cache_execute(fn, cache, pass_kwargs=False, return_tuple=True):
"""Decorator that adds caching to a function that executes
multiple tapes on a device.
This decorator makes use of :attr:`.QuantumTape.hash` to identify
unique tapes.
- If a tape does not match a hash in the cache, then the tape
has not been previously executed. It is executed, and the result
added to the cache.
- If a tape matches a hash in the cache, then the tape has been previously
executed. The corresponding cached result is
extracted, and the tape is not passed to the execution function.
- Finally, there might be the case where one or more tapes in the current
set of tapes to be executed are identical and thus share a hash. If this is the case,
duplicates are removed, to avoid redundant evaluations.
Args:
fn (callable): The execution function to add caching to.
This function should have the signature ``fn(tapes, **kwargs)``,
and it should return ``list[tensor_like]``, with the
same length as the input ``tapes``.
cache (None or dict or Cache or bool): The cache to use. If ``None``,
caching will not occur.
pass_kwargs (bool): If ``True``, keyword arguments passed to the
wrapped function will be passed directly to ``fn``. If ``False``,
they will be ignored.
return_tuple (bool): If ``True``, the output of ``fn`` is returned
as a tuple ``(fn_output, [])``, to match the output of execution functions
that also return gradients.
Returns:
function: a wrapped version of the execution function ``fn`` with caching
support
"""

@wraps(fn)
def wrapper(tapes, **kwargs):

if not pass_kwargs:
kwargs = {}

if cache is None or (isinstance(cache, bool) and not cache):
# No caching. Simply execute the execution function
# and return the results.
res = fn(tapes, **kwargs)
return (res, []) if return_tuple else res

execution_tapes = {}
cached_results = {}
hashes = {}
repeated = {}

for i, tape in enumerate(tapes):
h = tape.hash

if h in hashes.values():
# Tape already exists within ``tapes``. Determine the
# index of the first occurrence of the tape, store this,
# and continue to the next iteration.
idx = list(hashes.keys())[list(hashes.values()).index(h)]
repeated[i] = idx
continue

hashes[i] = h

if hashes[i] in cache:
# Tape exists within the cache, store the cached result
cached_results[i] = cache[hashes[i]]
else:
# Tape does not exist within the cache, store the tape
# for execution via the execution function.
execution_tapes[i] = tape

# if there are no execution tapes, simply return!
if not execution_tapes:
if not repeated:
res = list(cached_results.values())
return (res, []) if return_tuple else res

else:
# execute all unique tapes that do not exist in the cache
res = fn(execution_tapes.values(), **kwargs)

final_res = []

for i, tape in enumerate(tapes):
if i in cached_results:
# insert cached results into the results vector
final_res.append(cached_results[i])

elif i in repeated:
# insert repeated results into the results vector
final_res.append(final_res[repeated[i]])

else:
# insert evaluated results into the results vector
r = res.pop(0)
final_res.append(r)
cache[hashes[i]] = r

return (final_res, []) if return_tuple else final_res

wrapper.fn = fn
return wrapper


def execute(
tapes,
device,
gradient_fn,
interface="autograd",
mode="best",
gradient_kwargs=None,
cache=True,
cachesize=10000,
max_diff=2,
):
"""Execute a batch of tapes on a device in an autodifferentiable-compatible manner.
Args:
Expand All @@ -42,6 +166,13 @@ def execute(tapes, device, gradient_fn, interface="autograd", mode="best", gradi
pass.
gradient_kwargs (dict): dictionary of keyword arguments to pass when
determining the gradients of tapes
cache (bool): Whether to cache evaluations. This can result in
a significant reduction in quantum evaluations during gradient computations.
cachesize (int): the size of the cache
max_diff (int): If ``gradient_fn`` is a gradient transform, this option specifies
the maximum number of derivatives to support. Increasing this value allows
for higher order derivatives to be extracted, at the cost of additional
(classical) computational overhead during the backwards pass.
Returns:
list[list[float]]: A nested list of tape results. Each element in
@@ -101,11 +232,15 @@ def cost_fn(params, x):
[ 0.01983384, -0.97517033, 0. ],
[ 0. , 0. , -0.95533649]])
"""
# Default execution function; simply call device.batch_execute
# and return no Jacobians.
execute_fn = lambda tapes, **kwargs: (device.batch_execute(tapes), [])
gradient_kwargs = gradient_kwargs or {}

if isinstance(cache, bool) and cache:
# cache=True: create a LRUCache object
cache = LRUCache(maxsize=cachesize, getsizeof=len)

# the default execution function is device.batch_execute
execute_fn = cache_execute(device.batch_execute, cache)

if gradient_fn == "device":
# gradient function is a device method

@@ -116,8 +251,13 @@ def cost_fn(params, x):
gradient_fn = None

elif mode == "backward":
# disable caching on the forward pass
execute_fn = cache_execute(device.batch_execute, cache=None)

# replace the backward gradient computation
gradient_fn = device.gradients
gradient_fn = cache_execute(
device.gradients, cache, pass_kwargs=True, return_tuple=False
)

elif mode == "forward":
# In "forward" mode, gradients are automatically handled
@@ -126,6 +266,10 @@ def cost_fn(params, x):
raise ValueError("Gradient transforms cannot be used with mode='forward'")

if interface == "autograd":
return execute_autograd(tapes, device, execute_fn, gradient_fn, gradient_kwargs)
res = execute_autograd(
tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=max_diff
)
else:
raise ValueError(f"Unknown interface {interface}")

raise ValueError(f"Unknown interface {interface}")
return res
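A minimal sketch (not part of the diff) of how the new `cache_execute` wrapper behaves, assuming a `default.qubit` device and the `QuantumTape.hash` / `Device.batch_execute` behaviour described above: duplicate tapes within a batch are executed once, and results for previously seen tapes are served from the cache.

```python
import pennylane as qml
from cachetools import LRUCache
from pennylane.interfaces.batch import cache_execute

dev = qml.device("default.qubit", wires=1)  # assumed device

with qml.tape.JacobianTape() as tape:
    qml.RX(0.4, wires=0)
    qml.expval(qml.PauliZ(0))

# Same cache construction that execute() uses for cache=True.
cache = LRUCache(maxsize=10000, getsizeof=len)
cached_fn = cache_execute(dev.batch_execute, cache)

# The two tapes share a hash: only one is sent to the device, the result
# is reused for the duplicate, and it is stored in the cache.
results, _ = cached_fn([tape, tape])

# The hash is now in the cache, so this call performs no device execution.
results_again, _ = cached_fn([tape])
```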
62 changes: 49 additions & 13 deletions pennylane/interfaces/batch/autograd.py
@@ -25,7 +25,7 @@
from pennylane import numpy as np


def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1):
def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=2):
"""Execute a batch of tapes with Autograd parameters on a device.
Args:
@@ -42,6 +42,10 @@ def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1):
gradient_fn (callable): the gradient function to use to compute quantum gradients
_n (int): a positive integer used to track nesting of derivatives, for example
if the nth-order derivative is requested.
max_diff (int): If ``gradient_fn`` is a gradient transform, this option specifies
the maximum order of derivatives to support. Increasing this value allows
for higher order derivatives to be extracted, at the cost of additional
(classical) computational overhead during the backwards pass.
Returns:
list[list[float]]: A nested list of tape results. Each element in
@@ -64,6 +68,7 @@ def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1):
gradient_fn=gradient_fn,
gradient_kwargs=gradient_kwargs,
_n=_n,
max_diff=max_diff,
)[0]


@@ -76,6 +81,7 @@ def _execute(
gradient_fn=None,
gradient_kwargs=None,
_n=1,
max_diff=2,
): # pylint: disable=dangerous-default-value,unused-argument
"""Autodifferentiable wrapper around ``Device.batch_execute``.
@@ -119,6 +125,7 @@ def vjp(
gradient_fn=None,
gradient_kwargs=None,
_n=1,
max_diff=2,
): # pylint: disable=dangerous-default-value,unused-argument
"""Returns the vector-Jacobian product operator for a batch of quantum tapes.
@@ -139,6 +146,10 @@ def vjp(
determining the gradients of tapes
_n (int): a positive integer used to track nesting of derivatives, for example
if the nth-order derivative is requested.
max_diff (int): If ``gradient_fn`` is a gradient transform, this option specifies
the maximum number of derivatives to support. Increasing this value allows
for higher order derivatives to be extracted, at the cost of additional
(classical) computational overhead during the backwards pass.
Returns:
function: this function accepts the backpropagation
@@ -169,18 +180,43 @@ def grad_fn(dy):
if "pennylane.gradients" in module_name:

# Generate and execute the required gradient tapes
vjp_tapes, processing_fn = qml.gradients.batch_vjp(
tapes, dy, gradient_fn, reduction="append", gradient_kwargs=gradient_kwargs
)

# This is where the magic happens. Note that we call ``execute``.
# This recursion, coupled with the fact that the gradient transforms
# are differentiable, allows for arbitrary order differentiation.
vjps = processing_fn(
execute(vjp_tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=_n + 1)
)

elif inspect.ismethod(gradient_fn) and gradient_fn.__self__ is device:
if _n == max_diff:
with qml.tape.Unwrap(*tapes):
vjp_tapes, processing_fn = qml.gradients.batch_vjp(
tapes,
dy,
gradient_fn,
reduction="append",
gradient_kwargs=gradient_kwargs,
)

vjps = processing_fn(execute_fn(vjp_tapes)[0])

else:
vjp_tapes, processing_fn = qml.gradients.batch_vjp(
tapes, dy, gradient_fn, reduction="append", gradient_kwargs=gradient_kwargs
)

# This is where the magic happens. Note that we call ``execute``.
# This recursion, coupled with the fact that the gradient transforms
# are differentiable, allows for arbitrary order differentiation.
vjps = processing_fn(
execute(
vjp_tapes,
device,
execute_fn,
gradient_fn,
gradient_kwargs,
_n=_n + 1,
max_diff=max_diff,
)
)

elif (
hasattr(gradient_fn, "fn")
and inspect.ismethod(gradient_fn.fn)
and gradient_fn.fn.__self__ is device
):
# Gradient function is a device method.
# Note that unlike the previous branch:
#
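A hedged sketch (not taken from the PR) of what the `max_diff` recursion enables: because the parameter-shift transform is itself differentiable, the autograd interface can be nested to obtain second derivatives. The circuit and parameter values are assumptions for illustration.

```python
import pennylane as qml
from pennylane import numpy as np
from pennylane.interfaces.batch import execute

dev = qml.device("default.qubit", wires=1)  # assumed device

def cost_fn(x):
    with qml.tape.JacobianTape() as tape:
        qml.RY(x[0], wires=0)
        qml.RX(x[1], wires=0)
        qml.expval(qml.PauliZ(0))

    # max_diff=2 (the default) keeps the backward pass differentiable,
    # so the gradient below can itself be differentiated.
    res = execute([tape], dev, gradient_fn=qml.gradients.param_shift, max_diff=2)
    return res[0][0]

x = np.array([0.5, 0.1], requires_grad=True)
grad = qml.grad(cost_fn)(x)                   # first order: parameter-shift VJP
hessian = qml.jacobian(qml.grad(cost_fn))(x)  # second order: recursive call with _n + 1
```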
20 changes: 20 additions & 0 deletions pennylane/measure.py
@@ -202,6 +202,26 @@ def queue(self, context=qml.QueuingContext):

return self

@property
def hash(self):
"""int: returns an integer hash uniquely representing the measurement process"""
if self.obs is None:
fingerprint = (
str(self.name),
tuple(self.wires.tolist()),
str(self.data),
self.return_type,
)
else:
fingerprint = (
str(self.obs.name),
tuple(self.wires.tolist()),
str(self.obs.data),
self.return_type,
)

return hash(fingerprint)


def expval(op):
r"""Expectation value of the supplied observable.
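A brief illustration (not part of the diff) of the new `MeasurementProcess.hash` property: structurally identical measurements share a fingerprint, which is what lets `cache_execute` recognise repeated tapes.

```python
import pennylane as qml

# Structurally identical measurement processes share a hash ...
m1 = qml.expval(qml.PauliZ(0))
m2 = qml.expval(qml.PauliZ(0))
assert m1.hash == m2.hash

# ... while a different wire (or observable) changes the fingerprint.
m3 = qml.expval(qml.PauliZ(1))
assert m1.hash != m3.hash
```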
15 changes: 15 additions & 0 deletions pennylane/operation.py
@@ -233,6 +233,16 @@ def classproperty(func):
# =============================================================================


def _process_data(op):
if op.name in ("RX", "RY", "RZ", "PhaseShift", "Rot"):
return str([d % (2 * np.pi) for d in op.data])

if op.name in ("CRX", "CRY", "CRZ", "CRot"):
return str([d % (4 * np.pi) for d in op.data])

return str(op.data)


class Operator(abc.ABC):
r"""Base class for quantum operators supported by a device.
@@ -282,6 +292,11 @@ def __deepcopy__(self, memo):
setattr(copied_op, attribute, copy.deepcopy(value, memo))
return copied_op

@property
def hash(self):
"""int: returns an integer hash uniquely representing the operator"""
return hash((str(self.name), tuple(self.wires.tolist()), _process_data(self)))

@classmethod
def _matrix(cls, *params):
"""Matrix representation of the operator
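Similarly, a short sketch (not part of the diff) of `Operator.hash` and `_process_data`: identical gates hash identically, different parameters or wires do not, and rotation angles are reduced modulo 2*pi (4*pi for controlled rotations) so that physically equivalent parametrisations are intended to collide.

```python
import numpy as np
import pennylane as qml

# Identical gates produce identical hashes ...
assert qml.RX(0.3, wires=0).hash == qml.RX(0.3, wires=0).hash

# ... while a different parameter (or wire) gives a different fingerprint.
assert qml.RX(0.3, wires=0).hash != qml.RX(0.4, wires=0).hash

# Rotation angles are compared modulo 2*pi, so equivalent angles are
# intended to share a hash; floating-point round-off in the modulo can
# occasionally break this for non-representable sums.
print(qml.RX(0.3, wires=0).hash == qml.RX(0.3 + 2 * np.pi, wires=0).hash)
```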