AdaGrad optimiser #1468

Draft · wants to merge 3 commits into base: main
532 changes: 532 additions & 0 deletions examples/optimisation/adagrad.ipynb

Large diffs are not rendered by default.
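Since the notebook diff is not rendered, here is a minimal NumPy sketch of the diagonal AdaGrad update that this PR implements. The quadratic objective, starting point, learning rate, and iteration count are illustrative assumptions, not taken from the notebook.

```python
import numpy as np

# Toy quadratic objective: f(p) = sum((p - target)**2), gradient 2 * (p - target)
target = np.array([2.0, -1.0])

p = np.zeros(2)        # starting point (illustrative)
alpha = 0.1            # fixed learning rate
eps = 1e-8             # small constant to avoid division by zero
v = np.zeros_like(p)   # online sum of squared gradients

for i in range(500):
    g = 2.0 * (p - target)              # gradient at the current point
    v += g ** 2                         # v_j[i] = v_j[i - 1] + g_j[i] ** 2
    p -= alpha * g / np.sqrt(v + eps)   # per-parameter, adaptively scaled step

print(p)  # moves towards [2, -1]; step sizes shrink as v grows
```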

1 change: 1 addition & 0 deletions pints/__init__.py
@@ -173,6 +173,7 @@ def version(formatted=False):
Optimiser,
PopulationBasedOptimiser,
)
from ._optimisers._adagrad import AdaGrad
from ._optimisers._adam import Adam
from ._optimisers._cmaes import CMAES
from ._optimisers._cmaes_bare import BareCMAES
145 changes: 145 additions & 0 deletions pints/_optimisers/_adagrad.py
@@ -0,0 +1,145 @@
#
# AdaGrad optimiser.
#
# This file is part of PINTS (https://github.com/pints-team/pints/) which is
# released under the BSD 3-clause license. See accompanying LICENSE.md for
# copyright notice and full license details.
#
import pints

import numpy as np


class AdaGrad(pints.Optimiser):
"""
AdaGrad optimiser (diagonal version) by [1]_, as given in [2]_ and [3]_.

Pseudo code is given below. Here, ``p_j[i]`` denotes the j-th parameter at
iteration i, while ``g_j[i]`` is the gradient with respect to parameter j.

v_j[i] = v_j[i - 1] + g_j[i] ** 2
p_j[i] = p_j[i - 1] - alpha * g_j[i] / sqrt(v_j[i] + eps)

Here ``v_j[0] = 0``, ``alpha`` is a fixed learning rate, and ``eps`` is a
small number used to avoid dividing by zero.

In this implementation, ``eps = 1e-8`` and ``alpha = min(sigma0)``. The
algorithm's adaptivity is said to "[eliminate] the need to manually tune
the learning rate [alpha]" [3]_.

References
----------
.. [1] Adaptive subgradient methods for online learning and stochastic
optimization. Duchi, Hazan, and Singer, 2011.
Journal of Machine Learning Research
https://dl.acm.org/doi/10.5555/1953048.2021068

.. [2] A Simple Convergence Proof of Adam and Adagrad.
Defossez, Bottou, Bach, and Usunier, 2020. arXiv.
https://arxiv.org/abs/2003.02395

.. [3] An overview of gradient descent optimization algorithms.
Ruder, 2016. arXiv.
https://arxiv.org/abs/1609.04747

"""

def __init__(self, x0, sigma0=0.1, boundaries=None):
super().__init__(x0, sigma0, boundaries)

# Set optimiser state
self._running = False
self._ready_for_tell = False

# Best solution found
self._x_best = self._x0
self._f_best = np.inf

# Current point, score, and gradient
self._current = self._x0
self._current_f = np.inf
self._current_df = None

# Proposed next point (read-only, so can be passed to user)
self._proposed = self._x0
self._proposed.setflags(write=False)

# Online sum of gradient**2
self._v = np.zeros(self._x0.shape)

# Small number added to avoid divide-by-zero
self._eps = 1e-8

# Step size
self._alpha = np.min(self._sigma0)

def ask(self):
""" See :meth:`Optimiser.ask()`. """

# Running, and ready for tell now
self._ready_for_tell = True
self._running = True

# Return proposed points (just the one)
return [self._proposed]

def f_best(self):
""" See :meth:`Optimiser.f_best()`. """
return self._f_best

def f_guessed(self):
""" See :meth:`Optimiser.f_guessed()`. """
return self._current_f

def name(self):
""" See :meth:`Optimiser.name()`. """
return 'AdaGrad'

def needs_sensitivities(self):
""" See :meth:`Optimiser.needs_sensitivities()`. """
return True

def n_hyper_parameters(self):
""" See :meth:`pints.TunableMethod.n_hyper_parameters()`. """
return 0

def running(self):
""" See :meth:`Optimiser.running()`. """
return self._running

def tell(self, reply):
""" See :meth:`Optimiser.tell()`. """

# Check ask-tell pattern
if not self._ready_for_tell:
raise Exception('ask() not called before tell()')
self._ready_for_tell = False

# Unpack reply
fx, dfx = reply[0]

# Update current point
self._current = self._proposed
self._current_f = fx
self._current_df = dfx

# Update online sum of gradient**2
self._v += dfx**2

# Take step
self._proposed = (
self._current - self._alpha * dfx / np.sqrt(self._v + self._eps))

# Update x_best and f_best
if self._f_best > fx:
self._f_best = fx
self._x_best = self._current

def x_best(self):
""" See :meth:`Optimiser.x_best()`. """
return self._x_best

def x_guessed(self):
""" See :meth:`Optimiser.x_guessed()`. """
return self._current
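
For completeness, below is a minimal sketch of driving the new class directly through its ask/tell interface, matching how `tell()` unpacks `reply[0]` into a score and gradient. The toy objective, starting point, and iteration count are illustrative assumptions; in normal use the optimiser would more likely be run through `pints.OptimisationController` on an error measure or log-PDF that provides sensitivities.

```python
import numpy as np
import pints


def f_and_grad(x):
    """Toy objective f(x) = sum(x**2) and its gradient (illustrative only)."""
    return np.sum(x ** 2), 2.0 * x


# alpha is set to min(sigma0), so sigma0 also acts as the learning rate here
opt = pints.AdaGrad(x0=np.array([3.0, -2.0]), sigma0=0.1)

for i in range(500):
    xs = opt.ask()                   # list containing a single proposed point
    fx, dfx = f_and_grad(xs[0])
    opt.tell([(fx, dfx)])            # reply[0] is (score, gradient), as tell() expects

print(opt.x_best(), opt.f_best())    # best point and score seen so far
```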