Merge pull request #185 from BrandonGarciaWx/decay_function_improvements
Decay Function Improvements
JustGlowing authored Apr 24, 2024
2 parents 9882f55 + fb07b2e commit 09d048c
220 changes: 159 additions & 61 deletions minisom.py
@@ -71,39 +71,14 @@ def fast_norm(x):
    return sqrt(dot(x, x.T))


-def asymptotic_decay(learning_rate, t, max_iter):
-    """Decay function of the learning process.
-    Parameters
-    ----------
-    learning_rate : float
-        current learning rate.
-    t : int
-        current iteration.
-    max_iter : int
-        maximum number of iterations for the training.
-    """
-    return learning_rate / (1+t/(max_iter/2))
-
-
-def linear_decay(learning_rate, t, max_iter):
-    return learning_rate * (1 - t/max_iter)
-
-
-def inverse_time_decay(learning_rate, t, max_iter):
-    C = max_iter / 100.0
-    return learning_rate * C / (C+t)


class MiniSom(object):
    Y_HEX_CONV_FACTOR = (3.0 / 2.0) / sqrt(3)

-    def __init__(self, x, y, input_len, sigma=1.0, learning_rate=0.5,
-                 decay_function=asymptotic_decay,
+    def __init__(self, x, y, input_len, sigma=None, learning_rate=0.5,
+                 learning_rate_decay_function='inverse_decay_to_zero',
                 neighborhood_function='gaussian', topology='rectangular',
                 activation_distance='euclidean', random_seed=None,
-                 sigma_decay_function=asymptotic_decay):
+                 sigma_decay_function='inverse_decay_to_one'):
        """Initializes a Self Organizing Maps.
        A rule of thumb to set the size of the grid for a dimensionality
@@ -124,31 +99,30 @@ def __init__(self, x, y, input_len, sigma=1.0, learning_rate=0.5,
        input_len : int
            Number of the elements of the vectors in input.
-        sigma : float, optional (default=1.0)
-            Spread of the neighborhood function, needs to be adequate
-            to the dimensions of the map.
-            (at the iteration t we have sigma(t) = sigma / (1 + t/T)
-            where T is #num_iteration/2)
-        learning_rate : initial learning rate
-            (at the iteration t we have
-            learning_rate(t) = learning_rate / (1 + t/T)
-            where T is #num_iteration/2)
-        decay_function : function (default=asymptotic_decay)
-            Function that reduces learning_rate at each iteration
-            the default function is:
-                learning_rate / (1+t/(max_iterarations/2))
-            A custom decay function will need to to take in input
-            three parameters in the following order:
-            1. learning rate
-            2. current iteration
-            3. maximum number of iterations allowed
-            Note that if a lambda function is used to define the decay
-            MiniSom will not be pickable anymore.
+        sigma : float, optional (default=sqrt(x^2 + y^2))
+            Spread of the neighborhood function.
+            Needs to be adequate to the dimensions of the map.
+            By default, at the iteration t, we have:
+                sigma(t) = sigma / (1 + (t * (sigma - 1) / max_iter))
+        learning_rate : float, optional (default=0.5)
+            Initial learning rate.
+            Adequate values are dependent on the data used for training.
+            By default, at the iteration t, we have:
+                learning_rate(t) = learning_rate / (1 + t * (100 / max_iter))
+        learning_rate_decay_function : string, optional
+            (default='inverse_decay_to_zero')
+            Function that reduces learning_rate at each iteration.
+            Possible values: 'inverse_decay_to_zero', 'linear_decay_to_zero',
+                             'asymptotic_decay'
+            The default function is:
+                learning_rate(t) = learning_rate / (1 + t * (100 / max_iter))
        neighborhood_function : string, optional (default='gaussian')
            Function that weights the neighborhood of a position in the map.
@@ -170,11 +144,19 @@ def euclidean(x, w):
        random_seed : int, optional (default=None)
            Random seed to use.
-        sigma_decay_function : function (default=asymptotic_decay)
+        sigma_decay_function : string, optional
+            (default='inverse_decay_to_one')
            Function that reduces sigma at each iteration.
+            Possible values: 'inverse_decay_to_one', 'linear_decay_to_one',
+                             'asymptotic_decay'
+            The default function is:
+                sigma(t) = sigma / (1 + (t * (sigma - 1) / max_iter))
        """
+        if sigma is None:
+            sigma = sqrt(x*x + y*y)
+
        if sigma > sqrt(x*x + y*y):
-            warn('Warning: sigma might be too high' +
+            warn('Warning: sigma might be too high ' +
                 'for the dimension of the map.')

        self._random_generator = random.RandomState(random_seed)
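
With this change the decay schedules are selected by name instead of by passing function objects. A minimal usage sketch of the new constructor arguments follows; the map size, random data, and the call to train_random are illustrative assumptions, not part of this diff:

    import numpy as np
    from minisom import MiniSom

    data = np.random.rand(100, 4)  # illustrative data, not from this commit

    # Pick the new string-based schedules by name; sigma is left at its
    # new default of sqrt(x^2 + y^2).
    som = MiniSom(7, 7, 4,
                  learning_rate=0.5,
                  learning_rate_decay_function='linear_decay_to_zero',
                  sigma_decay_function='linear_decay_to_one',
                  random_seed=42)
    som.train_random(data, 1000)  # assumes the existing train_random API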
@@ -204,8 +186,30 @@ def euclidean(x, w):
            warn('triangle neighborhood function does not ' +
                 'take in account hexagonal topology')

-        self._decay_function = decay_function
-        self._sigma_decay_function = sigma_decay_function
+        lr_decay_functions = {
+            'inverse_decay_to_zero': self._inverse_decay_to_zero,
+            'linear_decay_to_zero': self._linear_decay_to_zero,
+            'asymptotic_decay': self._asymptotic_decay}
+
+        if learning_rate_decay_function not in lr_decay_functions:
+            msg = '%s not supported. Functions available: %s'
+            raise ValueError(msg % (learning_rate_decay_function,
+                                    ', '.join(lr_decay_functions.keys())))
+
+        self._learning_rate_decay_function = \
+            lr_decay_functions[learning_rate_decay_function]
+
+        sig_decay_functions = {
+            'inverse_decay_to_one': self._inverse_decay_to_one,
+            'linear_decay_to_one': self._linear_decay_to_one,
+            'asymptotic_decay': self._asymptotic_decay}
+
+        if sigma_decay_function not in sig_decay_functions:
+            msg = '%s not supported. Functions available: %s'
+            raise ValueError(msg % (sigma_decay_function,
+                                    ', '.join(sig_decay_functions.keys())))
+
+        self._sigma_decay_function = sig_decay_functions[sigma_decay_function]

        neig_functions = {'gaussian': self._gaussian,
                          'mexican_hat': self._mexican_hat,
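
A minimal sketch of the name validation added above (assuming minisom is importable; 'exponential_decay' is a deliberately unsupported example name):

    from minisom import MiniSom

    try:
        MiniSom(5, 5, 3, learning_rate_decay_function='exponential_decay')
    except ValueError as err:
        # Prints, per the message format above:
        # exponential_decay not supported. Functions available:
        # inverse_decay_to_zero, linear_decay_to_zero, asymptotic_decay
        print(err)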
@@ -272,6 +276,93 @@ def activate(self, x):
        self._activate(x)
        return self._activation_map

+    def _inverse_decay_to_zero(self, learning_rate, t, max_iter):
+        """Decay function of the learning process that asymptotically
+        approaches zero.
+        Parameters
+        ----------
+        learning_rate : float
+            Current learning rate.
+        t : int
+            Current iteration.
+        max_iter : int
+            Maximum number of iterations for the training.
+        """
+        C = max_iter / 100.0
+        return learning_rate * C / (C + t)
+
+    def _linear_decay_to_zero(self, learning_rate, t, max_iter):
+        """Decay function of the learning process that linearly
+        decreases to zero.
+        Parameters
+        ----------
+        learning_rate : float
+            Current learning rate.
+        t : int
+            Current iteration.
+        max_iter : int
+            Maximum number of iterations for the training.
+        """
+        return learning_rate * (1 - t / max_iter)
+
+    def _inverse_decay_to_one(self, sigma, t, max_iter):
+        """Decay function of sigma that asymptotically approaches one.
+        Parameters
+        ----------
+        sigma : float
+            Current sigma.
+        t : int
+            Current iteration.
+        max_iter : int
+            Maximum number of iterations for the training.
+        """
+        C = (sigma - 1) / max_iter
+        return sigma / (1 + (t * C))
+
+    def _linear_decay_to_one(self, sigma, t, max_iter):
+        """Decay function of sigma that linearly decreases
+        to one.
+        Parameters
+        ----------
+        sigma : float
+            Current sigma.
+        t : int
+            Current iteration.
+        max_iter : int
+            Maximum number of iterations for the training.
+        """
+        return sigma + (t * (1 - sigma) / max_iter)
+
+    def _asymptotic_decay(self, dynamic_parameter, t, max_iter):
+        """Legacy default decay function of the learning process
+        and sigma that decays these values asymptotically to 1/3
+        of their original values.
+        Parameters
+        ----------
+        dynamic_parameter : float
+            Current learning rate/sigma.
+        t : int
+            Current iteration.
+        max_iter : int
+            Maximum number of iterations for the training.
+        """
+        return dynamic_parameter / (1 + t / (max_iter / 2))

    def _gaussian(self, c, sigma):
        """Returns a Gaussian centered in c."""
        d = 2*sigma*sigma
@@ -352,8 +443,8 @@ def update(self, x, win, t, max_iteration):
            If use_epochs is False:
                Maximum number of iterations (one iteration per sample).
        """
-        eta = self._decay_function(self._learning_rate, t, max_iteration)
-        # sigma and learning rate decrease with the same rule
+        eta = self._learning_rate_decay_function(self._learning_rate,
+                                                 t, max_iteration)
        sig = self._sigma_decay_function(self._sigma, t, max_iteration)
        # improves the performances
        g = self.neighborhood(win, sig)*eta
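
For intuition about the eta values update receives, the three learning-rate schedules can be compared side by side. This is a standalone sketch that copies the formulas from the docstrings above; it is not the library's own code:

    # Standalone copies of the three learning-rate schedules, evaluated
    # over a 100-iteration run with an initial learning rate of 0.5.
    def inverse_decay_to_zero(lr, t, max_iter):
        C = max_iter / 100.0
        return lr * C / (C + t)

    def linear_decay_to_zero(lr, t, max_iter):
        return lr * (1 - t / max_iter)

    def asymptotic_decay(lr, t, max_iter):
        return lr / (1 + t / (max_iter / 2))

    for t in (0, 50, 99):
        print(t,
              round(inverse_decay_to_zero(0.5, t, 100), 4),  # drops fast towards 0
              round(linear_decay_to_zero(0.5, t, 100), 4),   # reaches 0 at max_iter
              round(asymptotic_decay(0.5, t, 100), 4))       # tends to lr/3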
@@ -667,15 +758,22 @@ def setUp(self):
                                                  self.hex_som._weights[i, j]))
        self.hex_som._weights = zeros((5, 5, 1))  # fake weights

-    def test_asymptotic_decay_function(self):
-        assert asymptotic_decay(1., 2., 3.) == 1./(1.+2./(3./2))
+    def test_inverse_decay_to_zero_function(self):
+        C = 3 / 100
+        assert self.som._inverse_decay_to_zero(1, 2, 3) == 1 * C / (C + 2)

-    def test_linear_decay_function(self):
-        assert linear_decay(1., 2., 3.) == 1.*(1.-2./3)
+    def test_linear_decay_to_zero_function(self):
+        assert self.som._linear_decay_to_zero(1, 2, 3) == 1 * (1 - 2 / 3)

-    def test_inverse_time_function(self):
-        C = 3 / 100.
-        assert inverse_time_decay(1., 2., 3.) == 1. * C / (C + 2)
+    def test_inverse_decay_to_one_function(self):
+        C = (1 - 1) / 3
+        assert self.som._inverse_decay_to_one(1, 2, 3) == 1 / (1 + (2 * C))
+
+    def test_linear_decay_to_one_function(self):
+        assert self.som._linear_decay_to_one(1, 2, 3) == 1 + (2 * (1 - 1) / 3)
+
+    def test_asymptotic_decay_function(self):
+        assert self.som._asymptotic_decay(1, 2, 3) == 1 / (1 + 2 / (3 / 2))

    def test_fast_norm(self):
        assert fast_norm(array([1, 3])) == sqrt(1+9)
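
The new assertions can also be re-checked interactively outside the test class. A minimal sketch, assuming any MiniSom instance (the map size does not affect the decay methods):

    from minisom import MiniSom

    som = MiniSom(5, 5, 1, random_seed=1)
    C = 3 / 100
    assert som._inverse_decay_to_zero(1, 2, 3) == 1 * C / (C + 2)
    assert som._linear_decay_to_zero(1, 2, 3) == 1 * (1 - 2 / 3)
    assert som._asymptotic_decay(1, 2, 3) == 1 / (1 + 2 / (3 / 2))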
