optimagic-dev · janosg · Nov 6, 2022 · Oct 11, 2022 · Oct 12, 2022 · Oct 18, 2022
diff --git a/src/estimagic/optimization/tranquilo/sample_points.py b/src/estimagic/optimization/tranquilo/sample_points.py
@@ -2,8 +2,11 @@
 import warnings
 from functools import partial
 
+import estimagic as em
 import numpy as np
 from estimagic.optimization.tranquilo.options import Bounds
+from scipy.spatial.distance import pdist
+from scipy.special import logsumexp
 
 
 def get_sampler(sampler, bounds, user_options=None):
@@ -29,7 +32,10 @@ def get_sampler(sampler, bounds, user_options=None):
 
     built_in_samplers = {
         "naive": _naive_sampler,
-        "sphere": _sphere_sampler,
+        "cube": partial(_hull_sampler, ord=np.inf),
+        "sphere": partial(_hull_sampler, ord=2),
+        "optimal_cube": partial(_optimal_hull_sampler, ord=np.inf),
+        "optimal_sphere": partial(_optimal_hull_sampler, ord=2),
     }
 
     if isinstance(sampler, str) and sampler in built_in_samplers:
@@ -40,7 +46,7 @@ def get_sampler(sampler, bounds, user_options=None):
         _sampler_name = getattr(sampler, "__name__", "your sampler")
     else:
         raise ValueError(
-            "Invalid sampler: {sampler}. Must be one of {list(built_in_samplers)} "
+            f"Invalid sampler: {sampler}. Must be one of {list(built_in_samplers)} "
             "or a callable."
         )
 
@@ -93,7 +99,8 @@ def _naive_sampler(
     """Naive random generation of trustregion points.
 
     This is just a reference implementation to illustrate the interface of trustregion
-    samplers.
+    samplers. Mathematically it samples uniformaly from inside the cube defined by the
+    intersection of the trustregion and the bounds.
 
     All arguments but seed are mandatory, even if not used.
 
@@ -110,47 +117,207 @@ def _naive_sampler(
             x vector at which the criterion function has already been evaluated, that
             satisfies lower_bounds <= existing_xs <= upper_bounds.
         rng (numpy.random.Generator): Random number generator.
-        bounds (Bounds or None): NamedTuple
+        bounds (Bounds or None): NamedTuple.
 
     """
-    n_points = _get_effective_n_points(target_size, existing_xs)
+    n_points = _get_effective_n_points(target_size, existing_xs=existing_xs)
     n_params = len(trustregion.center)
-    region_bounds = _get_effective_bounds(trustregion, bounds)
+    effective_bounds = _get_effective_bounds(trustregion, bounds=bounds)
 
     points = rng.uniform(
-        low=region_bounds.lower,
-        high=region_bounds.upper,
+        low=effective_bounds.lower,
+        high=effective_bounds.upper,
         size=(n_points, n_params),
     )
+    return points
+
+
+def _hull_sampler(
+    trustregion,
+    target_size,
+    rng,
+    ord,  # noqa: A002
+    existing_xs=None,
+    bounds=None,
+):
+    """Random generation of trustregion points on the hull of general sphere / cube.
+
+    Points are sampled randomly on a hull (of a sphere for ord=2 and of a cube for
+    ord=np.inf). These points are then mapped into the feasible region, which is defined
+    by the intersection of the trustregion and the bounds.
+
+    Args:
+        trustregion (TrustRegion): NamedTuple with attributes center and radius.
+        target_size (int): Target number of points in the combined sample of existing_xs
+            and newly sampled points. The sampler does not have to guarantee that this
+            number will actually be reached.
+        rng (numpy.random.Generator): Random number generator.
+        ord (int): Type of norm to use when scaling the sampled points. For 2 it will
+            result in sphere sampling, for np.inf in cube sampling.
+        existing_xs (np.ndarray or None): 2d numpy array in which each row is an
+            x vector at which the criterion function has already been evaluated, that
+            satisfies lower_bounds <= existing_xs <= upper_bounds.
+        bounds (Bounds or None): NamedTuple.
 
+    """
+    n_points = _get_effective_n_points(target_size, existing_xs=existing_xs)
+    n_params = len(trustregion.center)
+    effective_bounds = _get_effective_bounds(trustregion, bounds=bounds)
+
+    points = rng.normal(size=(n_points, n_params))
+    points = _project_onto_unit_hull(points, ord=ord)
+    points = _map_into_feasible_trustregion(points, bounds=effective_bounds)
     return points
 
 
-def _sphere_sampler(
+def _optimal_hull_sampler(
     trustregion,
     target_size,
     rng,
+    ord,  # noqa: A002
     existing_xs=None,
     bounds=None,
+    algorithm="scipy_lbfgsb",
+    algo_options=None,
 ):
-    n_points = _get_effective_n_points(target_size, existing_xs)
+    """Optimal generation of trustregion points on the hull of general sphere / cube.
+
+    Points are sampled optimally on a hull (of a sphere for ord=2 and of a cube for
+    ord=np.inf), where the criterion that is maximized is the (smooth) minimum distance
+    of all pairs of points, except for pairs of existing points. These points are then
+    mapped into the feasible region, which is defined by the intersection of the
+    trustregion and the bounds.
+
+    Args:
+        trustregion (TrustRegion): NamedTuple with attributes center and radius.
+        target_size (int): Target number of points in the combined sample of existing_xs
+            and newly sampled points. The sampler does not have to guarantee that this
+            number will actually be reached.
+        rng (numpy.random.Generator): Random number generator.
+        ord (int): Type of norm to use when scaling the sampled points. For 2 it will
+            result in sphere sampling, for np.inf in cube sampling.
+        existing_xs (np.ndarray or None): 2d numpy array in which each row is an
+            x vector at which the criterion function has already been evaluated, that
+            satisfies lower_bounds <= existing_xs <= upper_bounds.
+        bounds (Bounds or None): NamedTuple.
+        algorithm (str): Optimization algorithm.
+        algo_options (dict): Algorithm specific configuration of the optimization. See
+            :ref:`list_of_algorithms` for supported options of each algorithm. Default
+            sets ``stopping_max_iterations=3``.
+
+    Returns:
+        np.ndarray: Generated points. Has shape (target_size, len(trustregion.center)).
+
+    """
+    n_points = _get_effective_n_points(target_size, existing_xs=existing_xs)
     n_params = len(trustregion.center)
-    raw = rng.normal(size=(n_points, n_params))
-    denom = np.linalg.norm(raw, axis=1).reshape(-1, 1)
 
-    points = trustregion.radius * raw / denom + trustregion.center
+    if n_points == 0:
+        return np.array([])
 
-    if bounds is not None and (bounds.lower is not None or bounds.upper is not None):
-        bounds = _get_effective_bounds(trustregion, bounds)
-        points = np.clip(
-            points,
-            a_min=bounds.lower,
-            a_max=bounds.upper,
-        )
+    if algo_options is None:
+        algo_options = {"stopping_max_iterations": 3}
 
+    effective_bounds = _get_effective_bounds(trustregion, bounds=bounds)
+
+    if existing_xs is not None:
+        # map existing points into unit space for easier optimization
+        existing_xs = (existing_xs - trustregion.center) / trustregion.radius
+
+    # start params
+    x0 = rng.normal(size=(n_points, n_params))
+    x0 = _project_onto_unit_hull(x0, ord=ord)
+
+    res = em.maximize(
+        criterion=_pairwise_distance_on_hull,
+        params=x0,
+        algorithm=algorithm,
+        criterion_kwargs={"existing_xs": existing_xs, "ord": ord},
+        lower_bounds=-np.ones_like(x0),
+        upper_bounds=np.ones_like(x0),
+        algo_options=algo_options,
+    )
+
+    points = _project_onto_unit_hull(res.params, ord=ord)
+    points = _map_into_feasible_trustregion(points, bounds=effective_bounds)
     return points
 
 
+# ======================================================================================
+# Helper functions
+# ======================================================================================
+
+
+def _pairwise_distance_on_hull(x, existing_xs, ord):  # noqa: A002
+    """Pairwise distance of new and existing points.
+
+    Instead of optimizing the distance of points in the feasible trustregion, this
+    criterion function leads to the maximization of the minimum distance of the points
+    in the unit space. These can then be mapped into the feasible trustregion.
+
+    Args:
+        x (np.ndarray): 2d array of internal points. Each value is in [-1, 1].
+        existing_xs (np.ndarray or None): 2d numpy array in which each row is an
+            x vector at which the criterion function has already been evaluated, that
+            satisfies -1 <= existing_xs <= 1.
+        ord (int): Type of norm to use when scaling the sampled points. For 2 we project
+            onto the hull of a sphere, for np.inf onto the hull of a cube.
+
+    Returns:
+        float: The criterion value.
+
+    """
+    x = _project_onto_unit_hull(x, ord=ord)
+
+    if existing_xs is not None:
+        sample = np.row_stack([x, existing_xs])
+        n_existing_pairs = len(existing_xs) * (len(existing_xs) - 1) // 2
+        slc = slice(0, -n_existing_pairs) if n_existing_pairs else slice(None)
+    else:
+        sample = x
+        slc = slice(None)
+
+    dist = pdist(sample) ** 2  # pairwise squared distances
+    dist = dist[slc]  # drop distances between existing points as they should not
+    # influence the optimization
+
+    crit_value = -logsumexp(-dist)  # smooth minimum
+    return crit_value
+
+
+def _map_into_feasible_trustregion(points, bounds):
+    """Map points from the unit space into trustregion defined by bounds.
+
+    Args:
+        points (np.ndarray): 2d array of points to be mapped. Each value is in [-1, 1].
+        bounds (Bounds): A NamedTuple with attributes ``lower`` and ``upper``, where
+            lower and upper define the rectangle that is the feasible trustregion.
+
+    Returns:
+        np.ndarray: Mapped points.
+
+    """
+    points = (bounds.upper - bounds.lower) * (points + 1) / 2 + bounds.lower
+    return points
+
+
+def _project_onto_unit_hull(x, ord):  # noqa: A002
+    """Project points from the unit space onto the hull of a geometric figure.
+
+    Args:
+        x (np.ndarray): 2d array of points to be projects. Each value is in [-1, 1].
+        ord (int): Type of norm to use when scaling the sampled points. For 2 we project
+            onto the hull of a sphere, for np.inf onto the hull of a cube.
+
+    Returns:
+        np.ndarray: The projected points.
+
+    """
+    norm = np.linalg.norm(x, axis=1, ord=ord).reshape(-1, 1)
+    projected = x / norm
+    return projected
+
+
 def _get_effective_bounds(trustregion, bounds):
     lower_bounds = trustregion.center - trustregion.radius
     upper_bounds = trustregion.center + trustregion.radius

diff --git a/src/estimagic/optimization/tranquilo/tranquilo_history.py b/src/estimagic/optimization/tranquilo/tranquilo_history.py
@@ -41,7 +41,10 @@ def add_entries(self, xs, fvecs):
                 least square fvecs.
         """
         xs = np.atleast_2d(xs)
-        if len(xs) == 0:
+
+        n_new_points = len(xs) if xs.size != 0 else 0
+
+        if n_new_points == 0:
             return
 
         if self.functype == "scalar":
@@ -50,7 +53,7 @@ def add_entries(self, xs, fvecs):
             fvecs = np.atleast_2d(fvecs)
         fvals = np.atleast_1d(self.aggregate(fvecs))
 
-        if len(xs) != len(fvecs):
+        if n_new_points != len(fvecs):
             raise ValueError()
 
         self.xs = _add_entries_to_array(self.xs, xs, self.n_fun)
@@ -160,15 +163,17 @@ def _add_entries_to_array(arr, new, position):
         shape = 100_000 if new.ndim == 1 else (100_000, new.shape[1])
         arr = np.full(shape, np.nan)
 
-    if len(arr) - position - len(new) < 0:
-        n_extend = max(len(arr), len(new))
+    n_new_points = len(new) if new.size != 0 else 0
+
+    if len(arr) - position - n_new_points < 0:
+        n_extend = max(len(arr), n_new_points)
         if arr.ndim == 2:
             extension_shape = (n_extend, arr.shape[1])
             arr = np.vstack([arr, np.full(extension_shape, np.nan)])
         else:
             arr = np.hstack([arr, np.full(n_extend, np.nan)])
 
-    arr[position : position + len(new)] = new
+    arr[position : position + n_new_points] = new
 
     return arr