Skip to content

Commit

Permalink
Add poisedness check for sample quality (#399)
Browse files Browse the repository at this point in the history
  • Loading branch information
segsell authored Jan 15, 2023
1 parent 608dc5c commit e9d4cd6
Show file tree
Hide file tree
Showing 4 changed files with 638 additions and 2 deletions.
10 changes: 10 additions & 0 deletions docs/source/refs.bib
Original file line number Diff line number Diff line change
Expand Up @@ -884,4 +884,14 @@ @article{Zhang2010
URL = {https://doi.org/10.1137/09075531X},
}


@book{Conn2009,
author = {Conn, Andrew R. and Scheinberg, Katya and Vicente, Luis N.},
title = {Introduction to Derivative-Free Optimization},
publisher = {Society for Industrial and Applied Mathematics},
year = {2009},
doi = {10.1137/1.9780898718768},
URL = {https://epubs.siam.org/doi/abs/10.1137/1.9780898718768},
}

@Comment{jabref-meta: databaseType:bibtex;}
4 changes: 2 additions & 2 deletions src/estimagic/optimization/tranquilo/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ def log_d_quality_calculator(sample, trustregion, bounds):
Args:
sample (np.ndarray): The data sample, shape = (n, p).
trustregion:
bounds:
trustregion (TrustRegion): NamedTuple with attributes center and radius.
bounds (Bounds): The parameter bounds.
Returns:
np.ndarray: The criterion values, shape = (n, ).
Expand Down
253 changes: 253 additions & 0 deletions src/estimagic/optimization/tranquilo/poisedness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
from functools import partial

import numpy as np
from scipy.optimize import Bounds
from scipy.optimize import minimize
from scipy.optimize import NonlinearConstraint


def get_poisedness_constant(sample, shape="sphere"):
"""Calculate the lambda poisedness constant of a sample.
Note that the sample space is a trust-region with center 0 and radius 1.
It may be a (hyper-) sphere or cube.
The implementation is based on :cite:`Conn2009`, Chapters 3 and 4.
In general, if the sample is lambda-poised with a small lambda, where lambda >=1,
the sample is said to have "good" geometry or "spans" the trust-region well.
As lambda grows, the system represented by these points becomes increasingly
linearly dependent.
Formal definition:
A sample Y is said to be lambda-poised on a region of interest if it is linearly
independent and the Lagrange polynomials L(i) of points i through N in Y satisfy:
lambda >= max_i max_x | L(i) | (1)
i.e. for each point i in the sample, we maximize the absolute criterion value
of its lagrange polynomial L(i); we then take the maximum over all these
criterion values as the lambda constant.
When we compare different samples on the same trust-region, we are usually
interested in keeping the sample with the least lambda, so that (1) holds.
Args:
sample (np.ndarry): Array of shape (n_samples, n_params) containing the scaled
sample of points that lie within a trust-region with center 0 and radius 1.
shape (str): Geometric shape of the sample space. One of "sphere", "cube".
Default is "sphere".
Returns:
tuple:
- lambda (float): The lambda poisedness constant.
- argmax (np.ndarray): 1d array of shape (n_params,) containing the
parameter vector that maximizes lambda.
- idx_max (int): Index relating to the position of the argmax in the sample.
"""
n_params = sample.shape[1]
_minimize = _get_minimize_func(shape, n_params)

lambda_, argmax, idx_max = _get_poisedness_constant_internal(
sample=sample, n_params=n_params, _minimize=_minimize
)

return lambda_, argmax, idx_max


def improve_poisedness(sample, shape="sphere", maxiter=5):
"""Improve the lambda poisedness of the sample.
The poisedness of the sample is improved in an incremental manner; replacing
one point at a time and reducing the upper bound on the absolute value of
the Lagrange polynomial.
The implementation is based on algorithm 6.3 in :cite:`Conn2009`,
Chapter 6, p. 95 ff.
Args:
sample (np.ndarry): Array of shape (n_samples, n_params).
shape (str): Geometric shape of the sample space. One of "sphere", "cube".
Default is "sphere".
maxiter (int): Maximum number of replacement iterations. Default is 5.
Returns:
tuple:
- sample_improved (np.ndarray): Sample with improved poisedness.
- lambdas (list): History of lambdas.
"""
n_params = sample.shape[1]
_minimize = _get_minimize_func(shape, n_params)

sample_improved = sample.copy()

lambdas = []

for _ in range(maxiter):

lambda_, argmax, idx_max = _get_poisedness_constant_internal(
sample=sample_improved, n_params=n_params, _minimize=_minimize
)

lambdas += [lambda_]
sample_improved[idx_max] = argmax

return sample_improved, lambdas


def _get_poisedness_constant_internal(sample, n_params, _minimize):
"""Internal interface for the calculation of the lambda poisedness constant.
Args:
sample (np.ndarry): Array of shape (n_samples, n_params) containing the scaled
sample of points that lie within a trust-region with center 0 and radius 1.
n_params (int): Dimensionality of the sample.
_minimize (callable): The minimize function with partialled arguments.
Returns:
tuple:
- lambda (float): The lambda poisedness constant.
- argmax (np.ndarray): 1d array of shape (n_params,) containing the
parameter vector that maximizes lambda.
- idx_max (int): Index relating to the position of the argmax in the sample.
"""
lagrange_mat = _lagrange_poly_matrix(sample)

lambda_ = 0
idx_max = None

for idx, poly in enumerate(lagrange_mat):

intercept = poly[0]
linear_terms = poly[1 : n_params + 1]
_coef_square_terms = poly[n_params + 1 :]
square_terms = _reshape_coef_to_square_terms(_coef_square_terms, n_params)

neg_criterion = partial(
_eval_neg_absolute_value,
intercept=intercept,
linear_terms=linear_terms,
square_terms=square_terms,
)

result_max = _minimize(neg_criterion)

critval = _eval_absolute_value(
result_max.x, intercept, linear_terms, square_terms
)

if critval > lambda_:
lambda_ = critval
argmax = result_max.x
idx_max = idx

return lambda_, argmax, idx_max


def _lagrange_poly_matrix(sample):
"""Construct matrix of lagrange polynomials.
See :cite:`Conn2009`, Chapter 4.2, p. 60.
Args:
sample (np.ndarry): Array of shape (n_samples, n_params).
Returns:
np.ndarray: Matrix of lagrange polynomials of shape
(n_samples, n_params * (n_params + 1) // 2).
"""
basis_mat = _scaled_polynomial_features(sample)
lagrange_mat = basis_mat @ np.linalg.pinv(basis_mat.T @ basis_mat)

return lagrange_mat


def _scaled_polynomial_features(x):
"""Construct linear terms, interactions, and scaled square terms.
The square terms are scaled by 1 / 2.
Args:
x (np.ndarray): Array of shape (n_samples, n_params).
Returns:
np.ndarray: Linear terms, interactions and scaled square terms.
Has shape (n_samples, n_params * (n_params + 1) // 2).
"""
n_samples, n_params = np.atleast_2d(x).shape
n_poly_terms = n_params * (n_params + 1) // 2

poly_terms = np.empty((n_poly_terms, n_samples), np.float64)
xt = x.T

idx = 0
for i in range(n_params):
poly_terms[idx] = 0.5 * xt[i] ** 2
idx += 1

for j in range(i + 1, n_params):
poly_terms[idx] = xt[i] * xt[j]
idx += 1

intercept = np.ones((1, n_samples), x.dtype)
out = np.concatenate((intercept, xt, poly_terms), axis=0)

return out.T


def _reshape_coef_to_square_terms(coef, n_params):
"""Reshape square coefficients to matrix of square terms."""
mat = np.empty((n_params, n_params))
idx = -1

for j in range(n_params):
for i in range(j + 1):
idx += 1
mat[i, j] = coef[idx]
mat[j, i] = coef[idx]

return mat


def _get_minimize_func(shape, n_params):
"""Get the minimizer function with partialled arguments."""
center = np.zeros(n_params)

if shape == "sphere":
nonlinear_constraint = NonlinearConstraint(lambda x: np.linalg.norm(x), 0, 1)

_minimize = partial(
minimize,
x0=center,
method="trust-constr",
constraints=[nonlinear_constraint],
)
elif shape == "cube":
bound_constraints = Bounds(-np.ones(n_params), np.ones(n_params))

_minimize = partial(
minimize,
x0=center,
method="trust-constr",
bounds=bound_constraints,
)
else:
raise ValueError(
f"Invalid shape argument: {shape}. Must be one of sphere, cube."
)

return _minimize


def _eval_absolute_value(x, intercept, linear_terms, square_terms):
return np.abs(intercept + linear_terms.T @ x + 0.5 * x.T @ square_terms @ x)


def _eval_neg_absolute_value(x, intercept, linear_terms, square_terms):
return -_eval_absolute_value(x, intercept, linear_terms, square_terms)
Loading

0 comments on commit e9d4cd6

Please sign in to comment.