Anisotropic kernel for the multi-fidelity co-Kriging model (MFCK) #692

Merged · 15 commits · Dec 20, 2024
166 changes: 112 additions & 54 deletions smt/applications/mfck.py
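This change replaces the single (isotropic) length-scale of the previous MFCK kernel with one length-scale per input dimension at every fidelity level, and standardizes the training data before fitting. Reading the updated slicing in the diff, the flattened hyperparameter vector appears to be laid out as [sigma_0, theta_0 (nx values)] for the lowest level, followed by one block [sigma_gamma_l, theta_gamma_l (nx values), rho_l] per additional level. A minimal sketch of that assumed layout (the helper name and example sizes are illustrative, not part of the PR):

```python
import numpy as np

def pack_mfck_params(sigma0, theta0, level_blocks):
    """Pack MFCK hyperparameters into one flat vector (assumed layout).

    sigma0       : variance of the level-0 kernel
    theta0       : nx length-scales of the level-0 kernel
    level_blocks : for each level l > 0, a tuple (sigma_gamma, theta_gamma, rho),
                   where theta_gamma holds nx length-scales
    """
    parts = [np.atleast_1d(sigma0), np.asarray(theta0, dtype=float)]
    for sigma_g, theta_g, rho in level_blocks:
        parts += [np.atleast_1d(sigma_g), np.asarray(theta_g, dtype=float), np.atleast_1d(rho)]
    return np.concatenate(parts)

# Example for nx = 2 inputs and 2 fidelity levels:
param = pack_mfck_params(1.0, [0.5, 2.0], [(1.0, [0.3, 1.5], 0.9)])
# param = [sigma_0, l_01, l_02, sigma_gamma_1, l_11, l_12, rho_1]
# As in the diff: param[0] -> sigma_0, param[1:nx+1] -> level-0 length-scales,
# param[nx+1::nx+2] -> sigmas_gamma, param[2+2*nx::nx+2] -> rho values.
```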
@@ -20,6 +20,7 @@
from smt.sampling_methods import LHS
from smt.utils.kriging import differences, componentwise_distance
from smt.surrogate_models.krg_based import KrgBased
from smt.utils.misc import standardization


class MFCK(KrgBased):
@@ -50,10 +51,16 @@ def _initialize(self):
)
declare(
"sigma_bounds",
[1e-1, 100],
[1e-2, 100],
types=(list, np.ndarray),
desc="Bounds for the variance parameter",
)
declare(
"lambda",
0.1,
types=(float),
desc="Regularization parameter",
)

self.options["nugget"] = (
1e-9  # Increasing the nugget for numerical stability reasons
@@ -83,29 +90,48 @@ def train(self):
self.X = xt
self.y = np.vstack(yt)
self._check_param()
self.nx = 1 # Forcing this in order to consider isotropic kernels (i.e., only one lengthscale)

(
_,
_,
self.X_offset,
self.y_mean,
self.X_scale,
self.y_std,
) = standardization(np.concatenate(xt, axis=0), np.concatenate(yt, axis=0))

self.X_norma_all = [(x - self.X_offset) / self.X_scale for x in xt]
self.y_norma_all = np.vstack([(f - self.y_mean) / self.y_std for f in yt])

if self.lvl == 1:
# For a single level, initialize theta_ini, lower_bounds, and
# upper_bounds with consistent shapes
theta_ini = np.hstack(
(self.options["sigma0"], self.options["theta0"][0])
) # Kernel variance + theta0
(self.options["sigma0"], self.options["theta0"])
) # Variance + initial theta values
lower_bounds = np.hstack(
(self.options["sigma_bounds"][0], self.options["theta_bounds"][0])
(
self.options["sigma_bounds"][0],
np.full(self.nx, self.options["theta_bounds"][0]),
)
)
upper_bounds = np.hstack(
(self.options["sigma_bounds"][1], self.options["theta_bounds"][1])
(
self.options["sigma_bounds"][1],
np.full(self.nx, self.options["theta_bounds"][1]),
)
)
theta_ini = np.log10(theta_ini)
lower_bounds = np.log10(lower_bounds)
upper_bounds = np.log10(upper_bounds)
x_opt = theta_ini
# Apply log10 to theta_ini and bounds
nb_params = len(self.options["theta0"])
theta_ini[: nb_params + 1] = np.log10(theta_ini[: nb_params + 1])
lower_bounds[: nb_params + 1] = np.log10(lower_bounds[: nb_params + 1])
upper_bounds[: nb_params + 1] = np.log10(upper_bounds[: nb_params + 1])
else:
for lvl in range(self.lvl):
if lvl == 0:
# Initialize theta_ini for level 0
theta_ini = np.hstack(
(self.options["sigma0"], self.options["theta0"][0])
(self.options["sigma0"], self.options["theta0"])
) # Variance + initial theta values
lower_bounds = np.hstack(
(
@@ -131,9 +157,7 @@ def train(self):

elif lvl > 0:
# For additional levels, append to theta_ini, lower_bounds, and upper_bounds
thetat = np.hstack(
(self.options["sigma0"], self.options["theta0"][0])
)
thetat = np.hstack((self.options["sigma0"], self.options["theta0"]))
lower_boundst = np.hstack(
(
self.options["sigma_bounds"][0],
@@ -188,9 +212,9 @@ def train(self):
method="COBYLA",
constraints=[{"fun": con, "type": "ineq"} for con in constraints],
options={
"rhobeg": 0.1,
"rhobeg": 0.5,
"tol": 1e-4,
"maxiter": 200,
"maxiter": 100,
},
)
x_opt_iter = optimal_theta_res_loop.x
@@ -213,18 +237,24 @@ def train(self):
opt.set_lower_bounds(lower_bounds) # Lower bounds for each dimension
opt.set_upper_bounds(upper_bounds) # Upper bounds for each dimension
opt.set_min_objective(self.neg_log_likelihood_nlopt)
opt.set_maxeval(5000)
opt.set_maxeval(1000)
opt.set_xtol_rel(1e-6)
x0 = np.copy(theta_ini)
x_opt = opt.optimize(x0)
else:
raise ValueError(
f"The optimizer {self.options['hyper_opt']} is not available"
)

x_opt[0:2] = 10 ** (x_opt[0:2])
x_opt[2::3] = 10 ** (x_opt[2::3])
x_opt[3::3] = 10 ** (x_opt[3::3])
x_opt[0] = 10 ** (x_opt[0]) # Apply 10** to Sigma 0
x_opt[1 : self.nx + 1] = (
10 ** (x_opt[1 : self.nx + 1])
) # Apply 10** to length scales 0
x_opt[self.nx + 1 :: self.nx + 2] = (
10 ** (x_opt[self.nx + 1 :: self.nx + 2])
) # Apply 10** to sigmas gamma

for i in np.arange(self.nx + 2, x_opt.shape[0] - 1, self.nx + 2):
x_opt[i : i + self.nx] = 10 ** x_opt[i : i + self.nx]
self.optimal_theta = x_opt

def eta(self, j, jp, rho):
@@ -253,10 +283,16 @@ def compute_cross_K(self, x, xp, L, Lp, param):
"""
cov_value = 0.0

param0 = param[0:2]
sigmas_gamma = param[2::3]
ls_gamma = param[3::3]
rho_values = param[4::3]
sigma_0 = param[0]
l_0 = param[1 : self.nx + 1]
# param0 = param[0 : self.nx+1]
sigmas_gamma = param[self.nx + 1 :: self.nx + 2]
l_s = [
param[i : i + self.nx].tolist()
for i in np.arange(self.nx + 2, param.shape[0] - 1, self.nx + 2)
]
# ls_gamma = param[3::3]
rho_values = param[2 + 2 * self.nx :: self.nx + 2]

# Sum over j = 0, ..., Lp (i.e., up to level l')
for j in range(Lp + 1):
@@ -265,12 +301,10 @@

if j == 0:
# Cov(γ_j(x), γ_j(x')) using the kernel for K_00
cov_gamma_j = self._compute_K(x, xp, param0)
cov_gamma_j = self._compute_K(x, xp, [sigma_0, l_0])
else:
# Cov(γ_j(x), γ_j(x')) using the kernel
cov_gamma_j = self._compute_K(
x, xp, [sigmas_gamma[j - 1], ls_gamma[j - 1]]
)
cov_gamma_j = self._compute_K(x, xp, [sigmas_gamma[j - 1], l_s[j - 1]])
# Add to the value of the covariance
cov_value += eta_j_l * eta_j_lp * cov_gamma_j

@@ -290,23 +324,30 @@ def predict_all_levels(self, x):
covariances: (list, np.array)
Returns the conditional covariance matrices per level.
"""
self.K = self.compute_blockwise_K(self.optimal_theta)
means = []
covariances = []
x = (x - self.X_offset) / self.X_scale

if self.lvl == 1:
k_XX = self._compute_K(self.X[0], self.X[0], self.optimal_theta[0:2])
k_xX = self._compute_K(x, self.X[0], self.optimal_theta[0:2])
k_xx = self._compute_K(x, x, self.optimal_theta[0:2])
# To be adapted using the Cholesky decomposition
k_XX_inv = np.linalg.inv(
k_XX + self.options["nugget"] * np.eye(k_XX.shape[0])
sigma = self.optimal_theta[0]  # Variance of level 0 (already back-transformed)
l_s = [self.optimal_theta[1 : self.nx + 1]]  # Length-scales of level 0 (already back-transformed)

k_XX = self._compute_K(
self.X_norma_all[0], self.X_norma_all[0], [sigma, l_s[0]]
)
means.append(np.dot(k_xX, np.matmul(k_XX_inv, self.y)))
covariances.append(
k_xx - np.matmul(k_xX, np.matmul(k_XX_inv, k_xX.transpose()))
k_xX = self._compute_K(x, self.X_norma_all[0], [sigma, l_s[0]])
k_xx = self._compute_K(x, x, [sigma, l_s[0]])

L = np.linalg.cholesky(
k_XX + self.options["nugget"] * np.eye(k_XX.shape[0])
)

beta1 = solve_triangular(L, k_xX.T, lower=True)
alpha1 = solve_triangular(L, self.y_norma_all, lower=True)
means.append(self.y_std * np.dot(beta1.T, alpha1) + self.y_mean)
covariances.append(k_xx - np.dot(beta1.T, beta1))
else:
self.K = self.compute_blockwise_K(self.optimal_theta)
L = np.linalg.cholesky(
self.K + self.options["nugget"] * np.eye(self.K.shape[0])
)
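For the single-fidelity branch above, the Cholesky update computes the standard GP posterior on the standardized data. Writing K_XX + nugget*I = L L^T, \beta = L^{-1} k_{xX}^T and \alpha = L^{-1} \tilde{y} (with \tilde{y} the normalized training outputs), the returned quantities are

\mu(x) = y_{std}\, \beta^T \alpha + y_{mean}, \qquad \Sigma(x) = k(x, x) - \beta^T \beta,

i.e. the predictive mean is mapped back to the original output scale with y_std and y_mean, while the covariance is left in the normalized space, as written in the diff.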
@@ -317,19 +358,19 @@ def predict_all_levels(self, x):
if ind >= j:
k_xX.append(
self.compute_cross_K(
self.X[j], x, ind, j, self.optimal_theta
self.X_norma_all[j], x, ind, j, self.optimal_theta
)
)
else:
k_xX.append(
self.compute_cross_K(
self.X[j], x, j, ind, self.optimal_theta
self.X_norma_all[j], x, j, ind, self.optimal_theta
)
)

beta1 = solve_triangular(L, np.vstack(k_xX), lower=True)
alpha1 = solve_triangular(L, self.y, lower=True)
means.append(np.dot(beta1.T, alpha1))
alpha1 = solve_triangular(L, self.y_norma_all, lower=True)
means.append(self.y_std * np.dot(beta1.T, alpha1) + self.y_mean)
covariances.append(k_xx - np.dot(beta1.T, beta1))
k_xX.clear()

@@ -355,15 +396,18 @@ def _predict(self, x):

def neg_log_likelihood(self, param, grad=None):
if self.lvl == 1:
self.K = self._compute_K(self.X[0], self.X[0], param[0:2])
sigma = param[0]
l_s = [param[1 : self.nx + 1]]
self.K = self._compute_K(self.X[0], self.X[0], [sigma, l_s])
else:
self.K = self.compute_blockwise_K(param)

reg_term = self.options["lambda"] * np.sum(np.power(param, 2))
L = np.linalg.cholesky(
self.K + self.options["nugget"] * np.eye(self.K.shape[0])
)
beta = solve_triangular(L, self.y, lower=True)
NMLL = 1 / 2 * (2 * np.sum(np.log(np.diag(L))) + np.dot(beta.T, beta))
beta = solve_triangular(L, self.y_norma_all, lower=True)
NMLL = np.sum(np.log(np.diag(L))) + np.dot(beta.T, beta) + reg_term
(nmll,) = NMLL[0]
return nmll
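With the new lambda option, the objective minimized here is a regularized negative log-likelihood. As written in the diff (up to additive constants), with K(\theta) + nugget*I = L L^T and \beta = L^{-1} \tilde{y}:

NMLL(\theta) = \sum_i \log L_{ii} + \beta^T \beta + \lambda \sum_k \theta_k^2,

where the penalty is applied to the full back-transformed hyperparameter vector, including the rho terms.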

@@ -372,19 +416,33 @@ def neg_log_likelihood_scipy(self, param):
Likelihood for Cobyla-scipy (SMT) optimizer
"""
param = np.array(param, copy=True)
param[0:2] = 10 ** (param[0:2])
param[2::3] = 10 ** (param[2::3])
param[3::3] = 10 ** (param[3::3])
param[0] = 10 ** (param[0]) # Apply 10** to Sigma 0
param[1 : self.nx + 1] = (
10 ** (param[1 : self.nx + 1])
) # Apply 10** to length scales 0
param[self.nx + 1 :: self.nx + 2] = (
10 ** (param[self.nx + 1 :: self.nx + 2])
) # Apply 10** to sigmas gamma

for i in np.arange(self.nx + 2, param.shape[0] - 1, self.nx + 2):
param[i : i + self.nx] = 10 ** param[i : i + self.nx]
return self.neg_log_likelihood(param)

def neg_log_likelihood_nlopt(self, param, grad=None):
"""
Likelihood for nlopt optimizers
"""
param = np.array(param, copy=True)
param[0:2] = 10 ** (param[0:2])
param[2::3] = 10 ** (param[2::3])
param[3::3] = 10 ** (param[3::3])
param[0] = 10 ** (param[0]) # Apply 10** to Sigma 0
param[1 : self.nx + 1] = (
10 ** (param[1 : self.nx + 1])
) # Apply 10** to length scales 0
param[self.nx + 1 :: self.nx + 2] = (
10 ** (param[self.nx + 1 :: self.nx + 2])
) # Apply 10** to sigmas gamma

for i in np.arange(self.nx + 2, param.shape[0] - 1, self.nx + 2):
param[i : i + self.nx] = 10 ** param[i : i + self.nx]
return self.neg_log_likelihood(param, grad)

def compute_blockwise_K(self, param):
@@ -394,7 +452,7 @@ def compute_blockwise_K(self, param):
for j in range(self.lvl):
if jp >= j:
K_block[str(jp) + str(j)] = self.compute_cross_K(
self.X[j], self.X[jp], jp, j, param
self.X_norma_all[j], self.X_norma_all[jp], jp, j, param
)

K = np.zeros((n, n))
Expand Down Expand Up @@ -427,7 +485,7 @@ def _compute_K(self, A: np.ndarray, B: np.ndarray, param):
self.X[0].shape[1],
power=self.options["pow_exp_power"],
)
self.corr.theta = np.full(self.X[0].shape[1], param[1])
self.corr.theta = np.asarray(param[1])
r = self.corr(d)
R = r.reshape(A.shape[0], B.shape[0])
K = param[0] * R
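The one-line change in _compute_K is what makes the kernel anisotropic: self.corr.theta now receives the full vector param[1] of per-dimension length-scales instead of a single value broadcast over every dimension with np.full. For a squared-exponential correlation, for instance, this amounts to

k(x, x') = \sigma^2 \exp\left(-\sum_{d=1}^{nx} \theta_d (x_d - x'_d)^2\right),

with an independent \theta_d per input dimension (the exact form depends on the corr option in use, e.g. pow_exp with pow_exp_power).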
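With these changes, theta0 can carry one initial value per input dimension. A hedged usage sketch, assuming MFCK follows the same multi-fidelity training interface as SMT's MFK (set_training_values with name=<level index> for the lower fidelities) and that options are passed at construction; the data below are toy placeholders, not from this PR:

```python
import numpy as np
from smt.applications.mfck import MFCK

rng = np.random.default_rng(0)

# Two-dimensional inputs, two fidelity levels (toy data).
x_lf = rng.uniform(0.0, 1.0, size=(40, 2))           # low-fidelity DoE
x_hf = x_lf[:10]                                      # nested high-fidelity DoE
y_lf = np.sin(6.0 * x_lf[:, [0]]) + x_lf[:, [1]]
y_hf = y_lf[:10] + 0.05 * x_hf[:, [0]] ** 2

model = MFCK(theta0=[1.0, 1.0])                       # one length-scale per dimension (anisotropic)
model.set_training_values(x_lf, y_lf, name=0)         # level 0 = lowest fidelity
model.set_training_values(x_hf, y_hf)                 # highest fidelity
model.train()

x_new = rng.uniform(0.0, 1.0, size=(5, 2))
means, covariances = model.predict_all_levels(x_new)  # one entry per fidelity level
mu_hf = means[-1]                                     # posterior mean at the highest fidelity
```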
2 changes: 1 addition & 1 deletion smt/applications/tests/test_mfck.py
@@ -69,7 +69,7 @@ def test_mfck(self):

t_error = num / den

self.assert_error(t_error, 0.0, 1e-6, 1e-6)
self.assert_error(t_error, 0.0, 1e-5, 1e-5)

@staticmethod
def run_mfck_example():