From 549a8aca809cac67e3cb501efe334b77b225d4ef Mon Sep 17 00:00:00 2001
From: Andrey Velichkevich
Date: Tue, 5 Dec 2023 17:54:32 +0000
Subject: [PATCH] Remove legacy BO code

---
 .../v1beta1/bayesianoptimization/__init__.py  |   0
 .../bayesianoptimization/acquisition_func.py  |  50 ---
 .../bayesianoptimization/algorithm_manager.py | 215 ------------
 .../bayesian_optimization_algorithm.py        |  87 -----
 .../bayesianoptimization/global_optimizer.py  | 323 ------------------
 .../bayesianoptimization/model/__init__.py    |   0
 .../v1beta1/bayesianoptimization/model/gp.py  |  52 ---
 .../v1beta1/bayesianoptimization/model/rf.py  |  38 ---
 .../v1beta1/bayesianoptimization/utils.py     |  31 --
 9 files changed, 796 deletions(-)
 delete mode 100644 pkg/suggestion/v1beta1/bayesianoptimization/__init__.py
 delete mode 100644 pkg/suggestion/v1beta1/bayesianoptimization/acquisition_func.py
 delete mode 100644 pkg/suggestion/v1beta1/bayesianoptimization/algorithm_manager.py
 delete mode 100644 pkg/suggestion/v1beta1/bayesianoptimization/bayesian_optimization_algorithm.py
 delete mode 100644 pkg/suggestion/v1beta1/bayesianoptimization/global_optimizer.py
 delete mode 100644 pkg/suggestion/v1beta1/bayesianoptimization/model/__init__.py
 delete mode 100644 pkg/suggestion/v1beta1/bayesianoptimization/model/gp.py
 delete mode 100644 pkg/suggestion/v1beta1/bayesianoptimization/model/rf.py
 delete mode 100644 pkg/suggestion/v1beta1/bayesianoptimization/utils.py

diff --git a/pkg/suggestion/v1beta1/bayesianoptimization/__init__.py b/pkg/suggestion/v1beta1/bayesianoptimization/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/pkg/suggestion/v1beta1/bayesianoptimization/acquisition_func.py b/pkg/suggestion/v1beta1/bayesianoptimization/acquisition_func.py
deleted file mode 100644
index 996e76ed41f..00000000000
--- a/pkg/suggestion/v1beta1/bayesianoptimization/acquisition_func.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright 2022 The Kubeflow Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-""" module for acquisition function"""
-import numpy as np
-from scipy.stats import norm
-
-
-class AcquisitionFunc:
-    """
-    Class for acquisition function with options for expected improvement,
-    probability of improvement, or lower confidence bound.
-    """
-
-    def __init__(self, model, current_optimal, mode="ei", trade_off=0.01):
-        """
-        :param mode: pi: probability of improvement, ei: expected improvement, lcb: lower confidence bound
-        :param trade_off: a parameter to control the trade-off between exploiting and exploring
-        :param model: the surrogate model, gp: gaussian process, rf: random forest
-        """
-        self.model = model
-        self.current_optimal = current_optimal
-        self.mode = mode
-        self.trade_off = trade_off
-
-    def compute(self, X_test):
-        y_mean, y_std, y_variance = self.model.predict(X_test)
-
-        z = (y_mean - self.current_optimal - self.trade_off) / y_std
-
-        if self.mode == "ei":
-            if y_std.any() < 0.000001:
-                return 0, y_mean, y_variance
-            result = y_std * (z * norm.cdf(z) + norm.pdf(z))
-        elif self.mode == "pi":
-            result = norm.cdf(z)
-        else:
-            result = - (y_mean - self.trade_off * y_std)
-        return np.squeeze(result), np.squeeze(y_mean), np.squeeze(y_variance)
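
For reference, the EI branch removed above is the standard closed form
EI(x) = sigma(x) * (z * Phi(z) + phi(z)) with z = (mu(x) - f_best - xi) / sigma(x).
A minimal standalone sketch, assuming only a surrogate exposing mean and std
arrays (illustrative names, not the deleted class's API); note the
near-zero-variance guard is applied element-wise here, where the removed code
compared a boolean from y_std.any() against the threshold:

    import numpy as np
    from scipy.stats import norm

    def expected_improvement(y_mean, y_std, current_best, trade_off=0.01):
        """Closed-form EI for maximization; larger trade_off explores more."""
        y_std = np.maximum(y_std, 1e-6)  # element-wise zero-variance guard
        z = (y_mean - current_best - trade_off) / y_std
        return y_std * (z * norm.cdf(z) + norm.pdf(z))

    # e.g. expected_improvement(np.array([0.55]), np.array([0.1]), 0.5)
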
- """ - - def __init__(self, model, current_optimal, mode="ei", trade_off=0.01): - """ - :param mode: pi: probability of improvement, ei: expected improvement, lcb: lower confident bound - :param trade_off: a parameter to control the trade off between exploiting and exploring - :param model_type: gp: gaussian process, rf: random forest - """ - self.model = model - self.current_optimal = current_optimal - self.mode = mode - self.trade_off = trade_off - - def compute(self, X_test): - y_mean, y_std, y_variance = self.model.predict(X_test) - - z = (y_mean - self.current_optimal - self.trade_off) / y_std - - if self.mode == "ei": - if y_std.any() < 0.000001: - return 0, y_mean, y_variance - result = y_std * (z * norm.cdf(z) + norm.pdf(z)) - elif self.mode == "pi": - result = norm.cdf(z) - else: - result = - (y_mean - self.trade_off * y_std) - return np.squeeze(result), np.squeeze(y_mean), np.squeeze(y_variance) diff --git a/pkg/suggestion/v1beta1/bayesianoptimization/algorithm_manager.py b/pkg/suggestion/v1beta1/bayesianoptimization/algorithm_manager.py deleted file mode 100644 index 5fb48bee4ab..00000000000 --- a/pkg/suggestion/v1beta1/bayesianoptimization/algorithm_manager.py +++ /dev/null @@ -1,215 +0,0 @@ -# Copyright 2022 The Kubeflow Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -""" module for algorithm manager """ -import numpy as np - -from pkg.apis.manager.v1beta1.python import api_pb2 - -from pkg.suggestion.v1beta1.bayesianoptimization.utils import get_logger - - -def deal_with_discrete(feasible_values, current_value): - """ function to embed the current values to the feasible discrete space""" - diff = np.subtract(feasible_values, current_value) - diff = np.absolute(diff) - return feasible_values[np.argmin(diff)] - - -def deal_with_categorical(feasible_values, one_hot_values): - """ function to do the one hot encoding of the categorical values """ - index = np.argmax(one_hot_values) - #index = one_hot_values.argmax() - return feasible_values[int(index)] - - -class AlgorithmManager: - """ class for the algorithm manager - provide some helper functions - """ - - def __init__(self, experiment_name, experiment, parameter_config, X_train, y_train, logger=None): - self.logger = logger if (logger is not None) else get_logger() - self._experiment_name = experiment_name - self._experiment = experiment - self._goal = self._experiment.spec.objective.type - self._dim = parameter_config.dim - self._lowerbound = parameter_config.lower_bounds - self._upperbound = parameter_config.upper_bounds - self._types = parameter_config.parameter_types - self._names = parameter_config.names - # record all the feasible values of discrete type variables - self._discrete_info = parameter_config.discrete_info - self._categorical_info = parameter_config.categorical_info - self._name_id = parameter_config.name_ids - - self._X_train = self._mapping_params(X_train) - self.parse_X() - - self._y_train = y_train - self._parse_metric() - - @property - def experiment_name(self): - """ return the experiment_name """ - return self._experiment_name - - @property - def experiment(self): - """ return the experiment """ - return self._experiment - - @property - def goal(self): - """ return the optimization goal""" - return self._goal - - @property - def dim(self): - """ return the dimension """ - return self._dim - - @property - def lower_bound(self): - """ return the lower bound of all the parameters """ - return self._lowerbound - - @property - def upper_bound(self): - """ return the upper bound of all the parameters """ - return self._upperbound - - @property - def types(self): - """ return the types of all the parameters """ - return self._types - - @property - def names(self): - """ return the names of all the parameters """ - return self._names - - @property - def discrete_info(self): - """ return the info of all the discrete parameters """ - return self._discrete_info - - @property - def categorical_info(self): - """ return the info of all the categorical parameters """ - return self._categorical_info - - @property - def X_train(self): - """ return the training data """ - return self._X_train - - @property - def y_train(self): - """ return the target of the training data""" - return self._y_train - - def _mapping_params(self, parameters_list): - if len(parameters_list) == 0: - return None - ret = [] - for parameters in parameters_list: - maplist = [np.zeros(1)]*len(self._names) - for p in parameters: - self.logger.debug("mapping: %r", p, extra={ - "Experiment": self._experiment_name}) - map_id = self._name_id[p.name] - if self._types[map_id] in [api_pb2.DOUBLE, api_pb2.INT, api_pb2.DISCRETE]: - maplist[map_id] = float(p.value) - elif self._types[map_id] == api_pb2.CATEGORICAL: - for ci in self._categorical_info: - if ci["name"] == p.name: - maplist[map_id] = np.zeros(ci["number"]) - for i, v in 
diff --git a/pkg/suggestion/v1beta1/bayesianoptimization/bayesian_optimization_algorithm.py b/pkg/suggestion/v1beta1/bayesianoptimization/bayesian_optimization_algorithm.py
deleted file mode 100644
index 5d2cdca760d..00000000000
--- a/pkg/suggestion/v1beta1/bayesianoptimization/bayesian_optimization_algorithm.py
+++ /dev/null
@@ -1,87 +0,0 @@
-# Copyright 2022 The Kubeflow Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-""" module for bayesian optimization algorithm """
-import numpy as np
-from sklearn.preprocessing import MinMaxScaler
-
-from pkg.suggestion.v1beta1.bayesianoptimization.global_optimizer import GlobalOptimizer
-
-
-class BOAlgorithm:
-    """ class for bayesian optimization """
-
-    def __init__(self, experiment_name, dim, N, lowerbound, upperbound, X_train, y_train, mode, trade_off,
-                 length_scale, noise, nu, kernel_type, n_estimators, max_features, model_type, logger=None):
-        # np.random.seed(0)
-        self._experiment_name = experiment_name
-        self.dim = dim
-        self.N = N or 100
-        self.l = np.zeros((1, dim))
-        self.u = np.ones((1, dim))
-        self.lowerbound = lowerbound.reshape(1, dim)
-        self.upperbound = upperbound.reshape(1, dim)
-        self.logger = logger
-
-        # normalize the upperbound and lowerbound to [0, 1]
-        self.scaler = MinMaxScaler()
-        self.scaler.fit(np.append(self.lowerbound, self.upperbound, axis=0))
-
-        self.X_train = X_train
-        self.y_train = y_train
-        if self.y_train is None:
-            self.current_optimal = None
-        else:
-            self.current_optimal = max(self.y_train)
-        self.logger.debug("create optimizer", extra={
-            "Experiment": self._experiment_name})
-        # initialize the global optimizer
-        self.optimizer = GlobalOptimizer(
-            N,
-            self.l,
-            self.u,
-            self.scaler,
-            self.X_train,
-            self.y_train,
-            self.current_optimal,
-            experiment_name=self._experiment_name,
-            mode=mode,
-            trade_off=trade_off,
-            length_scale=length_scale,
-            noise=noise,
-            nu=nu,
-            kernel_type=kernel_type,
-            n_estimators=n_estimators,
-            max_features=max_features,
-            model_type=model_type,
-            logger=logger,
-        )
-        self.logger.debug("optimizer created", extra={
-            "Experiment": self._experiment_name})
-
-    def get_suggestion(self, request_num):
-        """ main function to provide suggestion """
-        x_next_list = []
-        if self.X_train is None and self.y_train is None and self.current_optimal is None:
-            # randomly pick a point as the first trial
-            for _ in range(request_num):
-                x_next_list.append(np.random.uniform(
-                    self.lowerbound, self.upperbound, size=(1, self.dim)))
-        else:
-            _, x_next_list_que = self.optimizer.direct(request_num)
-            for xn in x_next_list_que:
-                x = np.array(xn).reshape(1, self.dim)
-                x = self.scaler.inverse_transform(x)
-                x_next_list.append(x)
-        return x_next_list
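
For reference, the normalization trick above fits a MinMaxScaler on exactly two
rows, the lower-bound and upper-bound vectors, so the optimizer can search the
unit cube and map candidates back to real parameter ranges. A small sketch with
made-up bounds:

    import numpy as np
    from sklearn.preprocessing import MinMaxScaler

    lowerbound = np.array([[0.001, 8.0]])    # e.g. learning rate, batch size
    upperbound = np.array([[0.1, 128.0]])

    scaler = MinMaxScaler()
    scaler.fit(np.append(lowerbound, upperbound, axis=0))  # rows: lo, hi

    x_unit = np.array([[0.5, 0.5]])            # candidate in the unit cube
    x_real = scaler.inverse_transform(x_unit)  # -> [[0.0505, 68.0]]
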
- -""" module for bayesian optimization algorithm """ -import numpy as np -from sklearn.preprocessing import MinMaxScaler - -from pkg.suggestion.v1beta1.bayesianoptimization.global_optimizer import GlobalOptimizer - - -class BOAlgorithm: - """ class for bayesian optimization """ - - def __init__(self, experiment_name, dim, N, lowerbound, upperbound, X_train, y_train, mode, trade_off, - length_scale, noise, nu, kernel_type, n_estimators, max_features, model_type, logger=None): - # np.random.seed(0) - self._experiment_name = experiment_name - self.dim = dim - self.N = N or 100 - self.l = np.zeros((1, dim)) - self.u = np.ones((1, dim)) - self.lowerbound = lowerbound.reshape(1, dim) - self.upperbound = upperbound.reshape(1, dim) - self.logger = logger - - # normalize the upperbound and lowerbound to [0, 1] - self.scaler = MinMaxScaler() - self.scaler.fit(np.append(self.lowerbound, self.upperbound, axis=0)) - - self.X_train = X_train - self.y_train = y_train - if self.y_train is None: - self.current_optimal = None - else: - self.current_optimal = max(self.y_train) - self.logger.debug("create optimizer", extra={ - "Experiment": self._experiment_name}) - # initialize the global optimizer - self.optimizer = GlobalOptimizer( - N, - self.l, - self.u, - self.scaler, - self.X_train, - self.y_train, - self.current_optimal, - experiment_name=self._experiment_name, - mode=mode, - trade_off=trade_off, - length_scale=length_scale, - noise=noise, - nu=nu, - kernel_type=kernel_type, - n_estimators=n_estimators, - max_features=max_features, - model_type=model_type, - logger=logger, - ) - self.logger.debug("optimizer created", extra={ - "Experiment": self._experiment_name}) - - def get_suggestion(self, request_num): - """ main function to provide suggestion """ - x_next_list = [] - if self.X_train is None and self.y_train is None and self.current_optimal is None: - # randomly pick a point as the first trial - for _ in range(request_num): - x_next_list.append(np.random.uniform( - self.lowerbound, self.upperbound, size=(1, self.dim))) - else: - _, x_next_list_que = self.optimizer.direct(request_num) - for xn in x_next_list_que: - x = np.array(xn).reshape(1, self.dim) - x = self.scaler.inverse_transform(x) - x_next_list.append(x) - return x_next_list diff --git a/pkg/suggestion/v1beta1/bayesianoptimization/global_optimizer.py b/pkg/suggestion/v1beta1/bayesianoptimization/global_optimizer.py deleted file mode 100644 index c5a23f5b86a..00000000000 --- a/pkg/suggestion/v1beta1/bayesianoptimization/global_optimizer.py +++ /dev/null @@ -1,323 +0,0 @@ -# Copyright 2022 The Kubeflow Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -""" module for the global optimizer -DIRECT algorithm is used in this case -""" -import copy - -import numpy as np - -from pkg.suggestion.v1beta1.bayesianoptimization.acquisition_func import AcquisitionFunc -from pkg.suggestion.v1beta1.bayesianoptimization.model.gp import GaussianProcessModel -from pkg.suggestion.v1beta1.bayesianoptimization.model.rf import RandomForestModel -from pkg.suggestion.v1beta1.bayesianoptimization.utils import get_logger - - -class RectPack: - """ class for the rectangular - including border, center and acquisition function value - """ - - def __init__(self, l, u, division_num, dim, scaler, aq_func): - self.l = l - self.u = u - self.center = (l + u) / 2 - j = np.mod(division_num, dim) - k = (division_num - j) / dim - self.d = np.sqrt(j * np.power(3, float(-2 * (k + 1))) + - (dim - j) * np.power(3, float(-2 * k))) / 2 - self.division_num = division_num - self.fc, _, _ = aq_func.compute(scaler.inverse_transform(self.center)) - self.fc = -self.fc - - -class RectBucket: - """ class for the rectangular bucket - rectangular with the same size are put in the same bucket - the rectangular is sorted by the acquisition function value - """ - - def __init__(self, diff, pack): - self.diff = diff - self.array = [pack] - - def insert(self, new_pack): - """ insert a new rectangular to a bucket """ - for i in range(len(self.array)): - if new_pack.fc < self.array[i].fc: - self.array.insert(i, new_pack) - return - self.array.append(new_pack) - - def delete(self): - """ delete the first rectangular""" - del self.array[0] - - def diff_exist(self, diff): - """ detect the size difference """ - return abs(self.diff - diff) < 0.00001 - - -class OptimalPoint: - """ helper class to find potential optimal points""" - - def __init__(self, point, prev, slope): - self.point = point - self.prev = prev - self.slope = slope - - -class DimPack: - def __init__(self, dim, fc): - self.dim = dim - self.fc = fc - - -class GlobalOptimizer: - """ class for the global optimizer """ - - def __init__(self, N, l, u, scaler, X_train, y_train, current_optimal, - experiment_name, mode, trade_off, length_scale, - noise, nu, kernel_type, n_estimators, max_features, model_type, logger=None): - self.logger = logger if (logger is not None) else get_logger() - self.N = N - self.l = l - self.u = u - self.scaler = scaler - self.buckets = [] - self.dim = None - self._experiment_name = experiment_name - if model_type == "gp": - model = GaussianProcessModel( - length_scale=length_scale, - noise=noise, - nu=nu, - kernel_type=kernel_type, - ) - else: - model = RandomForestModel( - n_estimators=n_estimators, - max_features=max_features, - ) - self.logger.debug("before model fit", extra={ - "Experiment": self._experiment_name}) - model.fit(X_train, y_train) - self.logger.debug("after model fit", extra={ - "Experiment": self._experiment_name}) - self.aq_func = AcquisitionFunc( - model=model, - current_optimal=current_optimal, - mode=mode, - trade_off=trade_off, - ) - - def potential_opt(self, f_min): - """ find the potential optimal rectangular """ - b = [] - for i in range(len(self.buckets)): - b.append(self.buckets[i].array[0]) - b.sort(key=lambda x: x.d) - index = 0 - min_fc = b[0].fc - for i in range(len(b)): - if b[i].fc < min_fc: - min_fc = b[i].fc - index = i - - opt_list = [OptimalPoint(b[index], 0, 0)] - for i in range(index + 1, len(b)): - prev = len(opt_list) - 1 - diff1 = b[i].d - diff2 = opt_list[prev].point.d - current_slope = ( - b[i].fc - opt_list[prev].point.fc) / (diff1 - diff2) - prev_slope = 
-
-            while prev >= 0 and current_slope < prev_slope:
-                temp = opt_list[prev].prev
-                opt_list[prev].prev = -1
-                prev = temp
-                prev_slope = opt_list[prev].slope
-                diff1 = b[i].d
-                diff2 = opt_list[prev].point.d
-                current_slope = (
-                    b[i].fc - opt_list[prev].point.fc) / (diff1 - diff2)
-
-            opt_list.append(OptimalPoint(b[i], prev, current_slope))
-
-        opt_list2 = []
-        for i in range(len(opt_list)):
-            if opt_list[i].prev != -1:
-                opt_list2.append(opt_list[i])
-
-        for i in range(len(opt_list2) - 1):
-            c1 = opt_list2[i].point.d
-            c2 = opt_list2[i + 1].point.d
-            fc1 = opt_list2[i].point.fc
-            fc2 = opt_list2[i + 1].point.fc
-            if fc1 - c1 * (fc1 - fc2) / (c1 - c2) > (1 - 0.001) * f_min:
-                # if abs(fc1-fc2)<0.0001:
-                opt_list2[i] = None
-        while None in opt_list2:
-            index = opt_list2.index(None)
-            del opt_list2[index]
-        # for opt in opt_list2:
-        #     print(opt.point.fc)
-        return opt_list2
-
-    def direct(self, request_num):
-        """ main algorithm """
-        self.dim = self.l.shape[1]
-        division_num = 0
-
-        # create the first rectangle and put it in the first bucket
-        first_rect = RectPack(self.l, self.u, division_num, self.dim,
-                              self.scaler, self.aq_func)
-        self.buckets.append(RectBucket(first_rect.d, first_rect))
-
-        ei_min = []
-        f_min = first_rect.fc
-        x_next = first_rect.center
-        ei_min.append(f_min)
-
-        for _ in range(self.N):
-            opt_set = self.potential_opt(f_min)
-
-            # for bucket in self.buckets:
-            #     for i in range(len(bucket.array)):
-            #         print(bucket.array[i].fc)
-            #         plt.plot(bucket.diff, bucket.array[i].fc, 'b.')
-            #
-            # for opt in opt_set:
-            #     plt.plot(opt.point.d, opt.point.fc, 'r.')
-            # plt.show()
-
-            for opt in opt_set:
-                f_min, x_next = self.divide_rect(
-                    opt.point,
-                    f_min,
-                    x_next,
-                    self.aq_func,
-                    self.scaler
-                )
-                for bucket in self.buckets:
-                    if bucket.diff_exist(opt.point.d):
-                        bucket.delete()
-                        if not bucket.array:
-                            index = self.buckets.index(bucket)
-                            del self.buckets[index]
-            ei_min.append(f_min)
-        x_next_candidate = self.sample_buckets(request_num)
-        return f_min, x_next_candidate
-
-    def sample_buckets(self, request_num):
-        self.logger.debug("len(self.buckets): %r", len(self.buckets))
-        bucket_index = []
-        fc_sum = 0.0
-        x_next_candidate = []
-        for bucket in self.buckets:
-            for a in bucket.array:
-                self.logger.debug("fc: %r, %r", a.fc, a.center)
-                fc_sum -= a.fc
-                bucket_index.append([-a.fc, a.center])
-        bucket_index = sorted(bucket_index, key=lambda x: x[0])
-        for _ in range(request_num):
-            sample = np.random.rand()
-            stick = 0.0
-            for b in bucket_index:
-                stick += b[0]/fc_sum
-                if stick > sample:
-                    x_next_candidate.append(b[1])
-                    break
-        return x_next_candidate
-
-    def divide_rect(self, opt_rect, f_min, x_next, aq_func, scaler):
-        """ divide the rectangle into smaller ones """
-        rect = copy.deepcopy(opt_rect)
-        division_num = rect.division_num
-        j = np.mod(division_num, self.dim)
-        k = (division_num - j) / self.dim
-        max_side_len = np.power(3, float(-k))
-        delta = max_side_len / 3
-        dim_set = []
-        for i in range(self.dim):
-            if abs(max_side_len - (rect.u[0, i] - rect.l[0, i])) < 0.0000001:
-                dim_set.append(i)
-
-        dim_list = []
-        for i in dim_set:
-            e = np.zeros((1, self.dim))
-            e[0, i] = 1
-            function_value = min(
-                aq_func.compute(scaler.inverse_transform(
-                    rect.center + delta * e)),
-                aq_func.compute(scaler.inverse_transform(
-                    rect.center - delta * e))
-            )
-            dim_list.append(DimPack(i, function_value))
-        dim_list.sort(key=lambda x: x.fc)
-
-        for i in range(len(dim_list)):
-            division_num = division_num + 1
-            temp = np.zeros((1, self.dim))
-            temp[0, dim_list[i].dim] = delta
-            left_rect = RectPack(
-                rect.l,
-                rect.u - 2 * temp,
-                division_num,
-                self.dim,
-                self.scaler,
-                aq_func
-            )
-            middle_rect = RectPack(
-                rect.l + temp,
-                rect.u - temp,
-                division_num,
-                self.dim,
-                self.scaler,
-                aq_func
-            )
-            right_rect = RectPack(
-                rect.l + 2 * temp,
-                rect.u,
-                division_num,
-                self.dim,
-                self.scaler,
-                aq_func
-            )
-            if left_rect.fc < f_min:
-                f_min = left_rect.fc
-                x_next = left_rect.center
-            if right_rect.fc < f_min:
-                f_min = right_rect.fc
-                x_next = right_rect.center
-
-            insert = 0
-            for bucket in self.buckets:
-                if bucket.diff_exist(left_rect.d):
-                    bucket.insert(left_rect)
-                    bucket.insert(right_rect)
-                    if i == len(dim_list) - 1:
-                        bucket.insert(middle_rect)
-                    insert = 1
-                    break
-            if insert == 0:
-                new_bucket = RectBucket(left_rect.d, left_rect)
-                new_bucket.insert(right_rect)
-                if i == len(dim_list) - 1:
-                    new_bucket.insert(middle_rect)
-                self.buckets.append(new_bucket)
-            rect = middle_rect
-        return f_min, x_next
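
For context, the divide_rect step above is the classic DIRECT trisection: a
potentially optimal box is cut into thirds along its longest sides, and the
left/middle/right children are re-scored at their centers. A toy sketch of just
the geometric step (a simplification; the removed code additionally orders the
split dimensions by acquisition value):

    import numpy as np

    def trisect(lower, upper, dim):
        """Split the box [lower, upper] into three equal boxes along `dim`."""
        delta = np.zeros_like(lower)
        delta[dim] = (upper[dim] - lower[dim]) / 3.0
        left = (lower.copy(), upper - 2 * delta)
        middle = (lower + delta, upper - delta)
        right = (lower + 2 * delta, upper.copy())
        return [left, middle, right]

    # e.g. trisect(np.zeros(2), np.ones(2), 0) yields boxes whose x-ranges are
    # [0, 1/3], [1/3, 2/3] and [2/3, 1]; the children's centers become the new
    # candidate points.
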
diff --git a/pkg/suggestion/v1beta1/bayesianoptimization/model/__init__.py b/pkg/suggestion/v1beta1/bayesianoptimization/model/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/pkg/suggestion/v1beta1/bayesianoptimization/model/gp.py b/pkg/suggestion/v1beta1/bayesianoptimization/model/gp.py
deleted file mode 100644
index 08e9e4b1e6e..00000000000
--- a/pkg/suggestion/v1beta1/bayesianoptimization/model/gp.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Copyright 2022 The Kubeflow Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-""" module for gaussian process prior """
-from sklearn.gaussian_process.kernels import RBF, Matern
-from sklearn.gaussian_process import GaussianProcessRegressor
-
-
-class GaussianProcessModel:
-    """ use the gaussian process as a prior """
-    def __init__(self, length_scale=0.5, noise=0.00005,
-                 nu=1.5, kernel_type="matern"):
-        """
-        :param length_scale: the larger the length_scale is, the smoother the gaussian prior is. If a float,
-        an isotropic kernel is used. If an array, an anisotropic kernel is used where each dimension of it defines
-        the length-scale of the respective feature dimension.
-        :param noise: observation noise added to the diagonal of the kernel matrix (the regressor's alpha).
-        :param nu: control the smoothness of the prior using a Matern kernel. The larger nu is, the smoother the
-        approximated function is.
-        :param kernel_type: "rbf": squared exponential kernel, "matern": Matern kernel.
-        """
-        if kernel_type == "rbf":
-            kernel = RBF(length_scale=length_scale)
-        elif kernel_type == "matern":
-            kernel = Matern(length_scale=length_scale, nu=nu)
-        else:
-            raise Exception("kernel_type must be 'rbf' or 'matern'")
-        self.gp = GaussianProcessRegressor(
-            kernel=kernel,
-            alpha=noise,
-            random_state=0,
-            optimizer=None,
-        )
-
-    def fit(self, X_train, y_train):
-        self.gp.fit(X_train, y_train)
-
-    def predict(self, X_test):
-        y_mean, y_std = self.gp.predict(X_test, return_std=True)
-        y_variance = y_std ** 2
-        return y_mean, y_std, y_variance
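
A minimal usage sketch of the same surrogate pattern directly with
scikit-learn, fixed kernel hyperparameters via optimizer=None and observation
noise supplied through alpha (the training data here is made up for
illustration):

    import numpy as np
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import Matern

    gp = GaussianProcessRegressor(
        kernel=Matern(length_scale=0.5, nu=1.5),
        alpha=0.00005,    # observation noise on the kernel diagonal
        optimizer=None,   # keep the prior's hyperparameters fixed
        random_state=0,
    )
    X_train = np.array([[0.1], [0.4], [0.9]])
    y_train = np.array([0.2, 0.8, 0.3])
    gp.fit(X_train, y_train)
    y_mean, y_std = gp.predict(np.array([[0.5]]), return_std=True)
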
- """ - if kernel_type == "rbf": - kernel = RBF(length_scale=length_scale) - elif kernel_type == "matern": - kernel = Matern(length_scale=length_scale, nu=nu) - else: - raise Exception("kernel_type must be 'rbf' or 'matern'") - self.gp = GaussianProcessRegressor( - kernel=kernel, - alpha=noise, - random_state=0, - optimizer=None, - ) - - def fit(self, X_train, y_train): - self.gp.fit(X_train, y_train) - - def predict(self, X_test): - y_mean, y_std = self.gp.predict(X_test, return_std=True) - y_variance = y_std ** 2 - return y_mean, y_std, y_variance diff --git a/pkg/suggestion/v1beta1/bayesianoptimization/model/rf.py b/pkg/suggestion/v1beta1/bayesianoptimization/model/rf.py deleted file mode 100644 index 41757cfd2dc..00000000000 --- a/pkg/suggestion/v1beta1/bayesianoptimization/model/rf.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 2022 The Kubeflow Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import forestci as fci -from sklearn.ensemble import RandomForestRegressor - - -class RandomForestModel: - - def __init__(self, n_estimators=50, max_features="auto"): - self.rf = RandomForestRegressor( - n_estimators=n_estimators, - max_features=max_features, - ) - self.X_train = None - - def fit(self, X_train, y_train): - print(X_train.shape, y_train.shape) - self.X_train = X_train - self.rf.fit(X_train, y_train) - - def predict(self, X_test): - y_mean = self.rf.predict(X_test) - y_variance = fci.random_forest_error(self.rf, self.X_train, X_test) - y_std = np.sqrt(y_variance) - return y_mean, y_std, y_variance diff --git a/pkg/suggestion/v1beta1/bayesianoptimization/utils.py b/pkg/suggestion/v1beta1/bayesianoptimization/utils.py deleted file mode 100644 index 5af90c62f66..00000000000 --- a/pkg/suggestion/v1beta1/bayesianoptimization/utils.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright 2022 The Kubeflow Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import logging -from logging import getLogger, StreamHandler - - -FORMAT = '%(asctime)-15s Experiment %(experiment_name)s %(message)s' -LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO") - - -def get_logger(name=__name__): - logger = getLogger(name) - logging.basicConfig(format=FORMAT) - handler = StreamHandler() - logger.setLevel(LOG_LEVEL) - logger.addHandler(handler) - logger.propagate = False - return logger