From b893e4d0d49785cce57e6389b607ca42f7de791d Mon Sep 17 00:00:00 2001 From: Yingbo Date: Thu, 10 May 2018 12:06:35 -0700 Subject: [PATCH 1/3] add random forest prior to Bayesian Optimization --- pkg/suggestion/bayesian_service.py | 16 ++++++-- .../src/acquisition_func/acquisition_func.py | 40 ++++++++++++++----- .../src/bayesian_optimization_algorithm.py | 5 ++- .../src/global_optimizer/global_optimizer.py | 5 ++- .../src/model/{Model.py => gp.py} | 2 +- .../bayesianoptimization/src/model/rf.py | 11 +++++ pkg/suggestion/test_client.py | 10 ++++- 7 files changed, 71 insertions(+), 18 deletions(-) rename pkg/suggestion/bayesianoptimization/src/model/{Model.py => gp.py} (97%) create mode 100644 pkg/suggestion/bayesianoptimization/src/model/rf.py diff --git a/pkg/suggestion/bayesian_service.py b/pkg/suggestion/bayesian_service.py index 85716d15cd2..0cc975421d9 100644 --- a/pkg/suggestion/bayesian_service.py +++ b/pkg/suggestion/bayesian_service.py @@ -69,7 +69,10 @@ def GenerateTrials(self, request, context): length_scale=self.service_params[request.study_id]["length_scale"], noise=self.service_params[request.study_id]["noise"], nu=self.service_params[request.study_id]["nu"], - kernel_type=self.service_params[request.study_id]["kernel_type"] + kernel_type=self.service_params[request.study_id]["kernel_type"], + n_estimators=self.service_params[request.study_id]["n_estimators"], + max_features=self.service_params[request.study_id]["max_features"], + model_type=self.service_params[request.study_id]["model_type"], ) x_next = alg.get_suggestion().squeeze() @@ -113,7 +116,10 @@ def SetSuggestionParameters(self, request, context): "nu": None, "kernel_type": None, "mode": None, - "trade_off": None + "trade_off": None, + "n_estimators": None, + "max_features": None, + "model_type": None, } for param in request.suggestion_parameters: if param.name not in self.service_params[request.study_id].keys(): @@ -122,7 +128,7 @@ def SetSuggestionParameters(self, request, context): return 
api_pb2.SetSuggestionParametersReply() if param.name == "length_scale" or param.name == "noise" or param.name == "nu" or param.name == "trade_off": self.service_params[request.study_id][param.name] = float(param.value) - elif param.name == "N": + elif param.name == "N" or param.name == "n_estimators": self.service_params[request.study_id][param.name] = int(param.value) elif param.name == "kernel_type": if param.value != "rbf" and param.value != "matern": @@ -134,6 +140,11 @@ def SetSuggestionParameters(self, request, context): context.set_code(grpc.StatusCode.UNKNOWN) context.set_details("unknown acquisition mode: " + param.name) self.service_params[request.study_id][param.name] = param.value + elif param.name == "model_type": + if param.value != "rf" and param.value != "gp": + context.set_code(grpc.StatusCode.UNKNOWN) + context.set_details("unknown model_type: " + param.value) + self.service_params[request.study_id][param.name] = param.value return api_pb2.SetSuggestionParametersReply() diff --git a/pkg/suggestion/bayesianoptimization/src/acquisition_func/acquisition_func.py b/pkg/suggestion/bayesianoptimization/src/acquisition_func/acquisition_func.py index 35d9a0bcae2..b980f2658f7 100644 --- a/pkg/suggestion/bayesianoptimization/src/acquisition_func/acquisition_func.py +++ b/pkg/suggestion/bayesianoptimization/src/acquisition_func/acquisition_func.py @@ -1,35 +1,53 @@ """ module for acquisition function""" import numpy as np from scipy.stats import norm +import forestci as fci -from pkg.suggestion.bayesianoptimization.src.model.Model import Model +from pkg.suggestion.bayesianoptimization.src.model.gp import GaussianProcessModel +from pkg.suggestion.bayesianoptimization.src.model.rf import RandomForestModel class AcquisitionFunc: """ class for acquisition function expected improvement in this case """ - def __init__(self, X_train, y_train, current_optimal, mode, trade_off, length_scale, noise, nu, kernel_type): + def __init__(self, X_train, y_train, current_optimal, mode, trade_off, length_scale, + noise, nu, kernel_type, 
n_estimators, max_features, model_type): """ :param mode: pi: probability of improvement, ei: expected improvement, lcb: lower confident bound :param trade_off: a parameter to control the trade off between exploiting and exploring + :param model_type: gp: gaussian process, rf: random forest """ self.X_train = X_train self.y_train = y_train self.current_optimal = current_optimal self.mode = mode or "ei" self.trade_off = trade_off or 0.01 - self.model = Model( - length_scale=length_scale, - noise=noise, - nu=nu, - kernel_type=kernel_type, - ) + self.model_type = model_type or "gp" + if self.model_type == "gp": + self.model = GaussianProcessModel( + length_scale=length_scale, + noise=noise, + nu=nu, + kernel_type=kernel_type, + ) + else: + self.model = RandomForestModel( + n_estimators=n_estimators, + max_features=max_features, + ) def compute(self, X_test): - self.model.gp.fit(self.X_train, self.y_train) - y_mean, y_std = self.model.gp.predict(X_test, return_std=True) - y_variance = y_std ** 2 + if self.model_type == "gp": + self.model.gp.fit(self.X_train, self.y_train) + y_mean, y_std = self.model.gp.predict(X_test, return_std=True) + y_variance = y_std ** 2 + else: + self.model.rf.fit(self.X_train, self.y_train) + y_mean = self.model.rf.predict(X_test) + y_variance = fci.random_forest_error(self.model.rf, self.X_train, X_test) + y_std = np.sqrt(y_variance) + z = (y_mean - self.current_optimal - self.trade_off) / y_std if self.mode == "ei": diff --git a/pkg/suggestion/bayesianoptimization/src/bayesian_optimization_algorithm.py b/pkg/suggestion/bayesianoptimization/src/bayesian_optimization_algorithm.py index 62bbc15a71e..4bca66d1879 100644 --- a/pkg/suggestion/bayesianoptimization/src/bayesian_optimization_algorithm.py +++ b/pkg/suggestion/bayesianoptimization/src/bayesian_optimization_algorithm.py @@ -8,7 +8,7 @@ class BOAlgorithm: """ class for bayesian optimization """ def __init__(self, dim, N, lowerbound, upperbound, X_train, y_train, mode, trade_off, - 
length_scale, noise, nu, kernel_type): + length_scale, noise, nu, kernel_type, n_estimators, max_features, model_type): # np.random.seed(0) self.dim = dim self.N = N or 100 @@ -44,6 +44,9 @@ def __init__(self, dim, N, lowerbound, upperbound, X_train, y_train, mode, trade noise=noise, nu=nu, kernel_type=kernel_type, + n_estimators=n_estimators, + max_features=max_features, + model_type=model_type, ) def get_suggestion(self): diff --git a/pkg/suggestion/bayesianoptimization/src/global_optimizer/global_optimizer.py b/pkg/suggestion/bayesianoptimization/src/global_optimizer/global_optimizer.py index bcc2f132832..4d7c7d6bd0e 100644 --- a/pkg/suggestion/bayesianoptimization/src/global_optimizer/global_optimizer.py +++ b/pkg/suggestion/bayesianoptimization/src/global_optimizer/global_optimizer.py @@ -70,7 +70,7 @@ class GlobalOptimizer: """ class for the global optimizer """ def __init__(self, N, l, u, scaler, X_train, y_train, current_optimal, mode, trade_off, length_scale, - noise, nu, kernel_type): + noise, nu, kernel_type, n_estimators, max_features, model_type): self.N = N self.l = l self.u = u @@ -87,6 +87,9 @@ def __init__(self, N, l, u, scaler, X_train, y_train, current_optimal, mode, tra noise=noise, nu=nu, kernel_type=kernel_type, + n_estimators=n_estimators, + max_features=max_features, + model_type=model_type, ) def potential_opt(self, f_min): diff --git a/pkg/suggestion/bayesianoptimization/src/model/Model.py b/pkg/suggestion/bayesianoptimization/src/model/gp.py similarity index 97% rename from pkg/suggestion/bayesianoptimization/src/model/Model.py rename to pkg/suggestion/bayesianoptimization/src/model/gp.py index b23e393e5a2..9f8a750a5ab 100644 --- a/pkg/suggestion/bayesianoptimization/src/model/Model.py +++ b/pkg/suggestion/bayesianoptimization/src/model/gp.py @@ -3,7 +3,7 @@ from sklearn.gaussian_process import GaussianProcessRegressor -class Model: +class GaussianProcessModel: """ use the gaussian process as a prior """ def __init__(self, length_scale, 
noise, nu, kernel_type): """ diff --git a/pkg/suggestion/bayesianoptimization/src/model/rf.py b/pkg/suggestion/bayesianoptimization/src/model/rf.py new file mode 100644 index 00000000000..59818902c66 --- /dev/null +++ b/pkg/suggestion/bayesianoptimization/src/model/rf.py @@ -0,0 +1,11 @@ +from sklearn.ensemble import RandomForestRegressor + + +class RandomForestModel: + def __init__(self, n_estimators, max_features): + n_estimators = n_estimators or 50 + max_features = max_features or "auto" + self.rf = RandomForestRegressor( + n_estimators=n_estimators, + max_features=max_features, + ) diff --git a/pkg/suggestion/test_client.py b/pkg/suggestion/test_client.py index 5543f0353aa..c81c1470225 100644 --- a/pkg/suggestion/test_client.py +++ b/pkg/suggestion/test_client.py @@ -27,7 +27,15 @@ def run(): api_pb2.SuggestionParameter( name="trade_off", value="0.01", - ) + ), + api_pb2.SuggestionParameter( + name="model_type", + value="gp", + ), + api_pb2.SuggestionParameter( + name="n_estimators", + value="50", + ), ] )) completed_trials = [] From 068f3fe8b546f9e098b59d78c3b9159a14e6613c Mon Sep 17 00:00:00 2001 From: Yingbo Date: Thu, 10 May 2018 23:37:33 -0700 Subject: [PATCH 2/3] add citation for BO in readme --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 191503cb519..99c5f5c6b76 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ A Suggestion is an algorithm to construct a parameter set. 
Currently Katib suppo * random * grid * [hyperband](https://arxiv.org/pdf/1603.06560.pdf) +* [bayesian optimization](https://arxiv.org/pdf/1012.2599.pdf) ## Components in Katib @@ -43,6 +44,7 @@ Each component communicates with others via GRPC and the API is defined at `api/ - vizier-suggestion-random - vizier-suggestion-grid - vizier-suggestion-hyperband + - vizier-suggestion-bayesianoptimization - modeldb : WebUI - modeldb-frontend - modeldb-backend From 47059ca2f3801df19488ba343cb1d626d8cf794b Mon Sep 17 00:00:00 2001 From: Yingbo Date: Wed, 16 May 2018 20:17:13 -0400 Subject: [PATCH 3/3] add requirements --- cmd/suggestion/bayesianoptimization/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmd/suggestion/bayesianoptimization/requirements.txt b/cmd/suggestion/bayesianoptimization/requirements.txt index d2241fdcc2b..909f83866e8 100644 --- a/cmd/suggestion/bayesianoptimization/requirements.txt +++ b/cmd/suggestion/bayesianoptimization/requirements.txt @@ -2,3 +2,5 @@ grpcio numpy>=1.13.3 scikit-learn>=0.19.0 scipy>=0.19.1 +forestci +protobuf