From 1f4d3f014cb95922e97523a8a93202c234209f1c Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Sun, 22 Sep 2019 16:52:18 +0800 Subject: [PATCH 01/20] feat: Add HyperBand Signed-off-by: Ce Gao --- cmd/suggestion/hyperband/v1alpha3/Dockerfile | 11 +- cmd/suggestion/hyperband/v1alpha3/main.py | 5 +- .../hyperband/v1alpha3/requirements.txt | 10 +- pkg/suggestion/v1alpha3/hyperband_service.py | 405 ++++++++++-------- pkg/suggestion/v1alpha3/hyperopt_service.py | 1 - scripts/v1alpha3/build.sh | 1 + .../v1alpha3/test_hyperband_service.py | 183 ++++++++ 7 files changed, 422 insertions(+), 194 deletions(-) create mode 100644 test/suggestion/v1alpha3/test_hyperband_service.py diff --git a/cmd/suggestion/hyperband/v1alpha3/Dockerfile b/cmd/suggestion/hyperband/v1alpha3/Dockerfile index ee1632d5a89..7c3efdf6c08 100644 --- a/cmd/suggestion/hyperband/v1alpha3/Dockerfile +++ b/cmd/suggestion/hyperband/v1alpha3/Dockerfile @@ -1,13 +1,18 @@ FROM python:3 -ADD . /usr/src/app/github.com/kubeflow/katib -WORKDIR /usr/src/app/github.com/kubeflow/katib/cmd/suggestion/hyperband/v1alpha3 RUN if [ "$(uname -m)" = "ppc64le" ]; then \ apt-get -y update && \ apt-get -y install gfortran libopenblas-dev liblapack-dev && \ pip install cython; \ fi +RUN GRPC_HEALTH_PROBE_VERSION=v0.3.0 && \ + wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-amd64 && \ + chmod +x /bin/grpc_health_probe + +ADD . /usr/src/app/github.com/kubeflow/katib +WORKDIR /usr/src/app/github.com/kubeflow/katib/cmd/suggestion/hyperband/v1alpha3 RUN pip install --no-cache-dir -r requirements.txt -ENV PYTHONPATH /usr/src/app/github.com/kubeflow/katib:/usr/src/app/github.com/kubeflow/katib/pkg/apis/manager/v1alpha3/python + +ENV PYTHONPATH /usr/src/app/github.com/kubeflow/katib:/usr/src/app/github.com/kubeflow/katib/pkg/apis/manager/v1alpha3/python:/usr/src/app/github.com/kubeflow/katib/pkg/apis/manager/health/python ENTRYPOINT ["python", "main.py"] diff --git a/cmd/suggestion/hyperband/v1alpha3/main.py b/cmd/suggestion/hyperband/v1alpha3/main.py index 7ab50d7e09a..57ccc1fe571 100644 --- a/cmd/suggestion/hyperband/v1alpha3/main.py +++ b/cmd/suggestion/hyperband/v1alpha3/main.py @@ -1,6 +1,7 @@ import grpc import time from pkg.apis.manager.v1alpha3.python import api_pb2_grpc +from pkg.apis.manager.health.python import health_pb2_grpc from pkg.suggestion.v1alpha3.hyperband_service import HyperbandService from concurrent import futures @@ -9,7 +10,9 @@ def serve(): server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) - api_pb2_grpc.add_SuggestionServicer_to_server(HyperbandService(), server) + service = HyperbandService() + api_pb2_grpc.add_SuggestionServicer_to_server(service, server) health_pb2_grpc.add_HealthServicer_to_server(service, server) + server.add_insecure_port(DEFAULT_PORT) print("Listening...") server.start() diff --git a/cmd/suggestion/hyperband/v1alpha3/requirements.txt b/cmd/suggestion/hyperband/v1alpha3/requirements.txt index 8d2c9d4bda7..210d97f7f81 100644 --- a/cmd/suggestion/hyperband/v1alpha3/requirements.txt +++ b/cmd/suggestion/hyperband/v1alpha3/requirements.txt @@ -1,9 +1,9 @@ -grpcio -duecredit +grpcio==1.23.0 +duecredit===0.7.0 cloudpickle==0.5.6 numpy>=1.13.3 scikit-learn>=0.19.0 scipy>=0.19.1 -forestci -protobuf -googleapis-common-protos +forestci==0.3 +protobuf==3.9.1 +googleapis-common-protos==1.6.0 diff --git a/pkg/suggestion/v1alpha3/hyperband_service.py b/pkg/suggestion/v1alpha3/hyperband_service.py index 9498b3c383d..0d4e6290fa6 100644 --- a/pkg/suggestion/v1alpha3/hyperband_service.py +++ b/pkg/suggestion/v1alpha3/hyperband_service.py @@ -1,182 +1,116 @@ import math import traceback - +import grpc import logging from logging import getLogger, StreamHandler, DEBUG + from pkg.apis.manager.v1alpha3.python import api_pb2 from pkg.apis.manager.v1alpha3.python import api_pb2_grpc -import grpc -from . import parsing_util - -class HyperbandService(api_pb2_grpc.SuggestionServicer): - def __init__(self): - self.manager_addr = "katib-manager" - self.manager_port = 6789 - FORMAT = '%(asctime)-15s Experiment %(experiment_name)s %(message)s' - self.logger = getLogger(__name__) - logging.basicConfig(format=FORMAT) - handler = StreamHandler() - handler.setLevel(DEBUG) - self.logger.setLevel(DEBUG) - self.logger.addHandler(handler) - - def _get_experiment(self, name): - channel = grpc.beta.implementations.insecure_channel(self.manager_addr, self.manager_port) - with api_pb2.beta_create_Manager_stub(channel) as client: - exp = client.GetExperiment(api_pb2.GetExperimentRequest(experiment_name=name), 10) - return exp.experiment - - def _get_algorithm_settings(self, experiment_name): - channel = grpc.beta.implementations.insecure_channel(self.manager_addr, self.manager_port) - with api_pb2.beta_create_Manager_stub(channel) as client: - alg = client.GetAlgorithmExtraSettings(api_pb2.GetAlgorithmExtraSettingsRequest( - experiment_name=experiment_name), 10) - params = alg.extra_algorithm_settings - alg_settings = {} - for param in params: - alg_settings[param.name] = param.value - return alg_settings - - def _get_trials(self, experiment_name): - channel = grpc.beta.implementations.insecure_channel(self.manager_addr, self.manager_port) - with api_pb2.beta_create_Manager_stub(channel) as client: - reply = client.GetTrialList(api_pb2.GetTrialListRequest(experiment_name=experiment_name), 10) - return reply.trials - +from pkg.suggestion.v1alpha3 import parsing_util +from pkg.suggestion.v1alpha3.base_health_service import HealthServicer + +logger = getLogger(__name__) +FORMAT = '%(asctime)-15s Experiment %(experiment_name)s %(message)s' +logging.basicConfig(format=FORMAT) +handler = StreamHandler() +handler.setLevel(DEBUG) +logger.setLevel(DEBUG) +logger.addHandler(handler) + +class HyperbandService(api_pb2_grpc.SuggestionServicer, HealthServicer): def GetSuggestions(self, request, context): """ Main function to provide suggestion. """ try: - reply = api_pb2.GetSuggestionsReply() - experiment_name = request.experiment_name - experiment = self._get_experiment(experiment_name) - alg_settings = self._get_algorithm_settings(experiment_name) - - sParams = self._parse_suggestionParameters(experiment, alg_settings) - if sParams["current_s"] < 0: + reply = api_pb2.GetSuggestionsReply() + experiment = request.experiment + alg_settings = experiment.spec.algorithm.algorithm_setting + + param = HyperBandParam.convert(alg_settings) + if param.current_s < 0: + # Hyperband outlerloop has finished + return reply + # This is a hack to get request number. + param.n = request.request_number + + trials = self._make_bracket(experiment, param) + for trial in trials: + reply.parameter_assignments.add(assignments=trial.parameter_assignments.assignments) + reply.algorithm.CopyFrom(HyperBandParam.generate(param)) return reply - - trials = self._make_bracket(experiment, sParams) - self._update_algorithm_extrasettings(experiment_name, sParams) - for trial in trials: - reply.trials.add(spec=trial) - return reply except Exception as e: - self.logger.error("Fail to generate trials: \n%s", - traceback.format_exc(), extra={"experiment_name": experiment_name}) + logger.error("Fail to generate trials: \n%s", + traceback.format_exc(), extra={"experiment_name": experiment.name}) raise e - def _update_hbParameters(self, sParams): - sParams["current_i"] += 1 - if sParams["current_i"] > sParams["current_s"]: - self._new_hbParameters(sParams) - - def _new_hbParameters(self, sParams): - sParams["current_s"] -= 1 - sParams["current_i"] = 0 - if sParams["current_s"] >= 0: - # when sParams["current_s"] < 0, hyperband algorithm reaches the end - sParams["n"] = int(math.ceil(float(sParams["sMax"] + 1) * ( - float(sParams["eta"]**sParams["current_s"]) / float(sParams["current_s"]+1)))) - sParams["r"] = sParams["r_l"]*sParams["eta"]**(-sParams["current_s"]) - - def _parse_suggestionParameters(self, experiment, alg_settings): - sParams = { - "eta": 3, - "sMax": -1, - "r_l": -1, - "b_l": -1, - "r": -1, - "n": -1, - "current_s": -2, - "current_i": -1, - "resourceName": "", - "evaluatingTrials": 0, - } - - for k, v in alg_settings.items(): - if k in ["eta", "r_l", "b_l"]: - sParams[k] = float(v) - elif k in ["n", "r", "current_s", "current_i", "sMax", "evaluatingTrials"]: - sParams[k] = int(float(v)) - elif k == "resourceName": - sParams[k] = v - else: - self.logger.info("Unknown HyperBand Param %s, ignore it", - k, extra={"experiment_name": experiment.name}) - if sParams["current_s"] == -1: - # Hyperband outlerloop has finished - self.logger.info("HyperBand outlerloop has finished.", - extra={"experiment_name": experiment.name}) - return sParams - - if sParams["eta"] <= 0: - sParams["eta"] = 3 - if sParams["sMax"] < 0: - sParams["sMax"] = int(math.log(sParams["r_l"]) / math.log(sParams["eta"])) - if sParams["b_l"] < 0: - sParams["b_l"] = (sParams["sMax"] + 1) * sParams["r_l"] - if sParams["current_s"] < 0: - sParams["current_s"] = sParams["sMax"] - if sParams["current_i"] < 0: - sParams["current_i"] = 0 - if sParams["n"] < 0: - sParams["n"] = int(math.ceil(float(sParams["sMax"] + 1) * ( - float(sParams["eta"]**sParams["current_s"]) / float(sParams["current_s"]+1)))) - if sParams["r"] < 0: - sParams["r"] = sParams["r_l"]*sParams["eta"]**(-sParams["current_s"]) - - return sParams - - def _make_bracket(self, experiment, sParams): - if sParams["evaluatingTrials"] == 0: - trialSpecs = self._make_master_bracket(experiment, sParams) + def _update_hbParameters(self, param): + param.current_i += 1 + if param.current_i > param.current_s: + self._new_hbParameters(param) + + def _new_hbParameters(self, param): + param.current_s -= 1 + param.current_i = 0 + if param.current_s >= 0: + # when param.current_s < 0, hyperband algorithm reaches the end + param.n = int(math.ceil(float(param.s_max + 1) * ( + float(param.eta**param.current_s) / float(param.current_s+1)))) + param.r = param.r_l * \ + param.eta**(-param.current_s) + + def _make_bracket(self, experiment, param): + if param.evaluating_trials == 0: + trialSpecs = self._make_master_bracket(experiment, param) else: - trialSpecs = self._make_child_bracket(experiment, sParams) - if sParams["current_i"] < sParams["current_s"]: - sParams["evaluatingTrials"] = len(trialSpecs) + trialSpecs = self._make_child_bracket(experiment, param) + if param.current_i < param.current_s: + param.evaluating_trials = len(trialSpecs) else: - sParams["evaluatingTrials"] = 0 - - self.logger.info("HyperBand Param eta %d.", - sParams["eta"], extra={"experiment_name": experiment.name}) - self.logger.info("HyperBand Param R %d.", - sParams["r_l"], extra={"experiment_name": experiment.name}) - self.logger.info("HyperBand Param sMax %d.", - sParams["sMax"], extra={"experiment_name": experiment.name}) - self.logger.info("HyperBand Param B %d.", - sParams["b_l"], extra={"experiment_name": experiment.name}) - self.logger.info("HyperBand Param n %d.", - sParams["n"], extra={"experiment_name": experiment.name}) - self.logger.info("HyperBand Param r %d.", - sParams["r"], extra={"experiment_name": experiment.name}) - self.logger.info("HyperBand Param s %d.", - sParams["current_s"], extra={"experiment_name": experiment.name}) - self.logger.info("HyperBand Param i %d.", - sParams["current_i"], extra={"experiment_name": experiment.name}) - self.logger.info("HyperBand evaluating trials count %d.", - sParams["evaluatingTrials"], extra={"experiment_name": experiment.name}) - - if sParams["evaluatingTrials"] == 0: - self._new_hbParameters(sParams) + param.evaluating_trials = 0 + + logger.info("HyperBand Param eta %d.", + param.eta, extra={"experiment_name": experiment.name}) + logger.info("HyperBand Param R %d.", + param.r_l, extra={"experiment_name": experiment.name}) + logger.info("HyperBand Param sMax %d.", + param.s_max, extra={"experiment_name": experiment.name}) + logger.info("HyperBand Param B %d.", + param.b_l, extra={"experiment_name": experiment.name}) + logger.info("HyperBand Param n %d.", + param.n, extra={"experiment_name": experiment.name}) + logger.info("HyperBand Param r %d.", + param.r, extra={"experiment_name": experiment.name}) + logger.info("HyperBand Param s %d.", + param.current_s, extra={"experiment_name": experiment.name}) + logger.info("HyperBand Param i %d.", + param.current_i, extra={"experiment_name": experiment.name}) + logger.info("HyperBand evaluating trials count %d.", + param.evaluating_trials, extra={"experiment_name": experiment.name}) + logger.info("HyperBand budget resource name %s.", + param.resource_name, extra={"experiment_name": experiment.name}) + if param.evaluating_trials == 0: + self._new_hbParameters(param) return trialSpecs - def _make_child_bracket(self, experiment, sParams): - n_i = math.ceil(sParams["n"] * sParams["eta"]**(-sParams["current_i"])) - top_trials_num = int(math.ceil(n_i / sParams["eta"])) - self._update_hbParameters(sParams) - r_i = int(sParams["r"] * sParams["eta"]**sParams["current_i"]) - last_trials = self._get_top_trial(sParams["evaluatingTrials"], top_trials_num, experiment) - trialSpecs = self._copy_trials(last_trials, r_i, sParams["resourceName"]) - - self.logger.info("Generate %d trials by child bracket.", - top_trials_num, extra={"experiment_name": experiment.name}) + def _make_child_bracket(self, experiment, param): + n_i = math.ceil(param.n * param.eta**(-param.current_i)) + top_trials_num = int(math.ceil(n_i / param.eta)) + self._update_hbParameters(param) + r_i = int(param.r * param.eta**param.current_i) + last_trials = self._get_top_trial( + param.evaluating_trials, top_trials_num, experiment) + trialSpecs = self._copy_trials( + last_trials, r_i, param.resource_name) + + logger.info("Generate %d trials by child bracket.", + top_trials_num, extra={"experiment_name": experiment.name}) return trialSpecs def _get_last_trials(self, all_trials, latest_trials_num): - sorted_trials = sorted(all_trials, key=lambda trial: trial.status.start_time) + sorted_trials = sorted( + all_trials, key=lambda trial: trial.status.start_time) if len(sorted_trials) > latest_trials_num: return sorted_trials[-latest_trials_num:] else: @@ -201,7 +135,8 @@ def get_objective_value(t): "There are some trials which are not completed yet for experiment %s." % experiment.name) if objective_type == api_pb2.MAXIMIZE: - top_trials.extend(sorted(latest_trials, key=get_objective_value, reverse=True)) + top_trials.extend( + sorted(latest_trials, key=get_objective_value, reverse=True)) else: top_trials.extend(sorted(latest_trials, key=get_objective_value)) return top_trials[:top_trials_num] @@ -220,45 +155,36 @@ def _copy_trials(self, trials, r_i, resourceName): trialSpecs.append(trial_spec) return trialSpecs - def _make_master_bracket(self, experiment, sParams): - n = sParams["n"] - r = int(sParams["r"]) + def _make_master_bracket(self, experiment, param): + n = param.n + r = int(param.r) parameter_config = parsing_util.parse_parameter_configs( experiment.spec.parameter_specs.parameters) trial_specs = [] for _ in range(n): sample = parameter_config.random_sample() suggestion = parsing_util.parse_x_next_vector( - sample, - parameter_config.parameter_types, - parameter_config.names, - parameter_config.discrete_info, - parameter_config.categorical_info) + sample, + parameter_config.parameter_types, + parameter_config.names, + parameter_config.discrete_info, + parameter_config.categorical_info) trial_spec = api_pb2.TrialSpec() trial_spec.experiment_name = experiment.name - for param in suggestion: - if param['name'] == sParams["resourceName"]: - param['value'] = str(r) - trial_spec.parameter_assignments.assignments.add(name=param['name'], - value=str(param['value'])) + for hp in suggestion: + if hp['name'] == param.resource_name: + hp['value'] = str(r) + trial_spec.parameter_assignments.assignments.add(name=hp['name'], + value=str(hp['value'])) trial_specs.append(trial_spec) - self.logger.info("Generate %d trials by master bracket.", - n, extra={"experiment_name": experiment.name}) + logger.info("Generate %d trials by master bracket.", + n, extra={"experiment_name": experiment.name}) return trial_specs - def _update_algorithm_extrasettings(self, experiment_name, sParams): - as_list = [] - for k, v in sParams.items(): - as_list.append(api_pb2.AlgorithmSetting(name=k, value=str(v))) - channel = grpc.beta.implementations.insecure_channel(self.manager_addr, self.manager_port) - with api_pb2.beta_create_Manager_stub(channel) as client: - client.UpdateAlgorithmExtraSettings(api_pb2.UpdateAlgorithmExtraSettingsRequest( - experiment_name=experiment_name, extra_algorithm_settings=as_list), 10) - def _set_validate_context_error(self, context, error_message): context.set_code(grpc.StatusCode.INVALID_ARGUMENT) context.set_details(error_message) - self.logger.info(error_message) + logger.info(error_message) return api_pb2.ValidateAlgorithmSettingsReply() def ValidateAlgorithmSettings(self, request, context): @@ -288,7 +214,7 @@ def ValidateAlgorithmSettings(self, request, context): max_parallel = int(math.ceil(eta**smax)) if request.experiment_spec.parallel_trial_count < max_parallel: return self._set_validate_context_error(context, - "parallelTrialCount must be not less than %d." % max_parallel) + "parallelTrialCount must be not less than %d." % max_parallel) valid_resourceName = False for param in params: @@ -297,6 +223,117 @@ def ValidateAlgorithmSettings(self, request, context): break if not valid_resourceName: return self._set_validate_context_error(context, - "value of resourceName setting must be in parameters.") + "value of resourceName setting must be in parameters.") return api_pb2.ValidateAlgorithmSettingsReply() + + +class HyperBandParam(object): + def __init__(self, eta=3, s_max=-1, r_l=-1, + b_l=-1, r=-1, n=-1, current_s=-2, + current_i=-1, resource_name="", + evaluating_trials=0): + self.eta = eta + self.s_max = s_max + self.r_l = r_l + self.b_l = b_l + self.r = r + self.n = n + self.current_s = current_s + self.current_i = current_i + self.resource_name = resource_name + self.evaluating_trials = evaluating_trials + + @staticmethod + def generate(param): + algorithm_settings = [ + api_pb2.AlgorithmSetting( + name="eta", + value=str(param.eta) + ), api_pb2.AlgorithmSetting( + name="s_max", + value=str(param.s_max) + ), api_pb2.AlgorithmSetting( + name="r_l", + value=str(param.r_l) + ), api_pb2.AlgorithmSetting( + name="b_l", + value=str(param.b_l) + ), api_pb2.AlgorithmSetting( + name="r", + value=str(param.r) + ), api_pb2.AlgorithmSetting( + name="n", + value=str(param.n) + ), api_pb2.AlgorithmSetting( + name="current_s", + value=str(param.current_s) + ), api_pb2.AlgorithmSetting( + name="current_i", + value=str(param.current_i) + ), api_pb2.AlgorithmSetting( + name="resource_name", + value=param.resource_name + ), api_pb2.AlgorithmSetting( + name="evaluating_trials", + value=str(param.evaluating_trials) + )] + return api_pb2.AlgorithmSpec( + algorithm_setting=algorithm_settings + ) + + @staticmethod + def convert(alg_settings): + """Convert the algorithm settings to HyperBandParam. + """ + param = HyperBandParam() + # Set the param from the algorithm settings. + for setting in alg_settings: + if setting.name == "eta": + param.eta = float(setting.value) + elif setting.name == "r_l": + param.r_l = float(setting.value) + elif setting.name == "b_l": + param.b_l = float(setting.value) + elif setting.name == "n": + param.n = int(float(setting.value)) + elif setting.name == "r": + param.r = int(float(setting.value)) + elif setting.name == "current_s": + param.current_s = int(float(setting.value)) + elif setting.name == "current_i": + param.current_i = int(float(setting.value)) + elif setting.name == "s_max": + param.s_max = int(float(setting.value)) + elif setting.name == "evaluating_trials": + param.evaluating_trials = int(float(setting.value)) + elif setting.name == "resource_name": + param.resource_name = setting.value + else: + logger.info( + "Unknown HyperBand Param %s, ignore it", setting.name) + if param.current_s == -1: + # Hyperband outlerloop has finished + logger.info("HyperBand outlerloop has finished.") + return param + + # Deal with illegal parameter values. + if param.eta <= 0: + param.eta = 3 + if param.s_max < 0: + param.s_max = int( + math.log(param.r_l) / math.log(param.eta)) + if param.b_l < 0: + param.b_l = (param.s_max + 1) * param.r_l + if param.current_s < 0: + param.current_s = param.s_max + if param.current_i < 0: + param.current_i = 0 + if param.n < 0: + param.n = int(math.ceil(float(param.s_max + 1) * ( + float(param.eta**param.current_s) / float(param.current_s+1)))) + if param.r < 0: + param.r = param.r_l * \ + param.eta**(-param.current_s) + + return param diff --git a/pkg/suggestion/v1alpha3/hyperopt_service.py b/pkg/suggestion/v1alpha3/hyperopt_service.py index 89ce2950fce..ff00a0915f7 100644 --- a/pkg/suggestion/v1alpha3/hyperopt_service.py +++ b/pkg/suggestion/v1alpha3/hyperopt_service.py @@ -2,7 +2,6 @@ from pkg.apis.manager.v1alpha3.python import api_pb2 from pkg.apis.manager.v1alpha3.python import api_pb2_grpc -from pkg.apis.manager.health.python import health_pb2 from pkg.suggestion.v1alpha3.internal.search_space import HyperParameter, HyperParameterSearchSpace from pkg.suggestion.v1alpha3.internal.trial import Trial, Assignment diff --git a/scripts/v1alpha3/build.sh b/scripts/v1alpha3/build.sh index 8efb945c8fa..391a94d3642 100755 --- a/scripts/v1alpha3/build.sh +++ b/scripts/v1alpha3/build.sh @@ -66,3 +66,4 @@ docker build -t ${REGISTRY}/${PREFIX}/suggestion-hyperopt:${TAG} -f ${CMD_PREFIX docker build -t ${REGISTRY}/${PREFIX}/suggestion-skopt:${TAG} -f ${CMD_PREFIX}/suggestion/skopt/v1alpha3/Dockerfile . docker build -t ${REGISTRY}/${PREFIX}/suggestion-chocolate:${TAG} -f ${CMD_PREFIX}/suggestion/chocolate/v1alpha3/Dockerfile . docker build -t ${REGISTRY}/${PREFIX}/suggestion-nasrl:${TAG} -f ${CMD_PREFIX}/suggestion/nasrl/v1alpha3/Dockerfile . +docker build -t ${REGISTRY}/${PREFIX}/suggestion-hyperband:${TAG} -f ${CMD_PREFIX}/suggestion/hyperband/v1alpha3/Dockerfile . diff --git a/test/suggestion/v1alpha3/test_hyperband_service.py b/test/suggestion/v1alpha3/test_hyperband_service.py new file mode 100644 index 00000000000..cdc811bc8af --- /dev/null +++ b/test/suggestion/v1alpha3/test_hyperband_service.py @@ -0,0 +1,183 @@ +import grpc +import grpc_testing +import unittest + +from pkg.apis.manager.v1alpha3.python import api_pb2_grpc +from pkg.apis.manager.v1alpha3.python import api_pb2 + +from pkg.suggestion.v1alpha3.hyperband_service import HyperbandService + + +class TestHyperband(unittest.TestCase): + def setUp(self): + servicers = { + api_pb2.DESCRIPTOR.services_by_name['Suggestion']: HyperbandService( + ) + } + + self.test_server = grpc_testing.server_from_dictionary( + servicers, grpc_testing.strict_real_time()) + + def test_get_suggestion(self): + trials = [ + api_pb2.Trial( + name="test-asfjh", + spec=api_pb2.TrialSpec( + objective=api_pb2.ObjectiveSpec( + type=api_pb2.MAXIMIZE, + objective_metric_name="metric-2", + goal=0.9 + ), + parameter_assignments=api_pb2.TrialSpec.ParameterAssignments( + assignments=[ + api_pb2.ParameterAssignment( + name="param-1", + value="2", + ), + api_pb2.ParameterAssignment( + name="param-2", + value="cat1", + ), + api_pb2.ParameterAssignment( + name="param-3", + value="2", + ), + api_pb2.ParameterAssignment( + name="param-4", + value="3.44", + ) + ] + ) + ), + status=api_pb2.TrialStatus( + observation=api_pb2.Observation( + metrics=[ + api_pb2.Metric( + name="metric=1", + value="435" + ), + api_pb2.Metric( + name="metric=2", + value="5643" + ), + ] + ) + ) + ), + api_pb2.Trial( + name="test-234hs", + spec=api_pb2.TrialSpec( + objective=api_pb2.ObjectiveSpec( + type=api_pb2.MAXIMIZE, + objective_metric_name="metric-2", + goal=0.9 + ), + parameter_assignments=api_pb2.TrialSpec.ParameterAssignments( + assignments=[ + api_pb2.ParameterAssignment( + name="param-1", + value="3", + ), + api_pb2.ParameterAssignment( + name="param-2", + value="cat2", + ), + api_pb2.ParameterAssignment( + name="param-3", + value="6", + ), + api_pb2.ParameterAssignment( + name="param-4", + value="4.44", + ) + ] + ) + ), + status=api_pb2.TrialStatus( + observation=api_pb2.Observation( + metrics=[ + api_pb2.Metric( + name="metric=1", + value="123" + ), + api_pb2.Metric( + name="metric=2", + value="3028" + ), + ] + ) + ) + ) + ] + experiment = api_pb2.Experiment( + name="test", + spec=api_pb2.ExperimentSpec( + algorithm=api_pb2.AlgorithmSpec( + algorithm_name="hyperband", + algorithm_setting=[ + api_pb2.AlgorithmSetting( + name="r_l", + value="10" + ), + api_pb2.AlgorithmSetting( + name="resource_name", + value="--num-epochs" + ) + ], + ), + objective=api_pb2.ObjectiveSpec( + type=api_pb2.MAXIMIZE, + goal=0.9 + ), + parameter_specs=api_pb2.ExperimentSpec.ParameterSpecs( + parameters=[ + api_pb2.ParameterSpec( + name="param-1", + parameter_type=api_pb2.INT, + feasible_space=api_pb2.FeasibleSpace( + max="5", min="1", list=[]), + ), + api_pb2.ParameterSpec( + name="param-2", + parameter_type=api_pb2.CATEGORICAL, + feasible_space=api_pb2.FeasibleSpace( + max=None, min=None, list=["cat1", "cat2", "cat3"]) + ), + api_pb2.ParameterSpec( + name="param-3", + parameter_type=api_pb2.DISCRETE, + feasible_space=api_pb2.FeasibleSpace( + max=None, min=None, list=["3", "2", "6"]) + ), + api_pb2.ParameterSpec( + name="param-4", + parameter_type=api_pb2.DOUBLE, + feasible_space=api_pb2.FeasibleSpace( + max="5", min="1", list=[]) + ) + ] + ) + ) + ) + + request = api_pb2.GetSuggestionsRequest( + experiment=experiment, + trials=trials, + request_number=2, + ) + + get_suggestion = self.test_server.invoke_unary_unary( + method_descriptor=(api_pb2.DESCRIPTOR + .services_by_name['Suggestion'] + .methods_by_name['GetSuggestions']), + invocation_metadata={}, + request=request, timeout=1) + + response, metadata, code, details = get_suggestion.termination() + print(response.parameter_assignments) + self.assertEqual(code, grpc.StatusCode.OK) + self.assertEqual(2, len(response.parameter_assignments)) + + +if __name__ == '__main__': + unittest.main() From 4623840b717c745bf1ccc17eaa0de7b234cb0c98 Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Tue, 24 Sep 2019 10:28:49 +0800 Subject: [PATCH 02/20] chore: Add test in CI Signed-off-by: Ce Gao --- test/scripts/v1alpha3/python-tests.sh | 1 + .../v1alpha3/run-suggestion-hyperband.sh | 67 +++++++++++++++++++ .../components/workflows-v1alpha3.libsonnet | 7 ++ 3 files changed, 75 insertions(+) create mode 100755 test/scripts/v1alpha3/run-suggestion-hyperband.sh diff --git a/test/scripts/v1alpha3/python-tests.sh b/test/scripts/v1alpha3/python-tests.sh index 3de6d0e0e8b..cad9fd38dcb 100755 --- a/test/scripts/v1alpha3/python-tests.sh +++ b/test/scripts/v1alpha3/python-tests.sh @@ -26,4 +26,5 @@ pip install -r cmd/suggestion/chocolate/v1alpha3/requirements.txt pip install -r cmd/suggestion/hyperopt/v1alpha3/requirements.txt pip install -r cmd/suggestion/skopt/v1alpha3/requirements.txt pip install -r cmd/suggestion/nasrl/v1alpha3/requirements.txt +pip install -r cmd/suggestion/hyperband/v1alpha3/requirements.txt pytest -s ./test diff --git a/test/scripts/v1alpha3/run-suggestion-hyperband.sh b/test/scripts/v1alpha3/run-suggestion-hyperband.sh new file mode 100755 index 00000000000..cf9840e1fb3 --- /dev/null +++ b/test/scripts/v1alpha3/run-suggestion-hyperband.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +# Copyright 2018 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This shell script is used to build a cluster and create a namespace from our +# argo workflow + +set -o errexit +set -o nounset +set -o pipefail + +CLUSTER_NAME="${CLUSTER_NAME}" +ZONE="${GCP_ZONE}" +PROJECT="${GCP_PROJECT}" +NAMESPACE="${DEPLOY_NAMESPACE}" +REGISTRY="${GCP_REGISTRY}" +# VERSION=$(git describe --tags --always --dirty) +VERSION="latest" +GO_DIR=${GOPATH}/src/github.com/${REPO_OWNER}/${REPO_NAME} + +echo "Activating service-account" +gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} + +echo "Configuring kubectl" + +echo "CLUSTER_NAME: ${CLUSTER_NAME}" +echo "ZONE: ${GCP_ZONE}" +echo "PROJECT: ${GCP_PROJECT}" + +gcloud --project ${PROJECT} container clusters get-credentials ${CLUSTER_NAME} \ + --zone ${ZONE} +kubectl config set-context $(kubectl config current-context) --namespace=default +USER=`gcloud config get-value account` + +echo "All Katib components are running." +kubectl version +kubectl cluster-info +echo "Katib deployments" +kubectl -n kubeflow get deploy +echo "Katib services" +kubectl -n kubeflow get svc +echo "Katib pods" +kubectl -n kubeflow get pod + +mkdir -p ${GO_DIR} +cp -r . ${GO_DIR}/ +cp -r pkg/apis/manager/v1alpha3/python/* ${GO_DIR}/test/e2e/v1alpha3 +cd ${GO_DIR}/test/e2e/v1alpha3 + +echo "Running e2e hyperband experiment" +export KUBECONFIG=$HOME/.kube/config +go run run-e2e-experiment.go ../../../examples/v1alpha3/hyperband-example.yaml +kubectl -n kubeflow describe suggestion +kubectl -n kubeflow delete experiment hyperband-example +exit 0 diff --git a/test/workflows/components/workflows-v1alpha3.libsonnet b/test/workflows/components/workflows-v1alpha3.libsonnet index 5c7681d5571..65529f61aed 100644 --- a/test/workflows/components/workflows-v1alpha3.libsonnet +++ b/test/workflows/components/workflows-v1alpha3.libsonnet @@ -283,6 +283,10 @@ name: "run-nasrl-e2e-tests", template: "run-nasrl-e2e-tests", }, + { + name: "run-hyperband-e2e-tests", + template: "run-hyperband-e2e-tests", + }, ], ], }, @@ -332,6 +336,9 @@ $.parts(namespace, name, overrides).e2e(prow_env, bucket).buildTemplate("run-random-e2e-tests", testWorkerImage, [ "test/scripts/v1alpha3/run-suggestion-random.sh", ]), // run random algorithm + $.parts(namespace, name, overrides).e2e(prow_env, bucket).buildTemplate("run-hyperband-e2e-tests", testWorkerImage, [ + "test/scripts/v1alpha3/run-suggestion-hyperband.sh", + ]), // run hyperband algorithm $.parts(namespace, name, overrides).e2e(prow_env, bucket).buildTemplate("run-grid-e2e-tests", testWorkerImage, [ "test/scripts/v1alpha3/run-suggestion-grid.sh", ]), // run grid algorithm From c84a8fdc3d1374f9a850b8830880a4dda443b9b3 Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Tue, 24 Sep 2019 11:10:38 +0800 Subject: [PATCH 03/20] fix: Fix Signed-off-by: Ce Gao --- cmd/suggestion/hyperband/v1alpha3/main.py | 5 ++++- pkg/suggestion/v1alpha3/hyperband_service.py | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cmd/suggestion/hyperband/v1alpha3/main.py b/cmd/suggestion/hyperband/v1alpha3/main.py index 57ccc1fe571..e9ad4aebc1a 100644 --- a/cmd/suggestion/hyperband/v1alpha3/main.py +++ b/cmd/suggestion/hyperband/v1alpha3/main.py @@ -8,10 +8,12 @@ _ONE_DAY_IN_SECONDS = 60 * 60 * 24 DEFAULT_PORT = "0.0.0.0:6789" + def serve(): server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) service = HyperbandService() - api_pb2_grpc.add_SuggestionServicer_to_server(service, server) health_pb2_grpc.add_HealthServicer_to_server(service, server) + api_pb2_grpc.add_SuggestionServicer_to_server(service, server) + health_pb2_grpc.add_HealthServicer_to_server(service, server) server.add_insecure_port(DEFAULT_PORT) print("Listening...") @@ -22,5 +24,6 @@ def serve(): except KeyboardInterrupt: server.stop(0) + if __name__ == "__main__": serve() diff --git a/pkg/suggestion/v1alpha3/hyperband_service.py b/pkg/suggestion/v1alpha3/hyperband_service.py index 0d4e6290fa6..bec1e628a30 100644 --- a/pkg/suggestion/v1alpha3/hyperband_service.py +++ b/pkg/suggestion/v1alpha3/hyperband_service.py @@ -25,6 +25,7 @@ def GetSuggestions(self, request, context): try: reply = api_pb2.GetSuggestionsReply() experiment = request.experiment + self.all_trials = request.trials alg_settings = experiment.spec.algorithm.algorithm_setting param = HyperBandParam.convert(alg_settings) @@ -126,7 +127,7 @@ def get_objective_value(t): return float(m.value) top_trials = [] - all_trials = self._get_trials(experiment.name) + all_trials = self.all_trials latest_trials = self._get_last_trials(all_trials, latest_trials_num) for t in latest_trials: From dbfa6ea9591067fa917ad3772eabcac9c1647074 Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Tue, 24 Sep 2019 11:34:15 +0800 Subject: [PATCH 04/20] fix: Fix name Signed-off-by: Ce Gao --- pkg/suggestion/v1alpha3/hyperband_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/suggestion/v1alpha3/hyperband_service.py b/pkg/suggestion/v1alpha3/hyperband_service.py index bec1e628a30..b4eaf25c785 100644 --- a/pkg/suggestion/v1alpha3/hyperband_service.py +++ b/pkg/suggestion/v1alpha3/hyperband_service.py @@ -189,8 +189,8 @@ def _set_validate_context_error(self, context, error_message): return api_pb2.ValidateAlgorithmSettingsReply() def ValidateAlgorithmSettings(self, request, context): - params = request.experiment_spec.parameter_specs.parameters - settings = request.experiment_spec.algorithm.algorithm_setting + params = request.experiment.spec.parameter_specs.parameters + settings = request.experiment.spec.algorithm.algorithm_setting setting_dict = {} for setting in settings: setting_dict[setting.name] = setting.value From c563825c94284832f01539549e9ad2fdb64a9985 Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Tue, 24 Sep 2019 12:06:06 +0800 Subject: [PATCH 05/20] fix: Fix name Signed-off-by: Ce Gao --- pkg/suggestion/v1alpha3/hyperband_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/suggestion/v1alpha3/hyperband_service.py b/pkg/suggestion/v1alpha3/hyperband_service.py index b4eaf25c785..7d9dbbde267 100644 --- a/pkg/suggestion/v1alpha3/hyperband_service.py +++ b/pkg/suggestion/v1alpha3/hyperband_service.py @@ -213,7 +213,7 @@ def ValidateAlgorithmSettings(self, request, context): smax = int(math.log(rl)/math.log(eta)) max_parallel = int(math.ceil(eta**smax)) - if request.experiment_spec.parallel_trial_count < max_parallel: + if request.experiment.spec.parallel_trial_count < max_parallel: return self._set_validate_context_error(context, "parallelTrialCount must be not less than %d." % max_parallel) From 79caf8bcbf7ef72a9b066c8496d4282d52d4fe2d Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Tue, 24 Sep 2019 12:40:52 +0800 Subject: [PATCH 06/20] fix: Fix script Signed-off-by: Ce Gao --- test/scripts/v1alpha3/check-katib-ready.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/scripts/v1alpha3/check-katib-ready.sh b/test/scripts/v1alpha3/check-katib-ready.sh index 2e5f2ac5308..5499cec4277 100755 --- a/test/scripts/v1alpha3/check-katib-ready.sh +++ b/test/scripts/v1alpha3/check-katib-ready.sh @@ -81,7 +81,7 @@ sed -i -e "s@image: gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/katib-ui@im # Suggestion algorithms sed -i -e "s@gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/suggestion-nasrl@${REGISTRY}\/${REPO_NAME}\/v1alpha3\/suggestion-nasrl:${VERSION}@" manifests/v1alpha3/katib-controller/katib-config.yaml -sed -i -e "s@image: gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/suggestion-hyperband@image: ${REGISTRY}\/${REPO_NAME}\/v1alpha3\/suggestion-hyperband:${VERSION}@" manifests/v1alpha3/katib-controller/katib-config.yaml +sed -i -e "s@gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/suggestion-hyperband@${REGISTRY}\/${REPO_NAME}\/v1alpha3\/suggestion-hyperband:${VERSION}@" manifests/v1alpha3/katib-controller/katib-config.yaml sed -i -e "s@gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/suggestion-chocolate@${REGISTRY}\/${REPO_NAME}\/v1alpha3\/suggestion-chocolate:${VERSION}@" manifests/v1alpha3/katib-controller/katib-config.yaml sed -i -e "s@gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/suggestion-hyperopt@${REGISTRY}\/${REPO_NAME}\/v1alpha3\/suggestion-hyperopt:${VERSION}@" manifests/v1alpha3/katib-controller/katib-config.yaml sed -i -e "s@gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/suggestion-skopt@${REGISTRY}\/${REPO_NAME}\/v1alpha3\/suggestion-skopt:${VERSION}@" manifests/v1alpha3/katib-controller/katib-config.yaml From 526ae9d42552aae645f4c0512c2135d7a421d9fa Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Tue, 24 Sep 2019 14:19:40 +0800 Subject: [PATCH 07/20] fix: Fix r_l Signed-off-by: Ce Gao --- examples/v1alpha3/hyperband-example.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/v1alpha3/hyperband-example.yaml b/examples/v1alpha3/hyperband-example.yaml index 7dee8c83ad2..fa106dcf25a 100644 --- a/examples/v1alpha3/hyperband-example.yaml +++ b/examples/v1alpha3/hyperband-example.yaml @@ -19,7 +19,7 @@ spec: - name: "eta" value: "3" - name: "r_l" - value: "9" + value: "2" maxFailedTrialCount: 9 parameters: - name: --lr From 8328321482d537fd98641f64b561dd3a135a78bc Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Tue, 24 Sep 2019 15:26:29 +0800 Subject: [PATCH 08/20] fix: Add parallel trial count Signed-off-by: Ce Gao --- .../suggestion/suggestionclient/suggestionclient.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go b/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go index a7625016c35..caf68225281 100644 --- a/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go +++ b/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go @@ -151,6 +151,12 @@ func (g *General) ConvertExperiment(e *experimentsv1alpha3.Experiment) *suggesti if e.Spec.NasConfig != nil { res.Spec.NasConfig = convertNasConfig(e.Spec.NasConfig) } + if e.Spec.ParallelTrialCount != nil { + res.Spec.ParallelTrialCount = *e.Spec.ParallelTrialCount + } + if e.Spec.MaxTrialCount != nil { + res.Spec.MaxTrialCount = *e.Spec.MaxTrialCount + } return res } From 18d57a7564c6fcac06d34229450e03f5482c1463 Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Tue, 24 Sep 2019 17:08:42 +0800 Subject: [PATCH 09/20] fix: Add output Signed-off-by: Ce Gao --- test/scripts/v1alpha3/run-suggestion-bayesian.sh | 1 + test/scripts/v1alpha3/run-suggestion-grid.sh | 1 + test/scripts/v1alpha3/run-suggestion-hyperband.sh | 4 +--- test/scripts/v1alpha3/run-suggestion-random.sh | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/test/scripts/v1alpha3/run-suggestion-bayesian.sh b/test/scripts/v1alpha3/run-suggestion-bayesian.sh index 768d170b5b5..f94770b85f5 100755 --- a/test/scripts/v1alpha3/run-suggestion-bayesian.sh +++ b/test/scripts/v1alpha3/run-suggestion-bayesian.sh @@ -60,4 +60,5 @@ go run run-e2e-experiment.go ../../../examples/v1alpha3/bayesianoptimization-exa kubectl -n kubeflow describe suggestion kubectl -n kubeflow describe pods kubectl -n kubeflow delete experiment bayesianoptimization-example +kubectl describe pods exit 0 diff --git a/test/scripts/v1alpha3/run-suggestion-grid.sh b/test/scripts/v1alpha3/run-suggestion-grid.sh index 3359a852e4e..d799391af46 100755 --- a/test/scripts/v1alpha3/run-suggestion-grid.sh +++ b/test/scripts/v1alpha3/run-suggestion-grid.sh @@ -59,4 +59,5 @@ export KUBECONFIG=$HOME/.kube/config go run run-e2e-experiment.go ../../../examples/v1alpha3/grid-example.yaml kubectl -n kubeflow describe suggestion kubectl -n kubeflow delete experiment grid-example +kubectl describe pods exit 0 diff --git a/test/scripts/v1alpha3/run-suggestion-hyperband.sh b/test/scripts/v1alpha3/run-suggestion-hyperband.sh index cf9840e1fb3..7d60bf77110 100755 --- a/test/scripts/v1alpha3/run-suggestion-hyperband.sh +++ b/test/scripts/v1alpha3/run-suggestion-hyperband.sh @@ -25,9 +25,6 @@ CLUSTER_NAME="${CLUSTER_NAME}" ZONE="${GCP_ZONE}" PROJECT="${GCP_PROJECT}" NAMESPACE="${DEPLOY_NAMESPACE}" -REGISTRY="${GCP_REGISTRY}" -# VERSION=$(git describe --tags --always --dirty) -VERSION="latest" GO_DIR=${GOPATH}/src/github.com/${REPO_OWNER}/${REPO_NAME} echo "Activating service-account" @@ -64,4 +61,5 @@ export KUBECONFIG=$HOME/.kube/config go run run-e2e-experiment.go ../../../examples/v1alpha3/hyperband-example.yaml kubectl -n kubeflow describe suggestion kubectl -n kubeflow delete experiment hyperband-example +kubectl describe pods exit 0 diff --git a/test/scripts/v1alpha3/run-suggestion-random.sh b/test/scripts/v1alpha3/run-suggestion-random.sh index 215b36e7155..0fc38ab5faa 100755 --- a/test/scripts/v1alpha3/run-suggestion-random.sh +++ b/test/scripts/v1alpha3/run-suggestion-random.sh @@ -59,4 +59,5 @@ export KUBECONFIG=$HOME/.kube/config go run run-e2e-experiment.go ../../../examples/v1alpha3/random-example.yaml kubectl -n kubeflow describe suggestion kubectl -n kubeflow delete experiment random-example +kubectl describe pods exit 0 From ff85045426e393ecd6dceded2ff2121309e8255f Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Wed, 25 Sep 2019 10:59:50 +0800 Subject: [PATCH 10/20] feat: Append algorithm settings Signed-off-by: Ce Gao --- .../suggestionclient/algorithm_settings.go | 53 +++++++++++++++++++ .../suggestionclient/suggestionclient.go | 14 ++++- 2 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 pkg/controller.v1alpha3/suggestion/suggestionclient/algorithm_settings.go diff --git a/pkg/controller.v1alpha3/suggestion/suggestionclient/algorithm_settings.go b/pkg/controller.v1alpha3/suggestion/suggestionclient/algorithm_settings.go new file mode 100644 index 00000000000..ead1c75f0a3 --- /dev/null +++ b/pkg/controller.v1alpha3/suggestion/suggestionclient/algorithm_settings.go @@ -0,0 +1,53 @@ +package suggestionclient + +import ( + common "github.com/kubeflow/katib/pkg/apis/controller/common/v1alpha3" + experimentsv1alpha3 "github.com/kubeflow/katib/pkg/apis/controller/experiments/v1alpha3" + suggestionsv1alpha3 "github.com/kubeflow/katib/pkg/apis/controller/suggestions/v1alpha3" + suggestionapi "github.com/kubeflow/katib/pkg/apis/manager/v1alpha3" +) + +// appendAlgorithmSettingsFromSuggestion appends the algorithm settings +// in suggestion to Experiment. +// Algorithm settings in suggestion will overwrite the settings in experiment. +func appendAlgorithmSettingsFromSuggestion(experiment *experimentsv1alpha3.Experiment, algoSettingsInSuggestion *common.AlgorithmSpec) { + algoSettingsInExperiment := experiment.Spec.Algorithm + for _, setting := range algoSettingsInSuggestion.AlgorithmSettings { + if index, found := contains(algoSettingsInExperiment, setting.Name); found { + // If the setting is found in Experiment, update it. + algoSettingsInExperiment.AlgorithmSettings[index].Value = setting.Value + } else { + // If not found, append it. + algoSettingsInExperiment.AlgorithmSettings = append( + algoSettingsInExperiment.AlgorithmSettings, setting) + } + } +} + +func updateAlgorithmSettings(suggestion *suggestionsv1alpha3.Suggestion, algorithm *suggestionapi.AlgorithmSpec) { + algoSettingsInSuggestion := suggestion.Spec.AlgorithmSpec + for _, setting := range algorithm.AlgorithmSetting { + if setting != nil { + if index, found := contains(algoSettingsInSuggestion, setting.Name); found { + // If the setting is found in Suggestion, update it. + algoSettingsInSuggestion.AlgorithmSettings[index].Value = setting.Value + } else { + // If not found, append it. + algoSettingsInSuggestion.AlgorithmSettings = append(algoSettingsInSuggestion.AlgorithmSettings, common.AlgorithmSetting{ + Name: setting.Name, + Value: setting.Value, + }) + } + } + } +} + +func contains(algorithmSettings *common.AlgorithmSpec, + name string) (int, bool) { + for i, s := range algorithmSettings.AlgorithmSettings { + if s.Name == name { + return i, true + } + } + return -1, false +} diff --git a/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go b/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go index caf68225281..513b6bfad4b 100644 --- a/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go +++ b/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go @@ -25,6 +25,7 @@ var ( timeout = 60 * time.Second ) +// SuggestionClient is the interface to communicate with algorithm services. type SuggestionClient interface { SyncAssignments(instance *suggestionsv1alpha3.Suggestion, e *experimentsv1alpha3.Experiment, ts []trialsv1alpha3.Trial) error @@ -32,13 +33,16 @@ type SuggestionClient interface { ValidateAlgorithmSettings(instance *suggestionsv1alpha3.Suggestion, e *experimentsv1alpha3.Experiment) error } +// General is the implementation for SuggestionClient. type General struct { } +// New creates a new SuggestionClient. func New() SuggestionClient { return &General{} } +// SyncAssignments syncs assignments from algorithm services. func (g *General) SyncAssignments( instance *suggestionsv1alpha3.Suggestion, e *experimentsv1alpha3.Experiment, @@ -60,8 +64,13 @@ func (g *General) SyncAssignments( ctx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() + // Algorithm settings in suggestion will overwrite the settings in experiment. + filledE := e.DeepCopy() + appendAlgorithmSettingsFromSuggestion(filledE, instance.Spec.AlgorithmSpec) + experiment := g.ConvertExperiment(e) + request := &suggestionapi.GetSuggestionsRequest{ - Experiment: g.ConvertExperiment(e), + Experiment: experiment, Trials: g.ConvertTrials(ts), RequestNumber: int32(requestNum), } @@ -83,10 +92,11 @@ func (g *General) SyncAssignments( }) } - // TODO(gaocegege): Set algorithm settings + updateAlgorithmSettings(instance, response.Algorithm) return nil } +// ValidateAlgorithmSettings validates if the algorithm specific configurations are valid. func (g *General) ValidateAlgorithmSettings(instance *suggestionsv1alpha3.Suggestion, e *experimentsv1alpha3.Experiment) error { logger := log.WithValues("Suggestion", types.NamespacedName{Name: instance.GetName(), Namespace: instance.GetNamespace()}) endpoint := fmt.Sprintf("%s:%d", instance.Name, consts.DefaultSuggestionPort) From 102e1cfb0b677289ff7f7c0c574d1bc0ae157299 Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Wed, 25 Sep 2019 11:10:29 +0800 Subject: [PATCH 11/20] fix: Add output Signed-off-by: Ce Gao --- test/scripts/v1alpha3/run-suggestion-bayesian.sh | 1 + test/scripts/v1alpha3/run-suggestion-grid.sh | 1 + test/scripts/v1alpha3/run-suggestion-hyperband.sh | 1 + test/scripts/v1alpha3/run-suggestion-random.sh | 1 + 4 files changed, 4 insertions(+) diff --git a/test/scripts/v1alpha3/run-suggestion-bayesian.sh b/test/scripts/v1alpha3/run-suggestion-bayesian.sh index f94770b85f5..9116dc152f6 100755 --- a/test/scripts/v1alpha3/run-suggestion-bayesian.sh +++ b/test/scripts/v1alpha3/run-suggestion-bayesian.sh @@ -61,4 +61,5 @@ kubectl -n kubeflow describe suggestion kubectl -n kubeflow describe pods kubectl -n kubeflow delete experiment bayesianoptimization-example kubectl describe pods +kubectl describe deploy exit 0 diff --git a/test/scripts/v1alpha3/run-suggestion-grid.sh b/test/scripts/v1alpha3/run-suggestion-grid.sh index d799391af46..d2538a79fd7 100755 --- a/test/scripts/v1alpha3/run-suggestion-grid.sh +++ b/test/scripts/v1alpha3/run-suggestion-grid.sh @@ -60,4 +60,5 @@ go run run-e2e-experiment.go ../../../examples/v1alpha3/grid-example.yaml kubectl -n kubeflow describe suggestion kubectl -n kubeflow delete experiment grid-example kubectl describe pods +kubectl describe deploy exit 0 diff --git a/test/scripts/v1alpha3/run-suggestion-hyperband.sh b/test/scripts/v1alpha3/run-suggestion-hyperband.sh index 7d60bf77110..c2f96191282 100755 --- a/test/scripts/v1alpha3/run-suggestion-hyperband.sh +++ b/test/scripts/v1alpha3/run-suggestion-hyperband.sh @@ -62,4 +62,5 @@ go run run-e2e-experiment.go ../../../examples/v1alpha3/hyperband-example.yaml kubectl -n kubeflow describe suggestion kubectl -n kubeflow delete experiment hyperband-example kubectl describe pods +kubectl describe deploy exit 0 diff --git a/test/scripts/v1alpha3/run-suggestion-random.sh b/test/scripts/v1alpha3/run-suggestion-random.sh index 0fc38ab5faa..097f2dae842 100755 --- a/test/scripts/v1alpha3/run-suggestion-random.sh +++ b/test/scripts/v1alpha3/run-suggestion-random.sh @@ -60,4 +60,5 @@ go run run-e2e-experiment.go ../../../examples/v1alpha3/random-example.yaml kubectl -n kubeflow describe suggestion kubectl -n kubeflow delete experiment random-example kubectl describe pods +kubectl describe deploy exit 0 From 675471437f60d7e6def61b7371800afe77e84656 Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Wed, 25 Sep 2019 13:03:12 +0800 Subject: [PATCH 12/20] fix: Fix useless variable Signed-off-by: Ce Gao --- .../suggestion/suggestionclient/suggestionclient.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go b/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go index 513b6bfad4b..c2b2547d28f 100644 --- a/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go +++ b/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go @@ -67,10 +67,9 @@ func (g *General) SyncAssignments( // Algorithm settings in suggestion will overwrite the settings in experiment. filledE := e.DeepCopy() appendAlgorithmSettingsFromSuggestion(filledE, instance.Spec.AlgorithmSpec) - experiment := g.ConvertExperiment(e) request := &suggestionapi.GetSuggestionsRequest{ - Experiment: experiment, + Experiment: g.ConvertExperiment(filledE), Trials: g.ConvertTrials(ts), RequestNumber: int32(requestNum), } From 1fb71aaa46c6b69940de55ad618be7d32134f280 Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Wed, 25 Sep 2019 14:51:58 +0800 Subject: [PATCH 13/20] fix: Use resource_name instead of ResourceName Signed-off-by: Ce Gao --- examples/v1alpha3/hyperband-example.yaml | 4 ++-- pkg/suggestion/v1alpha3/hyperband_service.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/v1alpha3/hyperband-example.yaml b/examples/v1alpha3/hyperband-example.yaml index fa106dcf25a..87efb5021d4 100644 --- a/examples/v1alpha3/hyperband-example.yaml +++ b/examples/v1alpha3/hyperband-example.yaml @@ -14,7 +14,7 @@ spec: algorithm: algorithmName: hyperband algorithmSettings: - - name: "resourceName" + - name: "resource_name" value: "--num-epochs" - name: "eta" value: "3" @@ -41,7 +41,7 @@ spec: - ftrl - name: --num-epochs parametertype: int - feasible: + feasibleSpace: min: "20" max: "20" trialTemplate: diff --git a/pkg/suggestion/v1alpha3/hyperband_service.py b/pkg/suggestion/v1alpha3/hyperband_service.py index 7d9dbbde267..32f0754dfb4 100644 --- a/pkg/suggestion/v1alpha3/hyperband_service.py +++ b/pkg/suggestion/v1alpha3/hyperband_service.py @@ -194,8 +194,8 @@ def ValidateAlgorithmSettings(self, request, context): setting_dict = {} for setting in settings: setting_dict[setting.name] = setting.value - if "r_l" not in setting_dict or "resourceName" not in setting_dict: - return self._set_validate_context_error(context, "r_l and resourceName must be set.") + if "r_l" not in setting_dict or "resource_name" not in setting_dict: + return self._set_validate_context_error(context, "r_l and resource_name must be set.") try: rl = float(setting_dict["r_l"]) except: @@ -219,12 +219,12 @@ def ValidateAlgorithmSettings(self, request, context): valid_resourceName = False for param in params: - if param.name == setting_dict["resourceName"]: + if param.name == setting_dict["resource_name"]: valid_resourceName = True break if not valid_resourceName: return self._set_validate_context_error(context, - "value of resourceName setting must be in parameters.") + "value of resource_name setting must be in parameters.") return api_pb2.ValidateAlgorithmSettingsReply() From f3b1ea925d128b41bfd599a25d05fc894fa25abe Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Wed, 25 Sep 2019 15:17:06 +0800 Subject: [PATCH 14/20] fix: Update Signed-off-by: Ce Gao --- examples/v1alpha3/hyperband-example.yaml | 2 +- manifests/v1alpha3/katib-controller/rbac.yaml | 1 + .../suggestion/suggestion_controller.go | 3 +++ .../suggestion/suggestion_controller_status.go | 9 +++++++++ test/e2e/v1alpha3/run-e2e-experiment.go | 12 ++++++++---- 5 files changed, 22 insertions(+), 5 deletions(-) diff --git a/examples/v1alpha3/hyperband-example.yaml b/examples/v1alpha3/hyperband-example.yaml index 87efb5021d4..46294f55da3 100644 --- a/examples/v1alpha3/hyperband-example.yaml +++ b/examples/v1alpha3/hyperband-example.yaml @@ -19,7 +19,7 @@ spec: - name: "eta" value: "3" - name: "r_l" - value: "2" + value: "9" maxFailedTrialCount: 9 parameters: - name: --lr diff --git a/manifests/v1alpha3/katib-controller/rbac.yaml b/manifests/v1alpha3/katib-controller/rbac.yaml index 7e55d84c875..f2bf88a3ba2 100644 --- a/manifests/v1alpha3/katib-controller/rbac.yaml +++ b/manifests/v1alpha3/katib-controller/rbac.yaml @@ -10,6 +10,7 @@ rules: - serviceaccounts - services - secrets + - events verbs: - "*" - apiGroups: diff --git a/pkg/controller.v1alpha3/suggestion/suggestion_controller.go b/pkg/controller.v1alpha3/suggestion/suggestion_controller.go index 571d8103bde..2d5fd35e5f4 100644 --- a/pkg/controller.v1alpha3/suggestion/suggestion_controller.go +++ b/pkg/controller.v1alpha3/suggestion/suggestion_controller.go @@ -154,6 +154,9 @@ func (r *ReconcileSuggestion) Reconcile(request reconcile.Request) (reconcile.Re } } + if err := r.updateSpec(instance, oldS); err != nil { + return reconcile.Result{}, err + } if err := r.updateStatus(instance, oldS); err != nil { return reconcile.Result{}, err } diff --git a/pkg/controller.v1alpha3/suggestion/suggestion_controller_status.go b/pkg/controller.v1alpha3/suggestion/suggestion_controller_status.go index eea85e5ed75..320ea16c807 100644 --- a/pkg/controller.v1alpha3/suggestion/suggestion_controller_status.go +++ b/pkg/controller.v1alpha3/suggestion/suggestion_controller_status.go @@ -17,6 +17,15 @@ const ( SuggestionKilledReason = "SuggestionKilled" ) +func (r *ReconcileSuggestion) updateSpec(s *suggestionsv1alpha3.Suggestion, oldS *suggestionsv1alpha3.Suggestion) error { + if !equality.Semantic.DeepEqual(s.Spec, oldS.Spec) { + if err := r.Update(context.TODO(), s); err != nil { + return err + } + } + return nil +} + func (r *ReconcileSuggestion) updateStatus(s *suggestionsv1alpha3.Suggestion, oldS *suggestionsv1alpha3.Suggestion) error { if !equality.Semantic.DeepEqual(s.Status, oldS.Status) { if err := r.Status().Update(context.TODO(), s); err != nil { diff --git a/test/e2e/v1alpha3/run-e2e-experiment.go b/test/e2e/v1alpha3/run-e2e-experiment.go index 7bd9d9dc3ca..eadc084b98b 100644 --- a/test/e2e/v1alpha3/run-e2e-experiment.go +++ b/test/e2e/v1alpha3/run-e2e-experiment.go @@ -54,10 +54,14 @@ func main() { if err != nil { log.Fatal("NewClient for Katib failed: ", err) } - var maxtrials int32 = 3 - var paralleltrials int32 = 2 - exp.Spec.MaxTrialCount = &maxtrials - exp.Spec.ParallelTrialCount = ¶lleltrials + if exp.Spec.Algorithm.AlgorithmName != "hyperband" { + // Hyperband will validate the parallel trial count, + // thus we should not change it. + var maxtrials int32 = 3 + var paralleltrials int32 = 2 + exp.Spec.MaxTrialCount = &maxtrials + exp.Spec.ParallelTrialCount = ¶lleltrials + } err = kclient.CreateExperiment(exp) if err != nil { log.Fatal("CreateExperiment from YAML failed: ", err) From ccdc296efb33545eebf595b22d86f67cc7b90c4f Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Wed, 25 Sep 2019 15:30:57 +0800 Subject: [PATCH 15/20] fix: Avoid nil pointer exception Signed-off-by: Ce Gao --- .../suggestion/suggestionclient/suggestionclient.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go b/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go index c2b2547d28f..ab38d5e6677 100644 --- a/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go +++ b/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go @@ -91,7 +91,9 @@ func (g *General) SyncAssignments( }) } - updateAlgorithmSettings(instance, response.Algorithm) + if response.Algorithm != nil { + updateAlgorithmSettings(instance, response.Algorithm) + } return nil } From 9421e117a96803d81d88e1ecef0a92cd08214cb0 Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Wed, 25 Sep 2019 16:13:15 +0800 Subject: [PATCH 16/20] feat: Move algorithm to status Signed-off-by: Ce Gao --- .../suggestions/v1alpha3/suggestion_types.go | 7 ++++--- .../suggestions/v1alpha3/zz_generated.deepcopy.go | 12 ++++++------ .../experiment/suggestion/suggestion.go | 2 +- .../suggestion/composer/composer.go | 2 +- .../suggestion/suggestion_controller.go | 3 --- .../suggestionclient/algorithm_settings.go | 5 ++++- .../suggestion/suggestionclient/suggestionclient.go | 4 +++- 7 files changed, 19 insertions(+), 16 deletions(-) diff --git a/pkg/apis/controller/suggestions/v1alpha3/suggestion_types.go b/pkg/apis/controller/suggestions/v1alpha3/suggestion_types.go index 857ca04c401..7c3ae0c439d 100644 --- a/pkg/apis/controller/suggestions/v1alpha3/suggestion_types.go +++ b/pkg/apis/controller/suggestions/v1alpha3/suggestion_types.go @@ -25,15 +25,16 @@ import ( // SuggestionSpec defines the desired state of Suggestion type SuggestionSpec struct { + AlgorithmName string `json:"algorithmName"` // Number of suggestions requested Requests int32 `json:"requests,omitempty"` - - //Algorithm settings set by the user in the experiment config - AlgorithmSpec *common.AlgorithmSpec `json:"algorithmSpec,omitempty"` } // SuggestionStatus defines the observed state of Suggestion type SuggestionStatus struct { + //Algorithm settings set by the user in the experiment config + Algorithm *common.AlgorithmSpec `json:"algorithm,omitempty"` + // Suggestion results Suggestions []TrialAssignment `json:"suggestions,omitempty"` diff --git a/pkg/apis/controller/suggestions/v1alpha3/zz_generated.deepcopy.go b/pkg/apis/controller/suggestions/v1alpha3/zz_generated.deepcopy.go index 256ba5e4c66..f0e103d3075 100644 --- a/pkg/apis/controller/suggestions/v1alpha3/zz_generated.deepcopy.go +++ b/pkg/apis/controller/suggestions/v1alpha3/zz_generated.deepcopy.go @@ -29,7 +29,7 @@ func (in *Suggestion) DeepCopyInto(out *Suggestion) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Spec.DeepCopyInto(&out.Spec) + out.Spec = in.Spec in.Status.DeepCopyInto(&out.Status) return } @@ -106,11 +106,6 @@ func (in *SuggestionList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *SuggestionSpec) DeepCopyInto(out *SuggestionSpec) { *out = *in - if in.AlgorithmSpec != nil { - in, out := &in.AlgorithmSpec, &out.AlgorithmSpec - *out = new(commonv1alpha3.AlgorithmSpec) - (*in).DeepCopyInto(*out) - } return } @@ -127,6 +122,11 @@ func (in *SuggestionSpec) DeepCopy() *SuggestionSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *SuggestionStatus) DeepCopyInto(out *SuggestionStatus) { *out = *in + if in.Algorithm != nil { + in, out := &in.Algorithm, &out.Algorithm + *out = new(commonv1alpha3.AlgorithmSpec) + (*in).DeepCopyInto(*out) + } if in.Suggestions != nil { in, out := &in.Suggestions, &out.Suggestions *out = make([]TrialAssignment, len(*in)) diff --git a/pkg/controller.v1alpha3/experiment/suggestion/suggestion.go b/pkg/controller.v1alpha3/experiment/suggestion/suggestion.go index 44d38545c70..2540719fa50 100644 --- a/pkg/controller.v1alpha3/experiment/suggestion/suggestion.go +++ b/pkg/controller.v1alpha3/experiment/suggestion/suggestion.go @@ -61,7 +61,7 @@ func (g *General) createSuggestion(instance *experimentsv1alpha3.Experiment, sug Namespace: instance.Namespace, }, Spec: suggestionsv1alpha3.SuggestionSpec{ - AlgorithmSpec: instance.Spec.Algorithm, + AlgorithmName: instance.Spec.Algorithm.AlgorithmName, Requests: suggestionRequests, }, } diff --git a/pkg/controller.v1alpha3/suggestion/composer/composer.go b/pkg/controller.v1alpha3/suggestion/composer/composer.go index 2a252ed0359..50cd66e2f01 100644 --- a/pkg/controller.v1alpha3/suggestion/composer/composer.go +++ b/pkg/controller.v1alpha3/suggestion/composer/composer.go @@ -109,7 +109,7 @@ func (g *General) DesiredService(s *suggestionsv1alpha3.Suggestion) (*corev1.Ser } func (g *General) desiredContainer(s *suggestionsv1alpha3.Suggestion) (*corev1.Container, error) { - suggestionContainerImage, err := g.getSuggestionContainerImage(s.Spec.AlgorithmSpec.AlgorithmName) + suggestionContainerImage, err := g.getSuggestionContainerImage(s.Spec.AlgorithmName) if err != nil { return nil, err } diff --git a/pkg/controller.v1alpha3/suggestion/suggestion_controller.go b/pkg/controller.v1alpha3/suggestion/suggestion_controller.go index 2d5fd35e5f4..571d8103bde 100644 --- a/pkg/controller.v1alpha3/suggestion/suggestion_controller.go +++ b/pkg/controller.v1alpha3/suggestion/suggestion_controller.go @@ -154,9 +154,6 @@ func (r *ReconcileSuggestion) Reconcile(request reconcile.Request) (reconcile.Re } } - if err := r.updateSpec(instance, oldS); err != nil { - return reconcile.Result{}, err - } if err := r.updateStatus(instance, oldS); err != nil { return reconcile.Result{}, err } diff --git a/pkg/controller.v1alpha3/suggestion/suggestionclient/algorithm_settings.go b/pkg/controller.v1alpha3/suggestion/suggestionclient/algorithm_settings.go index ead1c75f0a3..4b124f2fe60 100644 --- a/pkg/controller.v1alpha3/suggestion/suggestionclient/algorithm_settings.go +++ b/pkg/controller.v1alpha3/suggestion/suggestionclient/algorithm_settings.go @@ -25,7 +25,10 @@ func appendAlgorithmSettingsFromSuggestion(experiment *experimentsv1alpha3.Exper } func updateAlgorithmSettings(suggestion *suggestionsv1alpha3.Suggestion, algorithm *suggestionapi.AlgorithmSpec) { - algoSettingsInSuggestion := suggestion.Spec.AlgorithmSpec + if suggestion.Status.Algorithm == nil { + suggestion.Status.Algorithm = &common.AlgorithmSpec{} + } + algoSettingsInSuggestion := suggestion.Status.Algorithm for _, setting := range algorithm.AlgorithmSetting { if setting != nil { if index, found := contains(algoSettingsInSuggestion, setting.Name); found { diff --git a/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go b/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go index ab38d5e6677..b9389839ad0 100644 --- a/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go +++ b/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go @@ -66,7 +66,9 @@ func (g *General) SyncAssignments( // Algorithm settings in suggestion will overwrite the settings in experiment. filledE := e.DeepCopy() - appendAlgorithmSettingsFromSuggestion(filledE, instance.Spec.AlgorithmSpec) + if instance.Status.Algorithm != nil { + appendAlgorithmSettingsFromSuggestion(filledE, instance.Status.Algorithm) + } request := &suggestionapi.GetSuggestionsRequest{ Experiment: g.ConvertExperiment(filledE), From 751c7396edaf0019ec8899417e6c58ceb16f2eea Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Wed, 25 Sep 2019 16:24:07 +0800 Subject: [PATCH 17/20] fix: Add max Signed-off-by: Ce Gao --- examples/v1alpha3/hyperband-example.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/v1alpha3/hyperband-example.yaml b/examples/v1alpha3/hyperband-example.yaml index 46294f55da3..ea9958a61db 100644 --- a/examples/v1alpha3/hyperband-example.yaml +++ b/examples/v1alpha3/hyperband-example.yaml @@ -5,6 +5,7 @@ metadata: name: hyperband-example spec: parallelTrialCount: 9 + maxTrialCount: 9 objective: type: maximize goal: 0.99 From bbe5087aa2b5e1314cf1ba731891630b41638774 Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Wed, 25 Sep 2019 16:31:34 +0800 Subject: [PATCH 18/20] fix: Use algorithm settings Signed-off-by: Ce Gao --- .../suggestions/v1alpha3/suggestion_types.go | 4 ++-- .../v1alpha3/zz_generated.deepcopy.go | 8 +++---- .../suggestionclient/algorithm_settings.go | 21 ++++++++----------- .../suggestionclient/suggestionclient.go | 5 ++--- 4 files changed, 17 insertions(+), 21 deletions(-) diff --git a/pkg/apis/controller/suggestions/v1alpha3/suggestion_types.go b/pkg/apis/controller/suggestions/v1alpha3/suggestion_types.go index 7c3ae0c439d..20c3a0c0513 100644 --- a/pkg/apis/controller/suggestions/v1alpha3/suggestion_types.go +++ b/pkg/apis/controller/suggestions/v1alpha3/suggestion_types.go @@ -32,8 +32,8 @@ type SuggestionSpec struct { // SuggestionStatus defines the observed state of Suggestion type SuggestionStatus struct { - //Algorithm settings set by the user in the experiment config - Algorithm *common.AlgorithmSpec `json:"algorithm,omitempty"` + // Algorithmsettings set by the algorithm services. + AlgorithmSettings []common.AlgorithmSetting `json:"algorithmSettings,omitempty"` // Suggestion results Suggestions []TrialAssignment `json:"suggestions,omitempty"` diff --git a/pkg/apis/controller/suggestions/v1alpha3/zz_generated.deepcopy.go b/pkg/apis/controller/suggestions/v1alpha3/zz_generated.deepcopy.go index f0e103d3075..57faf970554 100644 --- a/pkg/apis/controller/suggestions/v1alpha3/zz_generated.deepcopy.go +++ b/pkg/apis/controller/suggestions/v1alpha3/zz_generated.deepcopy.go @@ -122,10 +122,10 @@ func (in *SuggestionSpec) DeepCopy() *SuggestionSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *SuggestionStatus) DeepCopyInto(out *SuggestionStatus) { *out = *in - if in.Algorithm != nil { - in, out := &in.Algorithm, &out.Algorithm - *out = new(commonv1alpha3.AlgorithmSpec) - (*in).DeepCopyInto(*out) + if in.AlgorithmSettings != nil { + in, out := &in.AlgorithmSettings, &out.AlgorithmSettings + *out = make([]commonv1alpha3.AlgorithmSetting, len(*in)) + copy(*out, *in) } if in.Suggestions != nil { in, out := &in.Suggestions, &out.Suggestions diff --git a/pkg/controller.v1alpha3/suggestion/suggestionclient/algorithm_settings.go b/pkg/controller.v1alpha3/suggestion/suggestionclient/algorithm_settings.go index 4b124f2fe60..aa782b06fe1 100644 --- a/pkg/controller.v1alpha3/suggestion/suggestionclient/algorithm_settings.go +++ b/pkg/controller.v1alpha3/suggestion/suggestionclient/algorithm_settings.go @@ -10,10 +10,11 @@ import ( // appendAlgorithmSettingsFromSuggestion appends the algorithm settings // in suggestion to Experiment. // Algorithm settings in suggestion will overwrite the settings in experiment. -func appendAlgorithmSettingsFromSuggestion(experiment *experimentsv1alpha3.Experiment, algoSettingsInSuggestion *common.AlgorithmSpec) { +func appendAlgorithmSettingsFromSuggestion(experiment *experimentsv1alpha3.Experiment, algoSettingsInSuggestion []common.AlgorithmSetting) { algoSettingsInExperiment := experiment.Spec.Algorithm - for _, setting := range algoSettingsInSuggestion.AlgorithmSettings { - if index, found := contains(algoSettingsInExperiment, setting.Name); found { + for _, setting := range algoSettingsInSuggestion { + if index, found := contains( + algoSettingsInExperiment.AlgorithmSettings, setting.Name); found { // If the setting is found in Experiment, update it. algoSettingsInExperiment.AlgorithmSettings[index].Value = setting.Value } else { @@ -25,18 +26,14 @@ func appendAlgorithmSettingsFromSuggestion(experiment *experimentsv1alpha3.Exper } func updateAlgorithmSettings(suggestion *suggestionsv1alpha3.Suggestion, algorithm *suggestionapi.AlgorithmSpec) { - if suggestion.Status.Algorithm == nil { - suggestion.Status.Algorithm = &common.AlgorithmSpec{} - } - algoSettingsInSuggestion := suggestion.Status.Algorithm for _, setting := range algorithm.AlgorithmSetting { if setting != nil { - if index, found := contains(algoSettingsInSuggestion, setting.Name); found { + if index, found := contains(suggestion.Status.AlgorithmSettings, setting.Name); found { // If the setting is found in Suggestion, update it. - algoSettingsInSuggestion.AlgorithmSettings[index].Value = setting.Value + suggestion.Status.AlgorithmSettings[index].Value = setting.Value } else { // If not found, append it. - algoSettingsInSuggestion.AlgorithmSettings = append(algoSettingsInSuggestion.AlgorithmSettings, common.AlgorithmSetting{ + suggestion.Status.AlgorithmSettings = append(suggestion.Status.AlgorithmSettings, common.AlgorithmSetting{ Name: setting.Name, Value: setting.Value, }) @@ -45,9 +42,9 @@ func updateAlgorithmSettings(suggestion *suggestionsv1alpha3.Suggestion, algorit } } -func contains(algorithmSettings *common.AlgorithmSpec, +func contains(algorithmSettings []common.AlgorithmSetting, name string) (int, bool) { - for i, s := range algorithmSettings.AlgorithmSettings { + for i, s := range algorithmSettings { if s.Name == name { return i, true } diff --git a/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go b/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go index b9389839ad0..1eae2e71d42 100644 --- a/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go +++ b/pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go @@ -66,9 +66,8 @@ func (g *General) SyncAssignments( // Algorithm settings in suggestion will overwrite the settings in experiment. filledE := e.DeepCopy() - if instance.Status.Algorithm != nil { - appendAlgorithmSettingsFromSuggestion(filledE, instance.Status.Algorithm) - } + appendAlgorithmSettingsFromSuggestion(filledE, + instance.Status.AlgorithmSettings) request := &suggestionapi.GetSuggestionsRequest{ Experiment: g.ConvertExperiment(filledE), From 38ed84edaff3d5538a444e526754c44de0f6d173 Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Wed, 25 Sep 2019 16:39:45 +0800 Subject: [PATCH 19/20] fix: Remove updateSpec Signed-off-by: Ce Gao --- .../suggestion/suggestion_controller_status.go | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pkg/controller.v1alpha3/suggestion/suggestion_controller_status.go b/pkg/controller.v1alpha3/suggestion/suggestion_controller_status.go index 320ea16c807..eea85e5ed75 100644 --- a/pkg/controller.v1alpha3/suggestion/suggestion_controller_status.go +++ b/pkg/controller.v1alpha3/suggestion/suggestion_controller_status.go @@ -17,15 +17,6 @@ const ( SuggestionKilledReason = "SuggestionKilled" ) -func (r *ReconcileSuggestion) updateSpec(s *suggestionsv1alpha3.Suggestion, oldS *suggestionsv1alpha3.Suggestion) error { - if !equality.Semantic.DeepEqual(s.Spec, oldS.Spec) { - if err := r.Update(context.TODO(), s); err != nil { - return err - } - } - return nil -} - func (r *ReconcileSuggestion) updateStatus(s *suggestionsv1alpha3.Suggestion, oldS *suggestionsv1alpha3.Suggestion) error { if !equality.Semantic.DeepEqual(s.Status, oldS.Status) { if err := r.Status().Update(context.TODO(), s); err != nil { From 78252f018ae0eda90a5b252f55c0b4e8d8ecd559 Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Wed, 25 Sep 2019 16:41:32 +0800 Subject: [PATCH 20/20] fix: Fix test Signed-off-by: Ce Gao --- .../suggestion/suggestion_controller_test.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pkg/controller.v1alpha3/suggestion/suggestion_controller_test.go b/pkg/controller.v1alpha3/suggestion/suggestion_controller_test.go index d6353abf17f..78cf0287422 100644 --- a/pkg/controller.v1alpha3/suggestion/suggestion_controller_test.go +++ b/pkg/controller.v1alpha3/suggestion/suggestion_controller_test.go @@ -33,7 +33,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" - commonv1alpha3 "github.com/kubeflow/katib/pkg/apis/controller/common/v1alpha3" suggestionsv1alpha3 "github.com/kubeflow/katib/pkg/apis/controller/suggestions/v1alpha3" ) @@ -56,10 +55,8 @@ func TestReconcile(t *testing.T) { Namespace: "default", }, Spec: suggestionsv1alpha3.SuggestionSpec{ - Requests: 1, - AlgorithmSpec: &commonv1alpha3.AlgorithmSpec{ - AlgorithmName: "random", - }, + Requests: 1, + AlgorithmName: "random", }, } configMap := newKatibConfigMapInstance()