kubeflow · k8s-ci-robot · Sep 22, 2019 · Sep 20, 2019 · Sep 21, 2019 · Sep 21, 2019
diff --git a/cmd/suggestion/chocolate/v1alpha3/Dockerfile b/cmd/suggestion/chocolate/v1alpha3/Dockerfile
@@ -0,0 +1,26 @@
+FROM python:3
+
+# On ppc64le, install a Fortran compiler, BLAS/LAPACK headers and Cython first --
+# presumably so the numeric packages in requirements.txt can be built from source.
+RUN if [ "$(uname -m)" = "ppc64le" ]; then \
+        apt-get -y update && \
+        apt-get -y install gfortran libopenblas-dev liblapack-dev && \
+        pip install cython; \
+    fi
+
+# Install the gRPC health probe binary into /bin.
+# NOTE(review): the linux-amd64 build is fetched on every architecture, including
+# the ppc64le case handled above -- confirm a matching binary is used there.
+RUN GRPC_HEALTH_PROBE_VERSION=v0.3.0 && \
+    wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-amd64 && \
+    chmod +x /bin/grpc_health_probe
+
+# COPY instead of ADD: only a plain copy of the build context is needed here.
+COPY . /usr/src/app/github.com/kubeflow/katib
+WORKDIR /usr/src/app/github.com/kubeflow/katib/cmd/suggestion/chocolate/v1alpha3
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Make the repository root and the manager/health API packages importable.
+ENV PYTHONPATH=/usr/src/app/github.com/kubeflow/katib:/usr/src/app/github.com/kubeflow/katib/pkg/apis/manager/v1alpha3/python:/usr/src/app/github.com/kubeflow/katib/pkg/apis/manager/health/python
+
+ENTRYPOINT ["python", "main.py"]
diff --git a/cmd/suggestion/chocolate/v1alpha3/__init__.py b/cmd/suggestion/chocolate/v1alpha3/__init__.py
diff --git a/cmd/suggestion/chocolate/v1alpha3/main.py b/cmd/suggestion/chocolate/v1alpha3/main.py
@@ -0,0 +1,42 @@
+import grpc
+import time
+from pkg.apis.manager.v1alpha3.python import api_pb2_grpc
+from pkg.apis.manager.health.python import health_pb2_grpc
+from pkg.suggestion.v1alpha3.chocolate_service import ChocolateService
+from concurrent import futures
+
+_ONE_DAY_IN_SECONDS = 60 * 60 * 24
+DEFAULT_PORT = "0.0.0.0:6789"
+
+
+def _build_server():
+    """Create the gRPC server and register one ChocolateService on it."""
+    grpc_server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
+    chocolate = ChocolateService()
+    # The same object is registered as both the Suggestion and Health servicer.
+    for register in (api_pb2_grpc.add_SuggestionServicer_to_server,
+                     health_pb2_grpc.add_HealthServicer_to_server):
+        register(chocolate, grpc_server)
+    grpc_server.add_insecure_port(DEFAULT_PORT)
+    return grpc_server
+
+
+def serve():
+    """Serve chocolate suggestions over gRPC until interrupted.
+
+    The server listens on DEFAULT_PORT without transport security and is
+    stopped with a zero grace period when KeyboardInterrupt is raised.
+    """
+    grpc_server = _build_server()
+    print("Listening...")
+    grpc_server.start()
+    try:
+        # Sleep one day at a time until KeyboardInterrupt arrives.
+        while True:
+            time.sleep(_ONE_DAY_IN_SECONDS)
+    except KeyboardInterrupt:
+        grpc_server.stop(0)
+
+
+if __name__ == "__main__":
+    serve()
diff --git a/cmd/suggestion/chocolate/v1alpha3/requirements.txt b/cmd/suggestion/chocolate/v1alpha3/requirements.txt
@@ -0,0 +1,11 @@
+grpcio==1.23.0
+duecredit===0.7.0
+cloudpickle==0.5.6
+numpy>=1.13.3
+scikit-learn>=0.19.0
+scipy>=0.19.1
+forestci==0.3
+protobuf==3.9.1
+googleapis-common-protos==1.6.0
+SQLAlchemy==1.3.8
+git+https://github.com/AIworx-Labs/chocolate@master
diff --git a/examples/v1alpha3/chocolate-grid-example.yaml b/examples/v1alpha3/chocolate-grid-example.yaml
@@ -0,0 +1,63 @@
+apiVersion: "kubeflow.org/v1alpha3"
+kind: Experiment
+metadata:
+  namespace: kubeflow
+  labels:
+    controller-tools.k8s.io: "1.0"
+  name: chocolate-grid-example
+spec:
+  objective:
+    type: maximize
+    goal: 0.99
+    objectiveMetricName: Validation-accuracy
+    additionalMetricNames:
+      - accuracy
+  algorithm:
+    algorithmName: chocolate-grid
+  parallelTrialCount: 3
+  maxTrialCount: 12
+  maxFailedTrialCount: 3
+  parameters:
+    - name: --lr
+      parameterType: double
+      feasibleSpace:
+        min: "0.001"
+        max: "0.01"
+        step: "0.001"
+    - name: --num-layers
+      parameterType: int
+      feasibleSpace:
+        min: "2"
+        max: "5"
+    - name: --optimizer
+      parameterType: categorical
+      feasibleSpace:
+        list:
+        - sgd
+        - adam
+        - ftrl
+  trialTemplate:
+    goTemplate:
+        rawTemplate: |-
+          apiVersion: batch/v1
+          kind: Job
+          metadata:
+            name: {{.Trial}}
+            namespace: {{.NameSpace}}
+          spec:
+            template:
+              spec:
+                serviceAccountName: metrics-collector # will be dropped
+                containers:
+                - name: {{.Trial}}
+                  image: docker.io/katib/mxnet-mnist-example
+                  command:
+                  - "python"
+                  - "/mxnet/example/image-classification/train_mnist.py"
+                  - "--batch-size=64"
+                  {{- with .HyperParameters}}
+                  {{- range .}}
+                  - "{{.Name}}={{.Value}}"
+                  {{- end}}
+                  {{- end}}
+                restartPolicy: Never
diff --git a/manifests/v1alpha3/katib-controller/katib-config.yaml b/manifests/v1alpha3/katib-controller/katib-config.yaml
@@ -33,6 +33,9 @@ data:
       "nasrl": {
         "image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-nasrl"
       },
+      "chocolate-grid": {
+        "image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-chocolate"
+      },
       "hyperopt-tpe": {
         "image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-hyperopt"
       },

diff --git a/pkg/suggestion/v1alpha3/chocolate/base_chocolate_service.py b/pkg/suggestion/v1alpha3/chocolate/base_chocolate_service.py
@@ -0,0 +1,167 @@
+import json
+import numpy as np
+import chocolate as choco
+import logging
+import base64
+
+from pkg.suggestion.v1alpha3.internal.search_space import *
+from pkg.suggestion.v1alpha3.internal.trial import *
+
+logger = logging.getLogger("BaseChocolateService")
+
+
+class BaseChocolateService(object):
+    """
+    Suggest new trials with the chocolate optimization library.
+
+    Refer to https://chocolate.readthedocs.io/
+    """
+
+    def __init__(self, algorithm_name=""):
+        """Store the name of the chocolate algorithm to sample with."""
+        self.algorithm_name = algorithm_name
+
+    def getSuggestions(self, search_space, trials, request_number):
+        """
+        Get the new suggested trials with chocolate algorithm.
+
+        Args:
+            search_space: search space; its ``params`` and ``goal`` are read.
+            trials: earlier trials, replayed into the chocolate database
+                before any new point is sampled.
+            request_number: number of new suggestions to draw.
+
+        Returns:
+            A list of ``request_number`` suggestions, each one a list of
+            Assignment objects as built by ``convert``.
+
+        Raises:
+            ValueError: if ``self.algorithm_name`` is not a supported
+                chocolate algorithm.
+        """
+        chocolate_search_space = self._build_search_space(search_space)
+
+        # NOTE(review): every request opens this same on-disk database file;
+        # confirm that is safe if requests can be served concurrently.
+        conn = choco.SQLiteConnection("sqlite:///my_db.db")
+        sampler = self._create_sampler(conn, chocolate_search_space)
+        logger.info("algorithm: %s", self.algorithm_name)
+
+        for index, trial in enumerate(trials):
+            # The metric is stored as a loss: negate it when the goal is to
+            # maximize, so that a better trial always has a lower value.
+            loss_for_choco = float(trial.target_metric.value)
+            if search_space.goal == MAX_GOAL:
+                loss_for_choco = -1 * loss_for_choco
+
+            entry = {"_chocolate_id": index, "_loss": loss_for_choco}
+            for param in search_space.params:
+                param_assignment = None
+                for assignment in trial.assignments:
+                    if param.name == assignment.name:
+                        param_assignment = assignment.value
+                        break
+                if param.type == INTEGER:
+                    param_assignment = int(param_assignment)
+                elif param.type == DOUBLE:
+                    param_assignment = float(param_assignment)
+                entry[BaseChocolateService.encode(param.name)] = param_assignment
+            logger.info(entry)
+            # Should not use sampler.update(token, loss), because we will create
+            # a new BaseChocolateService instance for every request. Thus we need
+            # to insert all previous trials every time.
+            conn.insert_result(entry)
+
+        list_of_assignments = []
+        for _ in range(request_number):
+            _token, chocolate_params = sampler.next()
+            list_of_assignments.append(
+                BaseChocolateService.convert(search_space, chocolate_params))
+        return list_of_assignments
+
+    @staticmethod
+    def _build_search_space(search_space):
+        """Translate ``search_space.params`` into a chocolate search space.
+
+        Returns a dict keyed by the encoded parameter name (see ``encode``).
+        """
+        # Example: {"x" : choco.uniform(-6, 6), "y" : choco.uniform(-6, 6)}
+        chocolate_search_space = {}
+        for param in search_space.params:
+            key = BaseChocolateService.encode(param.name)
+            if param.type == INTEGER:
+                chocolate_search_space[key] = choco.quantized_uniform(
+                    int(param.min), int(param.max), 1)
+            elif param.type == DOUBLE:
+                # An empty-string step is treated like a missing one, because
+                # float("") would raise. NOTE(review): confirm how an unset
+                # step is represented by the search-space conversion.
+                if param.step is not None and param.step != "":
+                    chocolate_search_space[key] = choco.quantized_uniform(
+                        float(param.min), float(param.max), float(param.step))
+                else:
+                    chocolate_search_space[key] = choco.uniform(
+                        float(param.min), float(param.max))
+            elif param.type == CATEGORICAL:
+                chocolate_search_space[key] = choco.choice(param.list)
+            else:
+                # Every other type is handled as a list of numeric choices.
+                chocolate_search_space[key] = choco.choice(
+                    [float(e) for e in param.list])
+        return chocolate_search_space
+
+    def _create_sampler(self, conn, chocolate_search_space):
+        """Create the chocolate sampler selected by ``self.algorithm_name``.
+
+        Raises:
+            ValueError: if the name matches none of the supported algorithms
+                (previously this left ``sampler`` unbound and failed later).
+        """
+        # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
+        if self.algorithm_name == "chocolate-grid":
+            return choco.Grid(conn, chocolate_search_space, clear_db=True)
+        if self.algorithm_name == "chocolate-random":
+            return choco.Random(conn, chocolate_search_space, clear_db=True)
+        if self.algorithm_name == "chocolate-quasirandom":
+            return choco.QuasiRandom(
+                conn, chocolate_search_space, clear_db=True)
+        if self.algorithm_name == "chocolate-bayesian-optimization":
+            return choco.Bayes(conn, chocolate_search_space, clear_db=True)
+        # "chocolate-CMAES" (choco.CMAES) is currently disabled.
+        if self.algorithm_name == "chocolate-MOCMAES":
+            mu = 1
+            return choco.MOCMAES(
+                conn, chocolate_search_space, mu=mu, clear_db=True)
+        raise ValueError(
+            "Unknown chocolate algorithm: %r" % (self.algorithm_name,))
+
+    @staticmethod
+    def convert(search_space, chocolate_params):
+        """Convert one chocolate sample into a list of Assignment objects.
+
+        Args:
+            search_space: search space whose ``params`` fix the output order.
+            chocolate_params: mapping from encoded parameter name (see
+                ``encode``) to the sampled value.
+
+        Returns:
+            One Assignment per INTEGER, DOUBLE, CATEGORICAL or DISCRETE
+            parameter; parameters of any other type are skipped.
+        """
+        supported_types = (INTEGER, DOUBLE, CATEGORICAL, DISCRETE)
+        return [
+            Assignment(
+                param.name,
+                chocolate_params[BaseChocolateService.encode(param.name)])
+            for param in search_space.params
+            if param.type in supported_types
+        ]
+
+    @staticmethod
+    def encode(name):
+        """Encode the name. Chocolate will check if the name contains hyphens.
+        Thus we need to encode it.
+
+        Returns the base64 encoding of the UTF-8 bytes of ``name`` as a str.
+        """
+        return base64.b64encode(name.encode('utf-8')).decode('utf-8')
diff --git a/pkg/suggestion/v1alpha3/chocolate_service.py b/pkg/suggestion/v1alpha3/chocolate_service.py
@@ -0,0 +1,37 @@
+import logging
+
+from pkg.apis.manager.v1alpha3.python import api_pb2
+from pkg.apis.manager.v1alpha3.python import api_pb2_grpc
+from pkg.apis.manager.health.python import health_pb2
+
+from pkg.suggestion.v1alpha3.internal.search_space import HyperParameter, HyperParameterSearchSpace
+from pkg.suggestion.v1alpha3.internal.trial import Trial, Assignment
+from pkg.suggestion.v1alpha3.chocolate.base_chocolate_service import BaseChocolateService
+from pkg.suggestion.v1alpha3.base_health_service import HealthServicer
+
+logger = logging.getLogger("ChocolateService")
+
+
+class ChocolateService(
+        api_pb2_grpc.SuggestionServicer, HealthServicer):
+    """Suggestion servicer that delegates to BaseChocolateService."""
+
+    def GetSuggestions(self, request, context):
+        """
+        Produce new parameter assignments for the experiment in ``request``.
+
+        A BaseChocolateService is built per call for the algorithm named in the
+        experiment spec. The request's experiment and trials are converted
+        to the internal search-space and trial types, and the suggested
+        assignments are returned in a GetSuggestionsReply. ``context`` is
+        not used.
+        """
+        algorithm_name = request.experiment.spec.algorithm.algorithm_name
+        base_service = BaseChocolateService(algorithm_name=algorithm_name)
+        suggestions = base_service.getSuggestions(
+            HyperParameterSearchSpace.convert(request.experiment),
+            Trial.convert(request.trials),
+            request.request_number)
+        parameter_assignments = Assignment.generate(suggestions)
+        return api_pb2.GetSuggestionsReply(
+            parameter_assignments=parameter_assignments)
diff --git a/scripts/v1alpha3/build.sh b/scripts/v1alpha3/build.sh
@@ -43,3 +43,4 @@ docker build -t ${REGISTRY}/${PREFIX}/v1alpha3/tfevent-metrics-collector -f ${CM
 echo "Building suggestion images..."
 docker build -t ${REGISTRY}/${PREFIX}/v1alpha3/suggestion-hyperopt -f ${CMD_PREFIX}/suggestion/hyperopt/v1alpha3/Dockerfile .
 docker build -t ${REGISTRY}/${PREFIX}/v1alpha3/suggestion-skopt -f ${CMD_PREFIX}/suggestion/skopt/v1alpha3/Dockerfile .
+docker build -t ${REGISTRY}/${PREFIX}/v1alpha3/suggestion-chocolate -f ${CMD_PREFIX}/suggestion/chocolate/v1alpha3/Dockerfile .
diff --git a/test/scripts/v1alpha3/build-suggestion-chocolate.sh b/test/scripts/v1alpha3/build-suggestion-chocolate.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# Copyright 2018 The Kubeflow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Builds and tags the chocolate suggestion image via `gcloud builds submit`; run from our argo workflow.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+export PATH="${GOPATH}/bin:/usr/local/go/bin:${PATH}"
+REGISTRY="${GCP_REGISTRY}"
+PROJECT="${GCP_PROJECT}"
+GO_DIR="${GOPATH}/src/github.com/${REPO_OWNER}/${REPO_NAME}-suggestion-chocolate"
+VERSION="$(git describe --tags --always --dirty)"
+IMAGE="${REGISTRY}/${REPO_NAME}/v1alpha3/suggestion-chocolate"
+
+echo "Activating service-account"
+gcloud auth activate-service-account --key-file="${GOOGLE_APPLICATION_CREDENTIALS}"
+
+echo "Copy source to GOPATH"
+mkdir -p "${GO_DIR}"
+cp -r cmd "${GO_DIR}/cmd"
+cp -r pkg "${GO_DIR}/pkg"
+cp -r vendor "${GO_DIR}/vendor"
+
+cd "${GO_DIR}"
+
+# Place the Dockerfile at the root of the directory that is submitted below.
+cp cmd/suggestion/chocolate/v1alpha3/Dockerfile .
+gcloud builds submit . --tag="${IMAGE}:${VERSION}" --project="${PROJECT}"
+gcloud container images add-tag --quiet "${IMAGE}:${VERSION}" "${IMAGE}:latest" --verbosity=info
diff --git a/test/scripts/v1alpha3/python-tests.sh b/test/scripts/v1alpha3/python-tests.sh
@@ -22,6 +22,7 @@ set -o pipefail
 
 export PYTHONPATH=$(pwd):$(pwd)/pkg/apis/manager/v1alpha3/python:$(pwd)/pkg/apis/manager/health/python
 pip install -r test/suggestion/v1alpha3/test_requirements.txt
+pip install -r cmd/suggestion/chocolate/v1alpha3/requirements.txt
 pip install -r cmd/suggestion/hyperopt/v1alpha3/requirements.txt
 pip install -r cmd/suggestion/skopt/v1alpha3/requirements.txt
 pytest -s ./test
diff --git a/test/scripts/v1alpha3/run-tests.sh b/test/scripts/v1alpha3/run-tests.sh
@@ -83,6 +83,7 @@ sed -i -e "s@image: gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/katib-ui@im
 # Suggestion algorithms
 sed -i -e "s@image: gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/suggestion-nasrl@image: ${REGISTRY}\/${REPO_NAME}\/v1alpha3\/suggestion-nasrl:${VERSION}@" manifests/v1alpha3/katib-controller/katib-config.yaml
 sed -i -e "s@image: gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/suggestion-hyperband@image: ${REGISTRY}\/${REPO_NAME}\/v1alpha3\/suggestion-hyperband:${VERSION}@" manifests/v1alpha3/katib-controller/katib-config.yaml
+sed -i -e "s@gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/suggestion-chocolate@${REGISTRY}\/${REPO_NAME}\/v1alpha3\/suggestion-chocolate:${VERSION}@" manifests/v1alpha3/katib-controller/katib-config.yaml
 sed -i -e "s@gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/suggestion-hyperopt@${REGISTRY}\/${REPO_NAME}\/v1alpha3\/suggestion-hyperopt:${VERSION}@" manifests/v1alpha3/katib-controller/katib-config.yaml
 sed -i -e "s@gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/suggestion-skopt@${REGISTRY}\/${REPO_NAME}\/v1alpha3\/suggestion-skopt:${VERSION}@" manifests/v1alpha3/katib-controller/katib-config.yaml
 
@@ -136,6 +137,7 @@ done
 
 echo "Running e2e hyperopt random experiment"
 export KUBECONFIG=$HOME/.kube/config
+go run run-e2e-experiment.go ../../../examples/v1alpha3/chocolate-grid-example.yaml
 go run run-e2e-experiment.go ../../../examples/v1alpha3/hyperopt-random-example.yaml
 go run run-e2e-experiment.go ../../../examples/v1alpha3/skopt-bayesian-optimization-example.yaml