Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add grid with the help of chocolate #780

Merged
merged 4 commits into from
Sep 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions cmd/suggestion/chocolate/v1alpha3/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Image for the Chocolate-based suggestion service (entrypoint: main.py).
FROM python:3

# On ppc64le only: install a Fortran compiler, OpenBLAS/LAPACK headers and Cython
# before the Python requirements are installed further down.
# NOTE(review): presumably needed to build numpy/scipy from source on that
# architecture -- confirm.
RUN if [ "$(uname -m)" = "ppc64le" ]; then \
apt-get -y update && \
apt-get -y install gfortran libopenblas-dev liblapack-dev && \
pip install cython; \
fi
# Download the grpc_health_probe binary into /bin and make it executable.
# NOTE(review): the linux-amd64 build is fetched regardless of the build
# architecture, including the ppc64le case handled above -- confirm this is intended.
RUN GRPC_HEALTH_PROBE_VERSION=v0.3.0 && \
wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-amd64 && \
chmod +x /bin/grpc_health_probe

# Copy the whole build context into the image, then install the Python
# dependencies listed next to this service's main.py.
ADD . /usr/src/app/github.com/kubeflow/katib
WORKDIR /usr/src/app/github.com/kubeflow/katib/cmd/suggestion/chocolate/v1alpha3
RUN pip install --no-cache-dir -r requirements.txt

# Put the repository root and the manager API / health Python directories on the
# import path so that main.py's `pkg.*` imports resolve.
ENV PYTHONPATH /usr/src/app/github.com/kubeflow/katib:/usr/src/app/github.com/kubeflow/katib/pkg/apis/manager/v1alpha3/python:/usr/src/app/github.com/kubeflow/katib/pkg/apis/manager/health/python

# Start the suggestion server (main.py in WORKDIR).
ENTRYPOINT ["python", "main.py"]
Empty file.
26 changes: 26 additions & 0 deletions cmd/suggestion/chocolate/v1alpha3/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import grpc
import time
from pkg.apis.manager.v1alpha3.python import api_pb2_grpc
from pkg.apis.manager.health.python import health_pb2_grpc
from pkg.suggestion.v1alpha3.chocolate_service import ChocolateService
from concurrent import futures

_ONE_DAY_IN_SECONDS = 60 * 60 * 24
DEFAULT_PORT = "0.0.0.0:6789"

def serve():
    """Run the Chocolate suggestion gRPC server until interrupted.

    A single ChocolateService instance is registered as both the Suggestion
    servicer and the Health servicer. The server uses a 10-worker thread pool
    and listens on DEFAULT_PORT without transport security.
    """
    grpc_server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
    chocolate_service = ChocolateService()

    # The same object answers suggestion requests and health checks.
    api_pb2_grpc.add_SuggestionServicer_to_server(
        chocolate_service, grpc_server)
    health_pb2_grpc.add_HealthServicer_to_server(
        chocolate_service, grpc_server)

    grpc_server.add_insecure_port(DEFAULT_PORT)
    print("Listening...")
    grpc_server.start()

    # Keep the main thread alive by sleeping in a loop; Ctrl-C stops the
    # server immediately (grace period of 0).
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        grpc_server.stop(0)

if __name__ == "__main__":
serve()
11 changes: 11 additions & 0 deletions cmd/suggestion/chocolate/v1alpha3/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
grpcio==1.23.0
duecredit===0.7.0
cloudpickle==0.5.6
numpy>=1.13.3
scikit-learn>=0.19.0
scipy>=0.19.1
forestci==0.3
protobuf==3.9.1
googleapis-common-protos==1.6.0
SQLAlchemy==1.3.8
git+https://github.com/AIworx-Labs/chocolate@master
63 changes: 63 additions & 0 deletions examples/v1alpha3/chocolate-grid-example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
apiVersion: "kubeflow.org/v1alpha3"
kind: Experiment
metadata:
namespace: kubeflow
labels:
controller-tools.k8s.io: "1.0"
name: chocolate-grid-example
spec:
objective:
type: maximize
goal: 0.99
objectiveMetricName: Validation-accuracy
additionalMetricNames:
- accuracy
algorithm:
algorithmName: chocolate-grid
parallelTrialCount: 3
maxTrialCount: 12
maxFailedTrialCount: 3
parameters:
- name: --lr
parameterType: double
feasibleSpace:
min: "0.001"
max: "0.01"
step: "0.001"
- name: --num-layers
parameterType: int
feasibleSpace:
min: "2"
max: "5"
- name: --optimizer
parameterType: categorical
feasibleSpace:
list:
- sgd
- adam
- ftrl
trialTemplate:
goTemplate:
rawTemplate: |-
apiVersion: batch/v1
kind: Job
metadata:
name: {{.Trial}}
namespace: {{.NameSpace}}
spec:
template:
spec:
serviceAccountName: metrics-collector # will be dropped
containers:
- name: {{.Trial}}
image: docker.io/katib/mxnet-mnist-example
command:
- "python"
- "/mxnet/example/image-classification/train_mnist.py"
- "--batch-size=64"
{{- with .HyperParameters}}
{{- range .}}
- "{{.Name}}={{.Value}}"
{{- end}}
{{- end}}
restartPolicy: Never
3 changes: 3 additions & 0 deletions manifests/v1alpha3/katib-controller/katib-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ data:
"nasrl": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-nasrl"
},
"chocolate-grid": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-chocolate"
},
"hyperopt-tpe": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-hyperopt"
},
Expand Down
120 changes: 120 additions & 0 deletions pkg/suggestion/v1alpha3/chocolate/base_chocolate_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import json
import numpy as np
import chocolate as choco
import logging
import base64

from pkg.suggestion.v1alpha3.internal.search_space import *
from pkg.suggestion.v1alpha3.internal.trial import *

logger = logging.getLogger("BaseChocolateService")


class BaseChocolateService(object):
    """
    Suggestion backend that delegates sampling to the Chocolate library.

    Refer to https://chocolate.readthedocs.io/
    """

    # Supported algorithm names mapped to the name of the Chocolate sampler
    # class and the extra keyword arguments it is built with. The class is
    # looked up on the ``chocolate`` module when a request is served.
    # "chocolate-CMAES" (choco.CMAES) is deliberately absent: it was
    # commented out in the original implementation.
    # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
    _SAMPLERS = {
        "chocolate-grid": ("Grid", {}),
        "chocolate-random": ("Random", {}),
        "chocolate-quasirandom": ("QuasiRandom", {}),
        "chocolate-bayesian-optimization": ("Bayes", {}),
        "chocolate-MOCMAES": ("MOCMAES", {"mu": 1}),
    }

    def __init__(self, algorithm_name=""):
        """Remember which Chocolate algorithm this instance should use."""
        self.algorithm_name = algorithm_name

    def getSuggestions(self, search_space, trials, request_number):
        """
        Get the new suggested trials with chocolate algorithm.

        Args:
            search_space: object exposing ``params`` (each with ``name``,
                ``type``, ``min``, ``max``, ``step``, ``list``) and ``goal``.
            trials: previously evaluated trials; each exposes
                ``target_metric.value`` and ``assignments``.
            request_number: how many new sets of assignments to produce.

        Returns:
            A list with ``request_number`` items, each a list of Assignment
            objects as built by ``convert``.

        Raises:
            ValueError: if ``algorithm_name`` is not a supported algorithm.
        """
        # Fail fast with a clear message; previously an unknown name left the
        # sampler unbound and surfaced later as an UnboundLocalError.
        if self.algorithm_name not in self._SAMPLERS:
            raise ValueError(
                "Unknown algorithm name {!r}; supported algorithms: {}".format(
                    self.algorithm_name, ", ".join(sorted(self._SAMPLERS))))

        chocolate_search_space = self._build_search_space(search_space)

        # NOTE(review): every call uses the same on-disk SQLite file; confirm
        # this is safe if requests are served concurrently.
        conn = choco.SQLiteConnection("sqlite:///my_db.db")
        sampler_class_name, extra_kwargs = self._SAMPLERS[self.algorithm_name]
        sampler = getattr(choco, sampler_class_name)(
            conn, chocolate_search_space, clear_db=True, **extra_kwargs)
        logger.info("algortihm: %s", self.algorithm_name)

        for index, trial in enumerate(trials):
            entry = self._trial_to_entry(search_space, index, trial)
            logger.info(entry)
            # Should not use sampler.update(token, loss), because we will
            # create a new BaseChocolateService instance for every request.
            # Thus we need to insert all previous trials every time.
            conn.insert_result(entry)

        list_of_assignments = []
        for _ in range(request_number):
            # sampler.next() yields (token, params); the token is not needed
            # because results are re-inserted from scratch on every request.
            _token, chocolate_params = sampler.next()
            list_of_assignments.append(
                BaseChocolateService.convert(search_space, chocolate_params))
        return list_of_assignments

    @staticmethod
    def _build_search_space(search_space):
        """Translate the search space into a Chocolate space dictionary.

        Example result: {"x" : choco.uniform(-6, 6), "y" : choco.uniform(-6, 6)}
        Keys are the encoded parameter names (see ``encode``).
        """
        chocolate_search_space = {}
        for param in search_space.params:
            key = BaseChocolateService.encode(param.name)
            if param.type == INTEGER:
                chocolate_search_space[key] = choco.quantized_uniform(
                    int(param.min), int(param.max), 1)
            elif param.type == DOUBLE:
                if param.step is not None:
                    chocolate_search_space[key] = choco.quantized_uniform(
                        float(param.min), float(param.max), float(param.step))
                else:
                    chocolate_search_space[key] = choco.uniform(
                        float(param.min), float(param.max))
            elif param.type == CATEGORICAL:
                chocolate_search_space[key] = choco.choice(param.list)
            else:
                # Any remaining type is treated as a list of numeric choices.
                chocolate_search_space[key] = choco.choice(
                    [float(e) for e in param.list])
        return chocolate_search_space

    @staticmethod
    def _trial_to_entry(search_space, index, trial):
        """Build the result row inserted into Chocolate for one past trial."""
        loss_for_choco = float(trial.target_metric.value)
        if search_space.goal == MAX_GOAL:
            # The metric is stored as a loss, so negate it when maximizing.
            loss_for_choco = -1 * loss_for_choco

        entry = {"_chocolate_id": index, "_loss": loss_for_choco}
        for param in search_space.params:
            # First assignment whose name matches this parameter, else None.
            param_assignment = next(
                (assignment.value for assignment in trial.assignments
                 if param.name == assignment.name),
                None)
            if param.type == INTEGER:
                param_assignment = int(param_assignment)
            elif param.type == DOUBLE:
                param_assignment = float(param_assignment)
            entry[BaseChocolateService.encode(param.name)] = param_assignment
        return entry

    @staticmethod
    def convert(search_space, chocolate_params):
        """Turn one set of Chocolate parameters into a list of Assignments.

        Parameters whose type is not INTEGER, DOUBLE, CATEGORICAL or DISCRETE
        are skipped.
        """
        known_types = (INTEGER, DOUBLE, CATEGORICAL, DISCRETE)
        return [
            Assignment(
                param.name,
                chocolate_params[BaseChocolateService.encode(param.name)])
            for param in search_space.params
            if param.type in known_types
        ]

    @staticmethod
    def encode(name):
        """Encode the name. Chocolate will check if the name contains hyphens.
        Thus we need to encode it.
        """
        return base64.b64encode(name.encode('utf-8')).decode('utf-8')
29 changes: 29 additions & 0 deletions pkg/suggestion/v1alpha3/chocolate_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import logging

from pkg.apis.manager.v1alpha3.python import api_pb2
from pkg.apis.manager.v1alpha3.python import api_pb2_grpc
from pkg.apis.manager.health.python import health_pb2

from pkg.suggestion.v1alpha3.internal.search_space import HyperParameter, HyperParameterSearchSpace
from pkg.suggestion.v1alpha3.internal.trial import Trial, Assignment
from pkg.suggestion.v1alpha3.chocolate.base_chocolate_service import BaseChocolateService
from pkg.suggestion.v1alpha3.base_health_service import HealthServicer

logger = logging.getLogger("ChocolateService")


class ChocolateService(
        api_pb2_grpc.SuggestionServicer, HealthServicer):
    """gRPC servicer that answers suggestion requests via BaseChocolateService."""

    def GetSuggestions(self, request, context):
        """
        Main function to provide suggestion.

        Builds a BaseChocolateService for the algorithm named in the request's
        experiment spec, converts the experiment and trials to the internal
        representations, and wraps the resulting assignments in a
        GetSuggestionsReply.
        """
        algorithm_name = request.experiment.spec.algorithm.algorithm_name
        chocolate_backend = BaseChocolateService(algorithm_name=algorithm_name)

        experiment_space = HyperParameterSearchSpace.convert(
            request.experiment)
        previous_trials = Trial.convert(request.trials)

        suggested = chocolate_backend.getSuggestions(
            experiment_space, previous_trials, request.request_number)
        reply_assignments = Assignment.generate(suggested)
        return api_pb2.GetSuggestionsReply(
            parameter_assignments=reply_assignments)
1 change: 1 addition & 0 deletions scripts/v1alpha3/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,4 @@ docker build -t ${REGISTRY}/${PREFIX}/v1alpha3/tfevent-metrics-collector -f ${CM
echo "Building suggestion images..."
docker build -t ${REGISTRY}/${PREFIX}/v1alpha3/suggestion-hyperopt -f ${CMD_PREFIX}/suggestion/hyperopt/v1alpha3/Dockerfile .
docker build -t ${REGISTRY}/${PREFIX}/v1alpha3/suggestion-skopt -f ${CMD_PREFIX}/suggestion/skopt/v1alpha3/Dockerfile .
docker build -t ${REGISTRY}/${PREFIX}/v1alpha3/suggestion-chocolate -f ${CMD_PREFIX}/suggestion/chocolate/v1alpha3/Dockerfile .
42 changes: 42 additions & 0 deletions test/scripts/v1alpha3/build-suggestion-chocolate.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash

# Copyright 2018 The Kubeflow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This shell script is used to build an image from our argo workflow

# Abort on any failing command, on use of an unset variable, and on a failure
# anywhere inside a pipeline.
set -o errexit
set -o nounset
set -o pipefail

# All expansions below are quoted so that values containing spaces or glob
# characters are passed through as single, literal arguments.
export PATH="${GOPATH}/bin:/usr/local/go/bin:${PATH}"
REGISTRY="${GCP_REGISTRY}"
PROJECT="${GCP_PROJECT}"
GO_DIR="${GOPATH}/src/github.com/${REPO_OWNER}/${REPO_NAME}-suggestion-chocolate"
VERSION="$(git describe --tags --always --dirty)"
# Image reference without a tag; tagged with ${VERSION} and then "latest" below.
IMAGE="${REGISTRY}/${REPO_NAME}/v1alpha3/suggestion-chocolate"

echo "Activating service-account"
gcloud auth activate-service-account --key-file="${GOOGLE_APPLICATION_CREDENTIALS}"

echo "Copy source to GOPATH"
mkdir -p "${GO_DIR}"
cp -r cmd "${GO_DIR}/cmd"
cp -r pkg "${GO_DIR}/pkg"
cp -r vendor "${GO_DIR}/vendor"

cd "${GO_DIR}"

# The Dockerfile is copied to the root of the build context before the
# context (".") is submitted.
cp cmd/suggestion/chocolate/v1alpha3/Dockerfile .
gcloud builds submit . --tag="${IMAGE}:${VERSION}" --project="${PROJECT}"
gcloud container images add-tag --quiet "${IMAGE}:${VERSION}" "${IMAGE}:latest" --verbosity=info
1 change: 1 addition & 0 deletions test/scripts/v1alpha3/python-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ set -o pipefail

export PYTHONPATH=$(pwd):$(pwd)/pkg/apis/manager/v1alpha3/python:$(pwd)/pkg/apis/manager/health/python
pip install -r test/suggestion/v1alpha3/test_requirements.txt
pip install -r cmd/suggestion/chocolate/v1alpha3/requirements.txt
pip install -r cmd/suggestion/hyperopt/v1alpha3/requirements.txt
pip install -r cmd/suggestion/skopt/v1alpha3/requirements.txt
pytest -s ./test
2 changes: 2 additions & 0 deletions test/scripts/v1alpha3/run-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ sed -i -e "s@image: gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/katib-ui@im
# Suggestion algorithms
sed -i -e "s@image: gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/suggestion-nasrl@image: ${REGISTRY}\/${REPO_NAME}\/v1alpha3\/suggestion-nasrl:${VERSION}@" manifests/v1alpha3/katib-controller/katib-config.yaml
sed -i -e "s@image: gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/suggestion-hyperband@image: ${REGISTRY}\/${REPO_NAME}\/v1alpha3\/suggestion-hyperband:${VERSION}@" manifests/v1alpha3/katib-controller/katib-config.yaml
sed -i -e "s@gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/suggestion-chocolate@${REGISTRY}\/${REPO_NAME}\/v1alpha3\/suggestion-chocolate:${VERSION}@" manifests/v1alpha3/katib-controller/katib-config.yaml
sed -i -e "s@gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/suggestion-hyperopt@${REGISTRY}\/${REPO_NAME}\/v1alpha3\/suggestion-hyperopt:${VERSION}@" manifests/v1alpha3/katib-controller/katib-config.yaml
sed -i -e "s@gcr.io\/kubeflow-images-public\/katib\/v1alpha3\/suggestion-skopt@${REGISTRY}\/${REPO_NAME}\/v1alpha3\/suggestion-skopt:${VERSION}@" manifests/v1alpha3/katib-controller/katib-config.yaml

Expand Down Expand Up @@ -136,6 +137,7 @@ done

echo "Running e2e hyperopt random experiment"
export KUBECONFIG=$HOME/.kube/config
go run run-e2e-experiment.go ../../../examples/v1alpha3/chocolate-grid-example.yaml
go run run-e2e-experiment.go ../../../examples/v1alpha3/hyperopt-random-example.yaml
go run run-e2e-experiment.go ../../../examples/v1alpha3/skopt-bayesian-optimization-example.yaml

Expand Down
Loading