From 365dfc3ab7bcd99b3c30f8a7e12e21863dbc1caa Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Mon, 22 Apr 2024 23:18:46 +0100 Subject: [PATCH 1/2] Support ARM arch for release images Signed-off-by: Andrey Velichkevich --- Makefile | 2 +- docs/release/README.md | 5 +++- scripts/v1beta1/build.sh | 59 ++++++++++++++++++++++------------------ 3 files changed, 37 insertions(+), 29 deletions(-) diff --git a/Makefile b/Makefile index 4673ed4ea4e..061df399f8c 100755 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ HAS_MOCKGEN := $(shell command -v mockgen;) COMMIT := v1beta1-$(shell git rev-parse --short=7 HEAD) KATIB_REGISTRY := docker.io/kubeflowkatib -CPU_ARCH ?= amd64 +CPU_ARCH ?= linux/amd64,linux/arm64 ENVTEST_K8S_VERSION ?= 1.29 MOCKGEN_VERSION ?= $(shell grep 'github.com/golang/mock' go.mod | cut -d ' ' -f 2) GO_VERSION=$(shell grep '^go' go.mod | cut -d ' ' -f 2) diff --git a/docs/release/README.md b/docs/release/README.md index ed7a1e973f3..75823be8068 100644 --- a/docs/release/README.md +++ b/docs/release/README.md @@ -75,7 +75,10 @@ Follow these steps to cut a new Katib release: git clone git@github.com:kubeflow/katib.git $GOPATH/src/github.com/kubeflow/katib ``` -1. Make sure that you can build all Katib images: +1. Make sure that you can build all Katib images. 
**Note** that + your Docker Desktop should + [enable containerd image store](https://docs.docker.com/desktop/containerd/#enable-the-containerd-image-store) + to build multi-arch images: ``` make build REGISTRY=private-registry TAG=latest diff --git a/scripts/v1beta1/build.sh b/scripts/v1beta1/build.sh index f8cba66c34c..5ec9c638660 100755 --- a/scripts/v1beta1/build.sh +++ b/scripts/v1beta1/build.sh @@ -29,19 +29,24 @@ if [[ -z "${REGISTRY}" || -z "${TAG}" || -z "${ARCH}" ]]; then exit 1 fi -SUPPORTED_CPU_ARCHS=(amd64 arm64 ppc64le) +SUPPORTED_CPU_ARCHS=(linux/amd64 linux/arm64 linux/ppc64le) function check_specified_cpu_arch() { for SUPPORTED_ARCH in "${SUPPORTED_CPU_ARCHS[@]}"; do - if [ "${ARCH}" = "${SUPPORTED_ARCH}" ]; then + if [ "$1" = "${SUPPORTED_ARCH}" ]; then return 0 fi done - echo "CPU architecture '${ARCH}' is not supported" + echo "CPU architecture '${1}' is not supported" echo "You can use '${SUPPORTED_CPU_ARCHS[*]}'" echo "To get machine architecture run: uname -m" return 1 } -check_specified_cpu_arch + +# Verify that arch is supported. +IFS=',' read -ra archs <<< "$ARCH" + for arch in "${archs[@]}"; do + check_specified_cpu_arch "$arch" +done VERSION="v1beta1" CMD_PREFIX="cmd" @@ -56,82 +61,82 @@ cd "${SCRIPT_ROOT}" # Katib core images echo -e "\nBuilding Katib controller image...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/katib-controller:${TAG}" -f ${CMD_PREFIX}/katib-controller/${VERSION}/Dockerfile . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/katib-controller:${TAG}" -f ${CMD_PREFIX}/katib-controller/${VERSION}/Dockerfile . echo -e "\nBuilding Katib DB manager image...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/katib-db-manager:${TAG}" -f ${CMD_PREFIX}/db-manager/${VERSION}/Dockerfile . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/katib-db-manager:${TAG}" -f ${CMD_PREFIX}/db-manager/${VERSION}/Dockerfile . 
echo -e "\nBuilding Katib UI image...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/katib-ui:${TAG}" -f ${CMD_PREFIX}/ui/${VERSION}/Dockerfile . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/katib-ui:${TAG}" -f ${CMD_PREFIX}/ui/${VERSION}/Dockerfile . echo -e "\nBuilding file metrics collector image...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/file-metrics-collector:${TAG}" -f ${CMD_PREFIX}/metricscollector/${VERSION}/file-metricscollector/Dockerfile . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/file-metrics-collector:${TAG}" -f ${CMD_PREFIX}/metricscollector/${VERSION}/file-metricscollector/Dockerfile . echo -e "\nBuilding TF Event metrics collector image...\n" -if [ "${ARCH}" == "ppc64le" ]; then +if [ "${ARCH}" == "linux/ppc64le" ]; then - docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/tfevent-metrics-collector:${TAG}" -f ${CMD_PREFIX}/metricscollector/${VERSION}/tfevent-metricscollector/Dockerfile.ppc64le . + docker buildx build --platform "${ARCH}" -t "${REGISTRY}/tfevent-metrics-collector:${TAG}" -f ${CMD_PREFIX}/metricscollector/${VERSION}/tfevent-metricscollector/Dockerfile.ppc64le . else - docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/tfevent-metrics-collector:${TAG}" -f ${CMD_PREFIX}/metricscollector/${VERSION}/tfevent-metricscollector/Dockerfile . + docker buildx build --platform "${ARCH}" -t "${REGISTRY}/tfevent-metrics-collector:${TAG}" -f ${CMD_PREFIX}/metricscollector/${VERSION}/tfevent-metricscollector/Dockerfile . fi # Suggestion images echo -e "\nBuilding suggestion images..." echo -e "\nBuilding hyperopt suggestion...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/suggestion-hyperopt:${TAG}" -f ${CMD_PREFIX}/suggestion/hyperopt/${VERSION}/Dockerfile . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/suggestion-hyperopt:${TAG}" -f ${CMD_PREFIX}/suggestion/hyperopt/${VERSION}/Dockerfile . 
echo -e "\nBuilding hyperband suggestion...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/suggestion-hyperband:${TAG}" -f ${CMD_PREFIX}/suggestion/hyperband/${VERSION}/Dockerfile . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/suggestion-hyperband:${TAG}" -f ${CMD_PREFIX}/suggestion/hyperband/${VERSION}/Dockerfile . echo -e "\nBuilding skopt suggestion...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/suggestion-skopt:${TAG}" -f ${CMD_PREFIX}/suggestion/skopt/${VERSION}/Dockerfile . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/suggestion-skopt:${TAG}" -f ${CMD_PREFIX}/suggestion/skopt/${VERSION}/Dockerfile . echo -e "\nBuilding goptuna suggestion...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/suggestion-goptuna:${TAG}" -f ${CMD_PREFIX}/suggestion/goptuna/${VERSION}/Dockerfile . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/suggestion-goptuna:${TAG}" -f ${CMD_PREFIX}/suggestion/goptuna/${VERSION}/Dockerfile . echo -e "\nBuilding optuna suggestion...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/suggestion-optuna:${TAG}" -f ${CMD_PREFIX}/suggestion/optuna/${VERSION}/Dockerfile . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/suggestion-optuna:${TAG}" -f ${CMD_PREFIX}/suggestion/optuna/${VERSION}/Dockerfile . echo -e "\nBuilding ENAS suggestion...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/suggestion-enas:${TAG}" -f ${CMD_PREFIX}/suggestion/nas/enas/${VERSION}/Dockerfile . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/suggestion-enas:${TAG}" -f ${CMD_PREFIX}/suggestion/nas/enas/${VERSION}/Dockerfile . echo -e "\nBuilding DARTS suggestion...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/suggestion-darts:${TAG}" -f ${CMD_PREFIX}/suggestion/nas/darts/${VERSION}/Dockerfile . 
+docker buildx build --platform "${ARCH}" -t "${REGISTRY}/suggestion-darts:${TAG}" -f ${CMD_PREFIX}/suggestion/nas/darts/${VERSION}/Dockerfile . echo -e "\nBuilding PBT suggestion...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/suggestion-pbt:${TAG}" -f ${CMD_PREFIX}/suggestion/pbt/${VERSION}/Dockerfile . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/suggestion-pbt:${TAG}" -f ${CMD_PREFIX}/suggestion/pbt/${VERSION}/Dockerfile . # Early stopping images echo -e "\nBuilding early stopping images...\n" echo -e "\nBuilding median stopping rule...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/earlystopping-medianstop:${TAG}" -f ${CMD_PREFIX}/earlystopping/medianstop/${VERSION}/Dockerfile . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/earlystopping-medianstop:${TAG}" -f ${CMD_PREFIX}/earlystopping/medianstop/${VERSION}/Dockerfile . # Training container images echo -e "\nBuilding training container images..." echo -e "\nBuilding dynamic learning rate training container example for PBT...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/simple-pbt:${TAG}" -f examples/${VERSION}/trial-images/simple-pbt/Dockerfile . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/simple-pbt:${TAG}" -f examples/${VERSION}/trial-images/simple-pbt/Dockerfile . echo -e "\nBuilding PyTorch CIFAR-10 CNN training container example for DARTS with CPU support...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/darts-cnn-cifar10-cpu:${TAG}" -f examples/${VERSION}/trial-images/darts-cnn-cifar10/Dockerfile.cpu . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/darts-cnn-cifar10-cpu:${TAG}" -f examples/${VERSION}/trial-images/darts-cnn-cifar10/Dockerfile.cpu . 
echo -e "\nBuilding PyTorch CIFAR-10 CNN training container example for DARTS with GPU support...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/darts-cnn-cifar10-gpu:${TAG}" -f examples/${VERSION}/trial-images/darts-cnn-cifar10/Dockerfile.gpu . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/darts-cnn-cifar10-gpu:${TAG}" -f examples/${VERSION}/trial-images/darts-cnn-cifar10/Dockerfile.gpu . echo -e "\nBuilding PyTorch mnist training container example with CPU support...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/pytorch-mnist-cpu:${TAG}" -f examples/${VERSION}/trial-images/pytorch-mnist/Dockerfile.cpu . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/pytorch-mnist-cpu:${TAG}" -f examples/${VERSION}/trial-images/pytorch-mnist/Dockerfile.cpu . echo -e "\nBuilding PyTorch mnist training container example with GPU support...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/pytorch-mnist-gpu:${TAG}" -f examples/${VERSION}/trial-images/pytorch-mnist/Dockerfile.gpu . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/pytorch-mnist-gpu:${TAG}" -f examples/${VERSION}/trial-images/pytorch-mnist/Dockerfile.gpu . echo -e "\nBuilding Tensorflow with summaries mnist training container example...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/tf-mnist-with-summaries:${TAG}" -f examples/${VERSION}/trial-images/tf-mnist-with-summaries/Dockerfile . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/tf-mnist-with-summaries:${TAG}" -f examples/${VERSION}/trial-images/tf-mnist-with-summaries/Dockerfile . echo -e "\nBuilding Keras CIFAR-10 CNN training container example for ENAS with CPU support...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/enas-cnn-cifar10-cpu:${TAG}" -f examples/${VERSION}/trial-images/enas-cnn-cifar10/Dockerfile.cpu . 
+docker buildx build --platform "${ARCH}" -t "${REGISTRY}/enas-cnn-cifar10-cpu:${TAG}" -f examples/${VERSION}/trial-images/enas-cnn-cifar10/Dockerfile.cpu . echo -e "\nBuilding Keras CIFAR-10 CNN training container example for ENAS with GPU support...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/enas-cnn-cifar10-gpu:${TAG}" -f examples/${VERSION}/trial-images/enas-cnn-cifar10/Dockerfile.gpu . +docker buildx build --platform "${ARCH}" -t "${REGISTRY}/enas-cnn-cifar10-gpu:${TAG}" -f examples/${VERSION}/trial-images/enas-cnn-cifar10/Dockerfile.gpu . echo -e "\nAll Katib images with ${TAG} tag have been built successfully!\n" From a77abc88b3e46de7b04649c18de2914bd194a2db Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Wed, 24 Apr 2024 16:17:09 +0100 Subject: [PATCH 2/2] Update Developer Doc Signed-off-by: Andrey Velichkevich --- docs/developer-guide.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/developer-guide.md b/docs/developer-guide.md index e6601b605f4..efffa2f9a60 100644 --- a/docs/developer-guide.md +++ b/docs/developer-guide.md @@ -13,7 +13,7 @@ see the following user guides: ## Requirements - [Go](https://golang.org/) (1.22 or later) -- [Docker](https://docs.docker.com/) (20.10 or later) +- [Docker](https://docs.docker.com/) (24.0 or later) - [Docker Buildx](https://docs.docker.com/build/buildx/) (0.8.0 or later) - [Java](https://docs.oracle.com/javase/8/docs/technotes/guides/install/install_overview.html) (8 or later) - [Python](https://www.python.org/) (3.11 or later) @@ -21,7 +21,9 @@ see the following user guides: ## Build from source code -Check source code as follows: +**Note** that your Docker Desktop should +[enable containerd image store](https://docs.docker.com/desktop/containerd/#enable-the-containerd-image-store) +to build multi-arch images. 
Check source code as follows: ```bash make build REGISTRY= TAG= @@ -45,8 +47,8 @@ make undeploy ## Technical and style guide -The following guidelines apply primarily to Katib, -but other projects like [Training Operator](https://github.com/kubeflow/training-operator) might also adhere to them. +The following guidelines apply primarily to Katib, +but other projects like [Training Operator](https://github.com/kubeflow/training-operator) might also adhere to them. ## Go Development @@ -54,13 +56,13 @@ When coding: - Follow [effective go](https://go.dev/doc/effective_go) guidelines. - Run locally [`make check`](https://github.com/kubeflow/katib/blob/46173463027e4fd2e604e25d7075b2b31a702049/Makefile#L31) -to verify if changes follow best practices before submitting PRs. + to verify if changes follow best practices before submitting PRs. Testing: - Use [`cmp.Diff`](https://pkg.go.dev/github.com/google/go-cmp/cmp#Diff) instead of `reflect.Equal`, to provide useful comparisons. - Define test cases as maps instead of slices to avoid dependencies on the running order. -Map key should be equal to the test case name. + Map key should be equal to the test case name. ## Modify controller APIs @@ -77,7 +79,7 @@ make generate Below is a list of command-line flags accepted by Katib controller: | Name | Type | Default | Description | -|--------------|--------|---------|----------------------------------------------------------------------------------------------------------------------------------| +| ------------ | ------ | ------- | -------------------------------------------------------------------------------------------------------------------------------- | | katib-config | string | "" | The katib-controller will load its initial configuration from this file. Omit this flag to use the default configuration values. 
| ## DB Manager Flags @@ -126,7 +128,6 @@ Once Katib is deployed in the Kubernetes cluster, the `cert-generator` follows t - Generate the self-signed certificate and private key. - Update a Kubernetes Secret with the self-signed TLS certificate and private key. - - Patch the webhooks with the `CABundle`. Once the `cert-generator` finished, the Katib controller starts to register controllers such as `experiment-controller` to the manager.