From 9e314746a6abd647ea2533f334016988d6530c1a Mon Sep 17 00:00:00 2001
From: tenzen-y
Date: Tue, 14 Jun 2022 20:27:40 +0900
Subject: [PATCH] Allow running examples on Apple Silicon M1

---
  Notes (placed after the three-dash separator, so not part of the commit message):

  * The Dockerfile hunks test `uname -m` inside a RUN step, i.e. in the Linux
    build container, and compare against "aarch64".
  * deploy.sh evaluates `uname -m` on the machine that runs the script. macOS on
    Apple Silicon prints "arm64" while Linux on the same CPU family prints
    "aarch64", so the mysql image override accepts both spellings.
  * Dockerfile.gpu stops installing from requirements.txt and installs a pinned
    scipy instead; requirements.txt now also lists tensorflow.
  * build.sh now builds tf-mnist-with-summaries and enas-cnn-cifar10-cpu for
    "linux/$ARCH"; the remaining trial images stay inside the amd64-only branch.
  * NOTE(review): this text was rebuilt from a copy whose line breaks and
    indentation had been collapsed. Leading whitespace of context lines is a best
    guess, and in the build.sh hunk one blank context line could not be located
    and was placed at the end of the hunk. Run `git apply --check` before use.

 .../tfevent-metricscollector/Dockerfile       |  7 +++++++
 cmd/suggestion/chocolate/v1beta1/Dockerfile   |  2 +-
 examples/v1beta1/kind-cluster/deploy.sh       | 10 ++++++++--
 .../enas-cnn-cifar10/Dockerfile.cpu           |  8 +++++++-
 .../enas-cnn-cifar10/Dockerfile.gpu           |  2 +-
 .../enas-cnn-cifar10/requirements.txt         |  2 ++
 .../tf-mnist-with-summaries/Dockerfile        |  9 ++++++++-
 .../tf-mnist-with-summaries/requirements.txt  |  2 ++
 scripts/v1beta1/build.sh                      | 19 ++++++++++---------
 9 files changed, 46 insertions(+), 15 deletions(-)
 create mode 100644 examples/v1beta1/trial-images/tf-mnist-with-summaries/requirements.txt

diff --git a/cmd/metricscollector/v1beta1/tfevent-metricscollector/Dockerfile b/cmd/metricscollector/v1beta1/tfevent-metricscollector/Dockerfile
index 5329739c997..fc26dfdfcd9 100644
--- a/cmd/metricscollector/v1beta1/tfevent-metricscollector/Dockerfile
+++ b/cmd/metricscollector/v1beta1/tfevent-metricscollector/Dockerfile
@@ -7,6 +7,13 @@ ADD ./pkg/ ${TARGET_DIR}/pkg/
 ADD ./${METRICS_COLLECTOR_DIR}/ ${TARGET_DIR}/${METRICS_COLLECTOR_DIR}/
 WORKDIR ${TARGET_DIR}/${METRICS_COLLECTOR_DIR}
 
+RUN if [ "$(uname -m)" = "aarch64" ]; then \
+    apt-get -y update && \
+    apt-get -y install gfortran libpcre3 libpcre3-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*; \
+    fi
+
 RUN pip install --no-cache-dir -r requirements.txt
 
 RUN chgrp -R 0 ${TARGET_DIR} \
diff --git a/cmd/suggestion/chocolate/v1beta1/Dockerfile b/cmd/suggestion/chocolate/v1beta1/Dockerfile
index 0cba973a8a9..4be183490a1 100644
--- a/cmd/suggestion/chocolate/v1beta1/Dockerfile
+++ b/cmd/suggestion/chocolate/v1beta1/Dockerfile
@@ -16,7 +16,7 @@ ENV SUGGESTION_DIR cmd/suggestion/chocolate/v1beta1
 RUN apt-get -y update && \
     apt-get -y install git && \
     if [ "$(uname -m)" = "ppc64le" ] || [ "$(uname -m)" = "aarch64" ]; then \
-    apt-get -y install gfortran libopenblas-dev liblapack-dev; \
+    apt-get -y install gfortran libopenblas-dev liblapack-dev g++; \
     fi && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
diff --git a/examples/v1beta1/kind-cluster/deploy.sh b/examples/v1beta1/kind-cluster/deploy.sh
index c8ee65be4eb..fea4e1c9ea0 100755
--- a/examples/v1beta1/kind-cluster/deploy.sh
+++ b/examples/v1beta1/kind-cluster/deploy.sh
@@ -36,8 +36,8 @@ if [ -z "$(command -v kubectl)" ]; then
   exit 1
 fi
 
-# Step 1. Create Kind cluster with Kubernetes v1.22.9
-kind create cluster --image kindest/node:v1.22.9
+# Step 1. Create Kind cluster with Kubernetes v1.23.6
+kind create cluster --image kindest/node:v1.23.6
 echo -e "\nKind cluster has been created\n"
 
 # Step 2. Set context for kubectl
@@ -53,6 +53,12 @@ kubectl get nodes
 echo -e "\nDeploying Katib components\n"
 kubectl apply -k "github.com/kubeflow/katib.git/manifests/v1beta1/installs/katib-standalone?ref=master"
 
+# If the local machine's CPU architecture is arm64 (macOS prints arm64, Linux prints aarch64), rewrite mysql image.
+if [ "$(uname -m)" = "arm64" ] || [ "$(uname -m)" = "aarch64" ]; then
+  kubectl patch deployments -n kubeflow katib-mysql --type json -p \
+    '[{"op": "replace", "path": "/spec/template/spec/containers/0/image", "value": "arm64v8/mysql:8.0.29-oracle"}]'
+fi
+
 # Wait until all Katib pods are running.
 kubectl wait --for=condition=ready --timeout=${TIMEOUT} -l "katib.kubeflow.org/component in (controller,db-manager,mysql,ui)" -n kubeflow pod
 
diff --git a/examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.cpu b/examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.cpu
index 31daea12858..4b693640e7c 100644
--- a/examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.cpu
+++ b/examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.cpu
@@ -7,8 +7,14 @@ WORKDIR ${TARGET_DIR}
 
 ENV PYTHONPATH ${TARGET_DIR}
 
+RUN if [ "$(uname -m)" = "aarch64" ]; then \
+    apt-get -y update && \
+    apt-get -y install gfortran libpcre3 libpcre3-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*; \
+    fi
+
 RUN pip install --no-cache-dir -r requirements.txt
-RUN pip install --no-cache-dir tensorflow==2.9.1
 
 RUN chgrp -R 0 ${TARGET_DIR} \
   && chmod -R g+rwX ${TARGET_DIR}
diff --git a/examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.gpu b/examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.gpu
index 717593a5acf..fb37258ea8a 100644
--- a/examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.gpu
+++ b/examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.gpu
@@ -7,7 +7,7 @@ WORKDIR ${TARGET_DIR}
 
 ENV PYTHONPATH ${TARGET_DIR}
 
-RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install --no-cache-dir scipy==1.8.1
 
 RUN chgrp -R 0 ${TARGET_DIR} \
   && chmod -R g+rwX ${TARGET_DIR}
diff --git a/examples/v1beta1/trial-images/enas-cnn-cifar10/requirements.txt b/examples/v1beta1/trial-images/enas-cnn-cifar10/requirements.txt
index 497c40a9811..9ee2b9ff0b2 100644
--- a/examples/v1beta1/trial-images/enas-cnn-cifar10/requirements.txt
+++ b/examples/v1beta1/trial-images/enas-cnn-cifar10/requirements.txt
@@ -1 +1,3 @@
 scipy>=1.7.2
+tensorflow==2.9.1; platform_machine=="x86_64"
+tensorflow-aarch64==2.9.1; platform_machine=="aarch64"
diff --git a/examples/v1beta1/trial-images/tf-mnist-with-summaries/Dockerfile b/examples/v1beta1/trial-images/tf-mnist-with-summaries/Dockerfile
index ec50836c852..74919c6ad09 100644
--- a/examples/v1beta1/trial-images/tf-mnist-with-summaries/Dockerfile
+++ b/examples/v1beta1/trial-images/tf-mnist-with-summaries/Dockerfile
@@ -3,7 +3,14 @@ FROM python:3.9-slim
 ADD examples/v1beta1/trial-images/tf-mnist-with-summaries /opt/tf-mnist-with-summaries
 WORKDIR /opt/tf-mnist-with-summaries
 
-RUN pip install --no-cache-dir tensorflow==2.9.1
+RUN if [ "$(uname -m)" = "aarch64" ]; then \
+    apt-get -y update && \
+    apt-get -y install gfortran libpcre3 libpcre3-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*; \
+    fi
+
+RUN pip install --no-cache-dir -r requirements.txt
 
 RUN chgrp -R 0 /opt/tf-mnist-with-summaries \
   && chmod -R g+rwX /opt/tf-mnist-with-summaries
diff --git a/examples/v1beta1/trial-images/tf-mnist-with-summaries/requirements.txt b/examples/v1beta1/trial-images/tf-mnist-with-summaries/requirements.txt
new file mode 100644
index 00000000000..1ae49a9e4ac
--- /dev/null
+++ b/examples/v1beta1/trial-images/tf-mnist-with-summaries/requirements.txt
@@ -0,0 +1,2 @@
+tensorflow==2.9.1; platform_machine=="x86_64"
+tensorflow-aarch64==2.9.1; platform_machine=="aarch64"
diff --git a/scripts/v1beta1/build.sh b/scripts/v1beta1/build.sh
index 25a48e21206..88474a842c3 100755
--- a/scripts/v1beta1/build.sh
+++ b/scripts/v1beta1/build.sh
@@ -112,32 +112,33 @@ echo -e "\nBuilding median stopping rule...\n"
 docker build --platform "linux/$ARCH" -t "${REGISTRY}/earlystopping-medianstop:${TAG}" -f ${CMD_PREFIX}/earlystopping/medianstop/${VERSION}/Dockerfile .
 
 # Training container images
+echo -e "\nBuilding training container images..."
+
 if [ ! "$ARCH" = "amd64" ]; then
-  echo -e "\nTraining container images are supported only amd64."
+  echo -e "\nSome training container images are supported only amd64."
 else
-  echo -e "\nBuilding training container images..."
-
   echo -e "\nBuilding mxnet mnist training container example...\n"
   docker build --platform linux/amd64 -t "${REGISTRY}/mxnet-mnist:${TAG}" -f examples/${VERSION}/trial-images/mxnet-mnist/Dockerfile .
 
-  echo -e "\nBuilding Tensorflow with summaries mnist training container example...\n"
-  docker build --platform linux/amd64 -t "${REGISTRY}/tf-mnist-with-summaries:${TAG}" -f examples/${VERSION}/trial-images/tf-mnist-with-summaries/Dockerfile .
-
   echo -e "\nBuilding PyTorch mnist training container example...\n"
   docker build --platform linux/amd64 -t "${REGISTRY}/pytorch-mnist:${TAG}" -f examples/${VERSION}/trial-images/pytorch-mnist/Dockerfile .
 
   echo -e "\nBuilding Keras CIFAR-10 CNN training container example for ENAS with GPU support...\n"
   docker build --platform linux/amd64 -t "${REGISTRY}/enas-cnn-cifar10-gpu:${TAG}" -f examples/${VERSION}/trial-images/enas-cnn-cifar10/Dockerfile.gpu .
 
-  echo -e "\nBuilding Keras CIFAR-10 CNN training container example for ENAS with CPU support...\n"
-  docker build --platform linux/amd64 -t "${REGISTRY}/enas-cnn-cifar10-cpu:${TAG}" -f examples/${VERSION}/trial-images/enas-cnn-cifar10/Dockerfile.cpu .
-
   echo -e "\nBuilding PyTorch CIFAR-10 CNN training container example for DARTS with CPU support...\n"
   docker build --platform linux/amd64 -t "${REGISTRY}/darts-cnn-cifar10-cpu:${TAG}" -f examples/${VERSION}/trial-images/darts-cnn-cifar10/Dockerfile.cpu .
 
   echo -e "\nBuilding PyTorch CIFAR-10 CNN training container example for DARTS with GPU support...\n"
   docker build --platform linux/amd64 -t "${REGISTRY}/darts-cnn-cifar10-gpu:${TAG}" -f examples/${VERSION}/trial-images/darts-cnn-cifar10/Dockerfile.gpu .
+
 fi
 
+echo -e "\nBuilding Tensorflow with summaries mnist training container example...\n"
+docker build --platform "linux/$ARCH" -t "${REGISTRY}/tf-mnist-with-summaries:${TAG}" -f examples/${VERSION}/trial-images/tf-mnist-with-summaries/Dockerfile .
+
+echo -e "\nBuilding Keras CIFAR-10 CNN training container example for ENAS with CPU support...\n"
+docker build --platform "linux/$ARCH" -t "${REGISTRY}/enas-cnn-cifar10-cpu:${TAG}" -f examples/${VERSION}/trial-images/enas-cnn-cifar10/Dockerfile.cpu .
+
 echo -e "\nAll Katib images with ${TAG} tag have been built successfully!\n"
 