Skip to content

Commit

Permalink
Upgrade PyTorch version to v1.13.0 (#2082)
Browse files Browse the repository at this point in the history
* Upgrade PyTorch version to v1.13.0

Signed-off-by: Yuki Iwai <yuki.iwai.tz@gmail.com>

* Build container images using minikube in E2E tests

Signed-off-by: Yuki Iwai <yuki.iwai.tz@gmail.com>

Signed-off-by: Yuki Iwai <yuki.iwai.tz@gmail.com>
  • Loading branch information
tenzen-y authored Jan 17, 2023
1 parent 6bcbd25 commit 0749265
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 35 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Pytorch=1.11.0, cuda=11.6.0
# Ref: https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel_22-08.html#rel_22-08
FROM nvcr.io/nvidia/pytorch:22.02-py3
# We need to use the nvcr.io/nvidia/pytorch image as a base image to support both linux/amd64 and linux/arm64 platforms.
# PyTorch=1.13.0, cuda=11.8.0
# Ref: https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-22-11.html#rel-22-11
FROM nvcr.io/nvidia/pytorch:22.11-py3

ENV TARGET_DIR /opt/darts-cnn-cifar10

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
torch==1.11.0
torchvision==0.12.0
torch==1.13.1
torchvision==0.14.1
Pillow>=9.1.1
7 changes: 4 additions & 3 deletions examples/v1beta1/trial-images/pytorch-mnist/Dockerfile.gpu
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Pytorch=1.11.0, cuda=11.6.0
# Ref: https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel_22-08.html#rel_22-08
FROM nvcr.io/nvidia/pytorch:22.02-py3
# We need to use the nvcr.io/nvidia/pytorch image as a base image to support both linux/amd64 and linux/arm64 platforms.
# PyTorch=1.13.0, cuda=11.8.0
# Ref: https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-22-11.html#rel-22-11
FROM nvcr.io/nvidia/pytorch:22.11-py3

ADD examples/v1beta1/trial-images/pytorch-mnist /opt/pytorch-mnist

Expand Down
4 changes: 2 additions & 2 deletions examples/v1beta1/trial-images/pytorch-mnist/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cloudml-hypertune==0.1.0.dev6
torch==1.11.0
torchvision==0.12.0
torch==1.13.1
torchvision==0.14.1
Pillow>=9.1.1
37 changes: 12 additions & 25 deletions test/e2e/v1beta1/scripts/gh-actions/build-load.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@
set -o errexit
set -o pipefail
set -o nounset
cd "$(dirname "$0")"

pushd .
cd "$(dirname "$0")/../../../../.."
trap popd EXIT

TRIAL_IMAGES=${1:-""}
EXPERIMENTS=${2:-""}
Expand Down Expand Up @@ -48,14 +51,7 @@ _build_containers() {
done

echo -e "\nBuilding $CONTAINER_NAME image with $DOCKERFILE...\n"
docker buildx build --platform "$(uname -m)" --load -t "$REGISTRY/$CONTAINER_NAME:$TAG" -f "../../../../../$DOCKERFILE" ../../../../../
}

_load_minikube_cluster() {
CONTAINER_NAME=${1:-"katib-controller"}

echo -e "\n\nLoading $CONTAINER_NAME image...\n\n"
minikube image load "$REGISTRY/$CONTAINER_NAME:$TAG"
DOCKER_BUILDKIT=1 minikube image build --build-opt platform=linux/amd64 --all -t "$REGISTRY/$CONTAINER_NAME:$TAG" -f "$DOCKERFILE" .
}

_install_tools() {
Expand All @@ -66,11 +62,6 @@ _install_tools() {
fi
}

cleanup_build_cache() {
echo -e "\nCleanup Build Cache...\n"
docker builder prune
}

run() {
CONTAINER_NAME=${1:-"katib-controller"}
DOCKERFILE=${2:-"$CMD_PREFIX/katib-controller/$VERSION/Dockerfile"}
Expand All @@ -85,10 +76,10 @@ run() {
# Search for Suggestion Images required for Trial.
for exp_name in "${EXPERIMENT_ARRAY[@]}"; do

exp_path=$(find ../../../../../examples/v1beta1 -name "${exp_name}.yaml")
exp_path=$(find examples/v1beta1 -name "${exp_name}.yaml")
algorithm_name="$(yq eval '.spec.algorithm.algorithmName' "$exp_path")"

suggestion_image_name="$(yq eval '.data.suggestion' ../../../../../manifests/v1beta1/components/controller/katib-config.yaml |
suggestion_image_name="$(yq eval '.data.suggestion' manifests/v1beta1/components/controller/katib-config.yaml |
algorithm_name=$algorithm_name yq eval '.[env(algorithm_name)].image' | cut -d: -f1)"
suggestion_name="$(basename "$suggestion_image_name")"

Expand All @@ -99,7 +90,6 @@ run() {
for s in "${suggestions[@]}"; do
if [ "$s" == "$CONTAINER_NAME" ]; then
_build_containers "$CONTAINER_NAME" "$DOCKERFILE"
_load_minikube_cluster "$CONTAINER_NAME"
break
fi
done
Expand All @@ -112,10 +102,10 @@ run() {
# Search for EarlyStopping Images required for Trial.
for exp_name in "${EXPERIMENT_ARRAY[@]}"; do

exp_path=$(find ../../../../../examples/v1beta1 -name "${exp_name}.yaml")
exp_path=$(find examples/v1beta1 -name "${exp_name}.yaml")
algorithm_name="$(yq eval '.spec.earlyStopping.algorithmName' "$exp_path")"

earlystopping_image_name="$(yq eval '.data.early-stopping' ../../../../../manifests/v1beta1/components/controller/katib-config.yaml |
earlystopping_image_name="$(yq eval '.data.early-stopping' manifests/v1beta1/components/controller/katib-config.yaml |
algorithm_name=$algorithm_name yq eval '.[env(algorithm_name)].image' | cut -d: -f1)"
earlystopping_name="$(basename "$earlystopping_image_name")"

Expand All @@ -126,15 +116,13 @@ run() {
for e in "${earlystoppings[@]}"; do
if [ "$e" == "$CONTAINER_NAME" ]; then
_build_containers "$CONTAINER_NAME" "$DOCKERFILE"
_load_minikube_cluster "$CONTAINER_NAME"
break
fi
done

# Others
else
_build_containers "$CONTAINER_NAME" "$DOCKERFILE"
_load_minikube_cluster "$CONTAINER_NAME"
fi
}

Expand All @@ -153,7 +141,6 @@ fi
run "cert-generator" "$CMD_PREFIX/cert-generator/$VERSION/Dockerfile"
run "file-metrics-collector" "$CMD_PREFIX/metricscollector/$VERSION/file-metricscollector/Dockerfile"
run "tfevent-metrics-collector" "$CMD_PREFIX/metricscollector/$VERSION/tfevent-metricscollector/Dockerfile"
cleanup_build_cache

# Suggestion images
echo -e "\nBuilding suggestion images..."
Expand All @@ -165,18 +152,18 @@ run "suggestion-optuna" "$CMD_PREFIX/suggestion/optuna/$VERSION/Dockerfile"
run "suggestion-pbt" "$CMD_PREFIX/suggestion/pbt/$VERSION/Dockerfile"
run "suggestion-enas" "$CMD_PREFIX/suggestion/nas/enas/$VERSION/Dockerfile"
run "suggestion-darts" "$CMD_PREFIX/suggestion/nas/darts/$VERSION/Dockerfile"
cleanup_build_cache

# Early stopping images
echo -e "\nBuilding early stopping images...\n"
run "earlystopping-medianstop" "$CMD_PREFIX/earlystopping/medianstop/$VERSION/Dockerfile"
cleanup_build_cache

# Training container images
echo -e "\nBuilding training container images..."
for name in "${TRIAL_IMAGE_ARRAY[@]}"; do
run "$name" "examples/$VERSION/trial-images/$name/Dockerfile"
done
cleanup_build_cache

echo -e "\nCleanup Build Cache...\n"
docker buildx prune -f

echo -e "\nAll Katib images with ${TAG} tag have been built successfully!\n"

0 comments on commit 0749265

Please sign in to comment.