# Excerpt of test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh, reconstructed
# from a patch hunk (@@ -65,10 +65,42 @@).
#
# Deploys Katib via `make deploy`, then blocks until the labelled Katib pods
# in the `kubeflow` namespace exist and report ready.
#
# Reads:
#   WITH_DATABASE_TYPE - forwarded to `make deploy` and used as one of the
#                        katib.kubeflow.org/component label values.
#   DEPLOY_KATIB_UI    - "true" means four component pods are expected
#                        instead of three.

echo "Deploying Katib"
# Run the deploy in a subshell so the caller's working directory is untouched
# whether or not it succeeds. In an `a && b && c` list a failing `b` does not
# trigger errexit, so the result is checked explicitly.
if ! (cd ../../../../../ && WITH_DATABASE_TYPE="${WITH_DATABASE_TYPE}" make deploy); then
  echo "Failed to deploy Katib" >&2
  exit 1
fi

# Set-based label selector shared by the pod-count poll and `kubectl wait`.
KATIB_COMPONENT_SELECTOR="katib.kubeflow.org/component in (${WITH_DATABASE_TYPE},controller,db-manager,ui)"

# Wait until all Katib pods are created.
MAX_RETRY_COUNT=24
COMPONENTS_NUM=3
# Compare as a string rather than executing the variable's value as a command.
if [ "${DEPLOY_KATIB_UI:-false}" = "true" ]; then
  COMPONENTS_NUM=4
fi
# Poll every 5 seconds, at most MAX_RETRY_COUNT times, until the number of
# matching pods equals the expected component count.
until [ "$(kubectl get pods -n kubeflow -l "${KATIB_COMPONENT_SELECTOR}" -oname | wc -l)" -eq "${COMPONENTS_NUM}" ]; do
  printf '\nWaiting for katib components to be created\n'
  kubectl get pods -n kubeflow || :

  sleep 5
  MAX_RETRY_COUNT=$((MAX_RETRY_COUNT - 1))
  if [ "${MAX_RETRY_COUNT}" -le 0 ]; then
    echo "Failed to create katib components" >&2
    exit 1
  fi
done

# Wait until the created pods report the Ready condition.
TIMEOUT=120s
if ! kubectl wait --for=condition=ready "--timeout=${TIMEOUT}" -l "${KATIB_COMPONENT_SELECTOR}" -n kubeflow pod; then
  # Best-effort diagnostics; always fail afterwards, even if they error out.
  kubectl get pods -n kubeflow || :
  kubectl describe pods -n kubeflow || :
  exit 1
fi

# Additionally poll the controller pod until its ContainersReady condition
# reports "True". An empty result (e.g. no pod listed yet) counts as not ready.
CONTROLLER_READY_JSONPATH='{.items[0].status.conditions[?(@.type=="ContainersReady")].status}'
MAX_RETRY_COUNT=24
until [ "$(kubectl get pod -n kubeflow -l katib.kubeflow.org/component=controller -ojsonpath="${CONTROLLER_READY_JSONPATH}")" = "True" ]; do
  printf '\nWaiting for Katib controller to be ready\n'
  kubectl get pod -n kubeflow -l katib.kubeflow.org/component=controller -ojsonpath="${CONTROLLER_READY_JSONPATH}" || :
  kubectl get pod -n kubeflow -l katib.kubeflow.org/component=controller -ojsonpath='{.items[0].status.conditions}' || :
  kubectl get pods -n kubeflow || :

  sleep 5
  MAX_RETRY_COUNT=$((MAX_RETRY_COUNT - 1))
  if [ "${MAX_RETRY_COUNT}" -le 0 ]; then
    echo "Failed to set up Katib controller" >&2
    exit 1
  fi
done

echo "All Katib components are running."
echo "Katib deployments"
kubectl -n kubeflow get deploy