feat: Enable Horizontal Pod Autoscaling for ServingRuntime/ClusterSer…

…vingRuntime (#342) Enable Horizontal Pod Autoscaling for ServingRuntime/ClusterServingRuntime by adding annotation `serving.kserve.io/autoscalerClass: hpa` - Add auto-scaling, HPA controller - Add ServingRuntime Webhook - Update deployment manifests - Add script to generate self-signed certificate - Add option to enable self-signed certificate to install script - Add deploy-release-dev-mode-fvt target to Makefile - Add FVT and unit tests - Upgrade FVT minikube version from 1.25 to 1.27 - Enable FVT deployment on OpenShift (etcd --data-dir) - Update Docs Resolves #329 Signed-off-by: Jooho Lee <ljhiyh@gmail.com>
kserve · Apr 27, 2023 · dd0229f · dd0229f
1 parent e9d9366
commit dd0229f
Show file tree

Hide file tree

Showing 49 changed files with 1,994 additions and 120 deletions.
diff --git a/.github/workflows/run-fvt.yml b/.github/workflows/run-fvt.yml
@@ -20,14 +20,16 @@ jobs:
       - uses: actions/checkout@v2
       - uses: actions/setup-go@v2
         with:
-          go-version: '1.18.7'
-      - name: Setup Minikube
-        run: |
-          wget --no-verbose https://github.com/kubernetes/minikube/releases/download/v1.25.1/minikube-linux-amd64
-          sudo cp minikube-linux-amd64 /usr/local/bin/minikube
-          sudo chmod 755 /usr/local/bin/minikube
-          sudo apt-get install -y conntrack socat
-          minikube start --driver=none --kubernetes-version v1.22.10
+          go-version: '1.18.7'            
+      - name: Start Minikube
+        uses: medyagh/setup-minikube@v0.0.11
+        id: minikube
+        with:
+          minikube-version: 1.27.1
+          container-runtime: docker
+          kubernetes-version: v1.25.2
+          cpus: max
+          memory: max
       - name: Check pods
         run: |
           sleep 30
@@ -55,6 +57,7 @@ jobs:
           echo -e '\n  disabled: true' >> config/runtimes/torchserve-0.x.yaml
       - name: Build Controller image
         run: |
+          eval $(minikube -p minikube docker-env)
           make build.develop
           ./scripts/build_docker.sh --target runtime --tag ${{ env.IMAGE_TAG }}
       - name: Install ModelMesh Serving
@@ -63,12 +66,14 @@ jobs:
           ./scripts/install.sh --namespace modelmesh-serving --fvt --dev-mode-logging
       - name: Free up disk space
         run: |
+          eval $(minikube -p minikube docker-env)
           echo "Pruning images"
           docker image prune -a -f
           docker system df
           df -h
       - name: Pre-pull runtime images
         run: |
+          eval $(minikube -p minikube docker-env)
           docker pull nvcr.io/nvidia/tritonserver:21.06.1-py3
           docker pull seldonio/mlserver:0.5.2
           docker pull openvino/model_server:2022.2
@@ -78,6 +83,7 @@ jobs:
           docker pull kserve/modelmesh
       - name: Check installation
         run: |
+          eval $(minikube -p minikube docker-env)
           docker images
           kubectl get pods
           kubectl get clusterservingruntimes

diff --git a/.gitignore b/.gitignore
@@ -3,6 +3,7 @@
 
 public/
 target/
+vendor/
 
 # Binaries for programs and plugins
 *.exe

diff --git a/Dockerfile.develop b/Dockerfile.develop
@@ -40,6 +40,7 @@ RUN microdnf install \
     tar \
     vim \
     git \
+    jq \
     python38 \
     nodejs && \
     pip3 install pre-commit && \

diff --git a/Makefile b/Makefile
@@ -47,7 +47,8 @@ test:
 
 # Run fvt tests. This requires an etcd, kubernetes connection, and model serving installation. Ginkgo CLI is used to run them in parallel
 fvt:
-	ginkgo -v -procs=2 --progress --fail-fast fvt/predictor fvt/scaleToZero fvt/storage --timeout=50m
+	ginkgo -v -procs=2 --progress --fail-fast fvt/predictor fvt/scaleToZero fvt/storage fvt/hpa --timeout=50m
+
 
 # Command to regenerate the grpc go files from the proto files
 fvt-protoc:
@@ -87,6 +88,15 @@ deploy-release:
 deploy-release-dev-mode:
 	./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging
 
+# Install ModelMesh Serving for FVT runs: calls scripts/install.sh with --fvt and
+# --dev-mode-logging. When MODELMESH_SERVING_IMAGE is defined its value is passed via
+# --modelmesh-serving-image; when NAMESPACE_SCOPE_MODE is defined --namespace-scope-mode is added.
+deploy-release-dev-mode-fvt:
+ifdef MODELMESH_SERVING_IMAGE
+	$(eval extra_options += --modelmesh-serving-image ${MODELMESH_SERVING_IMAGE}) 
+endif
+ifdef NAMESPACE_SCOPE_MODE
+	$(eval extra_options += --namespace-scope-mode) 
+endif 
+	./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging --fvt ${extra_options}
+
 delete: oc-login
 	./scripts/delete.sh --namespace ${NAMESPACE} --local-config-path config
 

diff --git a/apis/serving/v1alpha1/servingruntime_webhook.go b/apis/serving/v1alpha1/servingruntime_webhook.go
@@ -0,0 +1,216 @@
+//Copyright 2021 IBM Corporation
+//
+//Licensed under the Apache License, Version 2.0 (the "License");
+//you may not use this file except in compliance with the License.
+//You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+//Unless required by applicable law or agreed to in writing, software
+//distributed under the License is distributed on an "AS IS" BASIS,
+//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//See the License for the specific language governing permissions and
+//limitations under the License.
+//
+package v1alpha1
+
+import (
+	"context"
+	"fmt"
+	"math"
+	"net/http"
+	"strconv"
+
+	kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1"
+	"github.com/kserve/kserve/pkg/constants"
+	"github.com/kserve/modelmesh-serving/controllers/autoscaler"
+	mmcontstant "github.com/kserve/modelmesh-serving/pkg/constants"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
+)
+
+// ServingRuntimeWebhook is the validating admission handler for ServingRuntime
+// and ClusterServingRuntime objects. Its Handle method performs the checks.
+//
+//+kubebuilder:webhook:path=/validate-serving-modelmesh-io-v1alpha1-servingruntime,mutating=false,failurePolicy=fail,sideEffects=None,groups=serving.kserve.io,resources=servingruntimes;clusterservingruntimes,verbs=create;update,versions=v1alpha1,name=servingruntime.modelmesh-webhook-server.default,admissionReviewVersions=v1
+type ServingRuntimeWebhook struct {
+	// Client is not referenced by the methods in this file; presumably it is
+	// populated by the code that registers the webhook (confirm at call site).
+	Client  client.Client
+	// decoder decodes the object carried by an admission request; it is set
+	// through InjectDecoder.
+	decoder *admission.Decoder
+}
+
+// Handle validates an admission request for a ServingRuntime or a
+// ClusterServingRuntime.
+//
+// The request object is decoded according to req.Kind.Kind; any kind other
+// than "ServingRuntime" is decoded as a ClusterServingRuntime. A decode failure
+// produces an HTTP 400 error response. Runtimes whose spec.multiModel is unset
+// or false are allowed without further checks. Otherwise the autoscaler class,
+// the target utilization percentage and the replica/scaling settings are
+// validated in that order, and the first failing check denies the request with
+// that check's error message.
+func (s *ServingRuntimeWebhook) Handle(ctx context.Context, req admission.Request) admission.Response {
+	var annotations map[string]string
+	isMultiModel := false
+	// math.MaxUint16 doubles as the "spec.replicas was not set" sentinel that
+	// validateAutoScalingReplicas looks for.
+	replicas := uint16(math.MaxUint16)
+
+	switch req.Kind.Kind {
+	case "ServingRuntime":
+		sr := &kservev1alpha.ServingRuntime{}
+		if err := s.decoder.Decode(req, sr); err != nil {
+			return admission.Errored(http.StatusBadRequest, err)
+		}
+		annotations = sr.ObjectMeta.Annotations
+		if r := sr.Spec.Replicas; r != nil {
+			replicas = uint16(*r)
+		}
+		if mm := sr.Spec.MultiModel; mm != nil {
+			isMultiModel = *mm
+		}
+	default:
+		csr := &kservev1alpha.ClusterServingRuntime{}
+		if err := s.decoder.Decode(req, csr); err != nil {
+			return admission.Errored(http.StatusBadRequest, err)
+		}
+		annotations = csr.ObjectMeta.Annotations
+		if r := csr.Spec.Replicas; r != nil {
+			replicas = uint16(*r)
+		}
+		if mm := csr.Spec.MultiModel; mm != nil {
+			isMultiModel = *mm
+		}
+	}
+
+	if !isMultiModel {
+		return admission.Allowed("Not validating ServingRuntime because it is not ModelMesh compatible")
+	}
+
+	// The checks run in this fixed order; the first error decides the response.
+	checks := []func() error{
+		func() error { return validateServingRuntimeAutoscaler(annotations) },
+		func() error { return validateAutoscalerTargetUtilizationPercentage(annotations) },
+		func() error { return validateAutoScalingReplicas(annotations, replicas) },
+	}
+	for _, check := range checks {
+		if err := check(); err != nil {
+			return admission.Denied(err.Error())
+		}
+	}
+
+	return admission.Allowed("Passed all validation checks for ServingRuntime")
+}
+
+// InjectDecoder stores the admission decoder d on the webhook so that Handle can decode request objects; it always returns nil.
+func (s *ServingRuntimeWebhook) InjectDecoder(d *admission.Decoder) error {
+	s.decoder = d
+	return nil
+}
+
+// validateServingRuntimeAutoscaler checks the autoscaler class annotation.
+//
+// It returns nil when the annotation is absent. When present, the value must
+// appear in constants.AutoscalerAllowedClassList, and of the allowed classes
+// only constants.AutoscalerClassHPA is accepted; any other allowed class is
+// reported as unknown. For the HPA class, the metrics annotation (if set) is
+// additionally checked with validateHPAMetrics.
+func validateServingRuntimeAutoscaler(annotations map[string]string) error {
+	value, ok := annotations[constants.AutoscalerClass]
+	if !ok {
+		return nil
+	}
+
+	class := constants.AutoscalerClassType(value)
+	allowed := false
+	for _, candidate := range constants.AutoscalerAllowedClassList {
+		if candidate == class {
+			allowed = true
+			break
+		}
+	}
+	if !allowed {
+		return fmt.Errorf("[%s] is not a supported autoscaler class type.\n", value)
+	}
+
+	if class != constants.AutoscalerClassHPA {
+		return fmt.Errorf("unknown autoscaler class [%s]", class)
+	}
+
+	metric, hasMetric := annotations[constants.AutoscalerMetrics]
+	if !hasMetric {
+		return nil
+	}
+	return validateHPAMetrics(constants.AutoscalerMetricsType(metric))
+}
+
+// validateAutoscalerTargetUtilizationPercentage checks the target utilization
+// percentage annotation. It returns nil when the annotation is absent, and an
+// error when the value is not an integer or lies outside the range 1 to 100.
+func validateAutoscalerTargetUtilizationPercentage(annotations map[string]string) error {
+	raw, present := annotations[constants.TargetUtilizationPercentage]
+	if !present {
+		return nil
+	}
+
+	// A parse failure and an out-of-range value are reported identically.
+	if target, err := strconv.Atoi(raw); err != nil || target < 1 || target > 100 {
+		return fmt.Errorf("The target utilization percentage should be a [1-100] integer.")
+	}
+	return nil
+}
+
+// validateAutoScalingReplicas checks that explicit replicas and autoscaling
+// are not combined. The class defaults to autoscaler.AutoscalerClassNone when
+// the autoscaler class annotation is absent, and only the HPA class is
+// inspected. For HPA, srReplicas must still hold the math.MaxUint16 "unset"
+// sentinel, after which the scaling annotations are checked by
+// validateScalingHPA.
+func validateAutoScalingReplicas(annotations map[string]string, srReplicas uint16) error {
+	class := autoscaler.AutoscalerClassNone
+	if annotated, found := annotations[constants.AutoscalerClass]; found {
+		class = annotated
+	}
+
+	if class != string(constants.AutoscalerClassHPA) {
+		return nil
+	}
+
+	if srReplicas != math.MaxUint16 {
+		return fmt.Errorf("Autoscaler is enabled and also replicas variable set. You can not set both.")
+	}
+	return validateScalingHPA(annotations)
+}
+
+// validateScalingHPA checks the HPA scaling annotations, returning the first
+// problem found in this order:
+//
+//  1. the min-scale annotation, if set, must be an integer of at least 1;
+//  2. the max-scale annotation, if set, must be an integer;
+//  3. min replicas must not exceed max replicas (each defaults to 1);
+//  4. the metric (constants.AutoScalerMetricsCPU unless the metrics
+//     annotation overrides it) must pass validateHPAMetrics;
+//  5. the target utilization annotation, if set, must be an integer, and
+//     must be at least 1 when the metric is memory.
+func validateScalingHPA(annotations map[string]string) error {
+	minReplicas, maxReplicas := 1, 1
+
+	if raw, found := annotations[mmcontstant.MinScaleAnnotationKey]; found {
+		parsed, err := strconv.Atoi(raw)
+		if err != nil {
+			return fmt.Errorf("The min replicas should be a integer.")
+		}
+		if parsed < 1 {
+			return fmt.Errorf("The min replicas should be more than 0")
+		}
+		minReplicas = parsed
+	}
+
+	if raw, found := annotations[mmcontstant.MaxScaleAnnotationKey]; found {
+		parsed, err := strconv.Atoi(raw)
+		if err != nil {
+			return fmt.Errorf("The max replicas should be a integer.")
+		}
+		maxReplicas = parsed
+	}
+
+	if maxReplicas < minReplicas {
+		return fmt.Errorf("The max replicas should be same or bigger than min replicas.")
+	}
+
+	metric := constants.AutoScalerMetricsCPU
+	if raw, found := annotations[constants.AutoscalerMetrics]; found {
+		metric = constants.AutoscalerMetricsType(raw)
+	}
+	if err := validateHPAMetrics(metric); err != nil {
+		return err
+	}
+
+	raw, found := annotations[constants.TargetUtilizationPercentage]
+	if !found {
+		return nil
+	}
+	target, err := strconv.Atoi(raw)
+	if err != nil {
+		return fmt.Errorf("The target utilization percentage should be a [1-100] integer.")
+	}
+	if metric == constants.AutoScalerMetricsMemory && target < 1 {
+		return fmt.Errorf("The target memory should be greater than 1 MiB")
+	}
+
+	return nil
+}
+
+// validateHPAMetrics returns nil when metric is one of the entries in
+// constants.AutoscalerAllowedMetricsList, and an error naming the rejected
+// metric otherwise.
+func validateHPAMetrics(metric constants.AutoscalerMetricsType) error {
+	supported := constants.AutoscalerAllowedMetricsList
+	for i := range supported {
+		if supported[i] == metric {
+			return nil
+		}
+	}
+	return fmt.Errorf("[%s] is not a supported metric.\n", metric)
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -3,6 +3,7 @@ @@
     public/
     target/
+    vendor/
     # Binaries for programs and plugins
     *.exe
@@ Expand Down @@