add HPA feature/unit/fvt/docs/script

Update run-fvt.yaml to support 1.25 kubernetes(hpa v2) Signed-off-by: jooho <jlee@redhat.com>
kserve · Mar 10, 2023 · 0706eb8 · 0706eb8
1 parent f794ffc
commit 0706eb8
Show file tree

Hide file tree

Showing 40 changed files with 1,924 additions and 77 deletions.
diff --git a/.github/workflows/run-fvt.yml b/.github/workflows/run-fvt.yml
@@ -20,14 +20,16 @@ jobs:
       - uses: actions/checkout@v2
       - uses: actions/setup-go@v2
         with:
-          go-version: '1.18.7'
-      - name: Setup Minikube
-        run: |
-          wget --no-verbose https://github.com/kubernetes/minikube/releases/download/v1.25.1/minikube-linux-amd64
-          sudo cp minikube-linux-amd64 /usr/local/bin/minikube
-          sudo chmod 755 /usr/local/bin/minikube
-          sudo apt-get install -y conntrack socat
-          minikube start --driver=none --kubernetes-version v1.22.10
+          go-version: '1.18.7'            
+      - name: Start Minikube
+        uses: medyagh/setup-minikube@latest
+        id: minikube
+        with:
+          minikube-version: 1.27.1
+          container-runtime: docker
+          kubernetes-version: v1.25.2
+          cpus: max
+          memory: max
       - name: Check pods
         run: |
           sleep 30
@@ -55,20 +57,26 @@ jobs:
           echo -e '\n  disabled: true' >> config/runtimes/torchserve-0.x.yaml
       - name: Build Controller image
         run: |
+          eval $(minikube -p minikube docker-env)
           make build.develop
           ./scripts/build_docker.sh --target runtime --tag ${{ env.IMAGE_TAG }}
+          docker images
       - name: Install ModelMesh Serving
         run: |
+          docker images
           kubectl create ns modelmesh-serving
           ./scripts/install.sh --namespace modelmesh-serving --fvt --dev-mode-logging
       - name: Free up disk space
         run: |
+          docker images
           echo "Pruning images"
           docker image prune -a -f
           docker system df
           df -h
+          docker images
       - name: Pre-pull runtime images
         run: |
+          eval $(minikube -p minikube docker-env)
           docker pull nvcr.io/nvidia/tritonserver:21.06.1-py3
           docker pull seldonio/mlserver:0.5.2
           docker pull openvino/model_server:2022.1
@@ -86,4 +94,4 @@ jobs:
           export PATH=/root/go/bin/:$PATH
           export NAMESPACE=modelmesh-serving
           export NAMESPACESCOPEMODE=false
-          make fvt
+          make fvt
diff --git a/.gitignore b/.gitignore
@@ -3,6 +3,7 @@
 
 public/
 target/
+vendor/
 
 # Binaries for programs and plugins
 *.exe
@@ -32,4 +33,4 @@ bin
 # Modelmesh development related artifacts
 devbuild
 .develop_image_name
-.dev/
+.dev/
diff --git a/Makefile b/Makefile
@@ -47,7 +47,7 @@ test:
 
 # Run fvt tests. This requires an etcd, kubernetes connection, and model serving installation. Ginkgo CLI is used to run them in parallel
 fvt:
-	ginkgo -v -p -progress --fail-fast fvt/predictor fvt/scaleToZero --timeout=40m
+	ginkgo -v -p -progress --fail-fast fvt/predictor fvt/scaleToZero fvt/hpa --timeout=50m
 
 # Command to regenerate the grpc go files from the proto files
 fvt-protoc:
@@ -87,6 +87,15 @@ deploy-release:
 deploy-release-dev-mode:
 	./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging
 
+# Install into ${NAMESPACE} from the local config directory with dev-mode
+# logging and the --fvt flag. Optional flags are appended to extra_options:
+#   MODELMESH_SERVING_IMAGE defined -> --modelmesh-serving-image <value>
+#   NAMESPACE_SCOPE_MODE defined    -> --namespace-scope-mode
+deploy-release-dev-mode-fvt:
+ifdef MODELMESH_SERVING_IMAGE
+	$(eval extra_options += --modelmesh-serving-image ${MODELMESH_SERVING_IMAGE}) 
+endif
+ifdef NAMESPACE_SCOPE_MODE
+	$(eval extra_options += --namespace-scope-mode) 
+endif 
+	./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging --fvt ${extra_options}
+
 delete: oc-login
 	./scripts/delete.sh --namespace ${NAMESPACE} --local-config-path config
 

diff --git a/apis/serving/v1alpha1/servingruntime_webhook.go b/apis/serving/v1alpha1/servingruntime_webhook.go
@@ -0,0 +1,189 @@
+/*
+Copyright 2021 IBM Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+package v1alpha1
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"strconv"
+
+	kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1"
+	"github.com/kserve/kserve/pkg/constants"
+	"github.com/kserve/modelmesh-serving/controllers/autoscaler"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
+)
+
+//+kubebuilder:webhook:path=/validate-serving-modelmesh-io-v1alpha1-servingruntime,mutating=false,failurePolicy=fail,sideEffects=None,groups=serving.kserve.io,resources=servingruntimes;clusterservingruntimes,verbs=create;update,versions=v1alpha1,name=servingruntime.modelmesh-webhook-server.default,admissionReviewVersions=v1
+
+// ServingRuntimeWebhook is an admission handler that validates the autoscaler
+// related annotations of ServingRuntime and ClusterServingRuntime objects.
+type ServingRuntimeWebhook struct {
+	// Client is a controller-runtime client. It is not referenced by the
+	// validation logic in this file.
+	Client  client.Client
+	// decoder decodes the object carried by an admission request; it is set
+	// through InjectDecoder.
+	decoder *admission.Decoder
+}
+
+// Handle decodes the request object as a ServingRuntime when the request kind
+// is "ServingRuntime" and as a ClusterServingRuntime otherwise, then validates
+// its annotations. A decode failure produces an errored response with
+// http.StatusBadRequest; the first validation that fails denies the request
+// with that validation's error message.
+func (s *ServingRuntimeWebhook) Handle(ctx context.Context, req admission.Request) admission.Response {
+	var annotations map[string]string
+
+	switch req.Kind.Kind {
+	case "ServingRuntime":
+		sr := &kservev1alpha.ServingRuntime{}
+		if err := s.decoder.Decode(req, sr); err != nil {
+			return admission.Errored(http.StatusBadRequest, err)
+		}
+		annotations = sr.ObjectMeta.Annotations
+	default:
+		csr := &kservev1alpha.ClusterServingRuntime{}
+		if err := s.decoder.Decode(req, csr); err != nil {
+			return admission.Errored(http.StatusBadRequest, err)
+		}
+		annotations = csr.ObjectMeta.Annotations
+	}
+
+	// The validators run in this fixed order, so when several annotations are
+	// invalid the denial reason is always the one from the earliest check.
+	checks := []func(map[string]string) error{
+		validateServingRuntimeAutoscaler,
+		validateAutoscalerTargetUtilizationPercentage,
+		validateAutoScalingReplicas,
+	}
+	for _, check := range checks {
+		if err := check(annotations); err != nil {
+			return admission.Denied(err.Error())
+		}
+	}
+
+	return admission.Allowed("Passed all validation checks for ServingRuntime")
+}
+
+// InjectDecoder stores the given admission decoder on the webhook so that
+// Handle can use it to decode request objects. It always returns nil.
+func (s *ServingRuntimeWebhook) InjectDecoder(d *admission.Decoder) error {
+	s.decoder = d
+	return nil
+}
+
+// validateServingRuntimeAutoscaler checks the autoscaler class annotation.
+//
+// It returns nil when the annotation is absent. When present, the value must
+// appear in constants.AutoscalerAllowedClassList; an allowed class other than
+// HPA is reported as unknown. For the HPA class, the metrics annotation (if
+// set) is validated with validateHPAMetrics.
+func validateServingRuntimeAutoscaler(annotations map[string]string) error {
+	raw, present := annotations[constants.AutoscalerClass]
+	if !present {
+		return nil
+	}
+
+	class := constants.AutoscalerClassType(raw)
+	allowed := false
+	for _, candidate := range constants.AutoscalerAllowedClassList {
+		if candidate == class {
+			allowed = true
+			break
+		}
+	}
+	if !allowed {
+		return fmt.Errorf("[%s] is not a supported autoscaler class type.\n", raw)
+	}
+
+	// The class is in the allowed list, but only HPA is handled here.
+	if class != constants.AutoscalerClassHPA {
+		return fmt.Errorf("unknown autoscaler class [%s]", class)
+	}
+
+	metric, hasMetric := annotations[constants.AutoscalerMetrics]
+	if !hasMetric {
+		return nil
+	}
+	return validateHPAMetrics(constants.AutoscalerMetricsType(metric))
+}
+
+// validateAutoscalerTargetUtilizationPercentage checks the target utilization
+// percentage annotation. It returns nil when the annotation is absent, and an
+// error when the value is not an integer or lies outside the range 1 to 100.
+func validateAutoscalerTargetUtilizationPercentage(annotations map[string]string) error {
+	raw, present := annotations[constants.TargetUtilizationPercentage]
+	if !present {
+		return nil
+	}
+
+	// A parse failure and an out-of-range value share the same message.
+	if pct, err := strconv.Atoi(raw); err != nil || pct < 1 || pct > 100 {
+		return fmt.Errorf("The target utilization percentage should be a [1-100] integer.")
+	}
+
+	return nil
+}
+
+// validateAutoScalingReplicas validates the scaling annotations for the
+// autoscaler class in use. The class defaults to autoscaler.AutoscalerClassNone
+// when the class annotation is absent; only the HPA class has checks (see
+// validateScalingHPA), every other class is accepted.
+func validateAutoScalingReplicas(annotations map[string]string) error {
+	class := autoscaler.AutoscalerClassNone
+	if raw, present := annotations[constants.AutoscalerClass]; present {
+		class = raw
+	}
+
+	if class == string(constants.AutoscalerClassHPA) {
+		return validateScalingHPA(annotations)
+	}
+	return nil
+}
+
+// validateScalingHPA validates the HPA related annotations, in this order:
+//  1. min replicas, when set, must be an integer of at least 1 (default 1);
+//  2. max replicas, when set, must be an integer (default 1);
+//  3. min replicas must not exceed max replicas;
+//  4. the metric (default CPU) must pass validateHPAMetrics;
+//  5. the target utilization, when set, must be an integer, and for the
+//     memory metric it must be at least 1.
+//
+// The first failing check determines the returned error.
+func validateScalingHPA(annotations map[string]string) error {
+	metric := constants.AutoScalerMetricsCPU
+	if raw, ok := annotations[constants.AutoscalerMetrics]; ok {
+		metric = constants.AutoscalerMetricsType(raw)
+	}
+
+	minReplicas := 1
+	if raw, ok := annotations[constants.MinScaleAnnotationKey]; ok {
+		parsed, err := strconv.Atoi(raw)
+		if err != nil {
+			return fmt.Errorf("The min replicas should be a integer.")
+		}
+		if parsed < 1 {
+			return fmt.Errorf("The min replicas should be more than 0")
+		}
+		minReplicas = parsed
+	}
+
+	maxReplicas := 1
+	if raw, ok := annotations[constants.MaxScaleAnnotationKey]; ok {
+		parsed, err := strconv.Atoi(raw)
+		if err != nil {
+			return fmt.Errorf("The max replicas should be a integer.")
+		}
+		maxReplicas = parsed
+	}
+
+	if maxReplicas < minReplicas {
+		return fmt.Errorf("The max replicas should be same or bigger than min replicas.")
+	}
+
+	if err := validateHPAMetrics(metric); err != nil {
+		return err
+	}
+
+	raw, ok := annotations[constants.TargetUtilizationPercentage]
+	if !ok {
+		return nil
+	}
+	target, err := strconv.Atoi(raw)
+	switch {
+	case err != nil:
+		return fmt.Errorf("The target utilization percentage should be a [1-100] integer.")
+	case metric == constants.AutoScalerMetricsMemory && target < 1:
+		return fmt.Errorf("The target memory should be greater than 1 MiB")
+	}
+
+	return nil
+}
+
+// validateHPAMetrics returns nil when metric is one of the entries in
+// constants.AutoscalerAllowedMetricsList, and an error naming the metric
+// otherwise.
+func validateHPAMetrics(metric constants.AutoscalerMetricsType) error {
+	allowed := constants.AutoscalerAllowedMetricsList
+	for i := range allowed {
+		if allowed[i] == metric {
+			return nil
+		}
+	}
+
+	return fmt.Errorf("[%s] is not a supported metric.\n", metric)
+}
diff --git a/apis/serving/v1alpha1/servingruntime_webhook_test.go b/apis/serving/v1alpha1/servingruntime_webhook_test.go
@@ -0,0 +1,100 @@
+/*
+Copyright 2021 IBM Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+package v1alpha1
+
+import (
+	// "fmt"
+	"testing"
+
+	"github.com/onsi/gomega"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1"
+	"github.com/kserve/kserve/pkg/constants"
+)
+
+// makeTestRawServingRuntime builds the fixture shared by the tests in this
+// file: a ServingRuntime named "foo" in namespace "default" annotated with the
+// hpa autoscaler class, the cpu metric, a target utilization of 75, min-scale
+// 2 and max-scale 3. Each call returns a fresh annotation map, so tests may
+// mutate it freely.
+func makeTestRawServingRuntime() kservev1alpha.ServingRuntime {
+	hpaAnnotations := map[string]string{
+		"serving.kserve.io/autoscalerClass":             "hpa",
+		"serving.kserve.io/metrics":                     "cpu",
+		"serving.kserve.io/targetUtilizationPercentage": "75",
+		"autoscaling.knative.dev/min-scale":             "2",
+		"autoscaling.knative.dev/max-scale":             "3",
+	}
+
+	return kservev1alpha.ServingRuntime{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:        "foo",
+			Namespace:   "default",
+			Annotations: hpaAnnotations,
+		},
+	}
+}
+
+// TestValidAutoscalerTypeAndHPAMetrics checks that the unmodified fixture
+// annotations pass the autoscaler class validation.
+func TestValidAutoscalerTypeAndHPAMetrics(t *testing.T) {
+	g := gomega.NewGomegaWithT(t)
+	runtime := makeTestRawServingRuntime()
+
+	err := validateServingRuntimeAutoscaler(runtime.Annotations)
+	g.Expect(err).Should(gomega.Succeed())
+}
+// TestInvalidAutoscalerClassType checks that the autoscaler class value "test"
+// is rejected.
+func TestInvalidAutoscalerClassType(t *testing.T) {
+	g := gomega.NewGomegaWithT(t)
+	runtime := makeTestRawServingRuntime()
+	runtime.Annotations[constants.AutoscalerClass] = "test"
+
+	err := validateServingRuntimeAutoscaler(runtime.Annotations)
+	g.Expect(err).ShouldNot(gomega.Succeed())
+}
+
+// TestInvalidAutoscalerTargetUtilizationPercentageLowValue checks that a
+// target utilization of -1 is rejected.
+func TestInvalidAutoscalerTargetUtilizationPercentageLowValue(t *testing.T) {
+	g := gomega.NewGomegaWithT(t)
+	runtime := makeTestRawServingRuntime()
+	runtime.Annotations[constants.TargetUtilizationPercentage] = "-1"
+
+	err := validateAutoscalerTargetUtilizationPercentage(runtime.Annotations)
+	g.Expect(err).ShouldNot(gomega.Succeed())
+}
+
+// TestInvalidAutoscalerTargetUtilizationPercentageHighValue checks that a
+// target utilization of 101 is rejected.
+func TestInvalidAutoscalerTargetUtilizationPercentageHighValue(t *testing.T) {
+	g := gomega.NewGomegaWithT(t)
+	runtime := makeTestRawServingRuntime()
+	runtime.Annotations[constants.TargetUtilizationPercentage] = "101"
+
+	err := validateAutoscalerTargetUtilizationPercentage(runtime.Annotations)
+	g.Expect(err).ShouldNot(gomega.Succeed())
+}
+
+// TestInvalidAutoscalerLowMinReplicas checks that a min-scale of 0 is
+// rejected by the HPA scaling validation.
+func TestInvalidAutoscalerLowMinReplicas(t *testing.T) {
+	g := gomega.NewGomegaWithT(t)
+	runtime := makeTestRawServingRuntime()
+	runtime.Annotations[constants.MinScaleAnnotationKey] = "0"
+
+	err := validateScalingHPA(runtime.Annotations)
+	g.Expect(err).ShouldNot(gomega.Succeed())
+}
+
+// TestInvalidAutoscalerMaxReplicasMustBiggerThanMixReplicas checks that a
+// min-scale (4) larger than the max-scale (3) is rejected.
+func TestInvalidAutoscalerMaxReplicasMustBiggerThanMixReplicas(t *testing.T) {
+	g := gomega.NewGomegaWithT(t)
+	runtime := makeTestRawServingRuntime()
+	runtime.Annotations[constants.MinScaleAnnotationKey] = "4"
+	runtime.Annotations[constants.MaxScaleAnnotationKey] = "3"
+
+	err := validateAutoScalingReplicas(runtime.Annotations)
+	g.Expect(err).ShouldNot(gomega.Succeed())
+}
+
+// TestValidAutoscalerMetricsType checks that "memory" is accepted as an HPA
+// metric. The fixture annotation is updated as well, although the assertion
+// passes the metric to validateHPAMetrics directly.
+func TestValidAutoscalerMetricsType(t *testing.T) {
+	g := gomega.NewGomegaWithT(t)
+	runtime := makeTestRawServingRuntime()
+	runtime.Annotations[constants.AutoscalerMetrics] = "memory"
+
+	err := validateHPAMetrics(constants.AutoscalerMetricsType("memory"))
+	g.Expect(err).Should(gomega.Succeed())
+}
+
+// TestInvalidAutoscalerMetricsType checks that "conccurrency" is rejected as
+// an HPA metric. The fixture annotation is updated as well, although the
+// assertion passes the metric to validateHPAMetrics directly.
+func TestInvalidAutoscalerMetricsType(t *testing.T) {
+	g := gomega.NewGomegaWithT(t)
+	runtime := makeTestRawServingRuntime()
+	runtime.Annotations[constants.AutoscalerMetrics] = "conccurrency"
+
+	err := validateHPAMetrics(constants.AutoscalerMetricsType("conccurrency"))
+	g.Expect(err).ShouldNot(gomega.Succeed())
+}