From acca29733396e7724ee161b1e77db8bf918574f6 Mon Sep 17 00:00:00 2001 From: jooho Date: Wed, 8 Mar 2023 16:47:31 -0500 Subject: [PATCH] add HPA feature/unit/fvt/docs/script Update run-fvt.yaml to support 1.25 kubernetes(hpa v2) Signed-off-by: jooho --- .github/workflows/run-fvt.yml | 22 +- .gitignore | 3 +- Makefile | 11 +- .../v1alpha1/servingruntime_webhook.go | 189 +++++++++++++ .../v1alpha1/servingruntime_webhook_test.go | 100 +++++++ config/certmanager/certificate.yaml | 8 +- config/default/kustomization.yaml | 101 +++++-- config/default/manager_webhook_patch.yaml | 10 +- config/default/webhookcainjection_patch.yaml | 16 +- config/dependencies/fvt.yaml | 2 + config/manager/manager.yaml | 1 + config/namespace-runtimes/kustomization.yaml | 2 +- config/rbac/cluster-scope/role.yaml | 12 + config/rbac/common/kustomization.yaml | 1 + config/rbac/common/networkpolicy-webhook.yaml | 29 ++ config/rbac/namespace-scope/role.yaml | 12 + config/webhook/kustomization.yaml | 10 + config/webhook/kustomizeconfig.yaml | 18 ++ config/webhook/manifests.yaml | 29 ++ config/webhook/service.yaml | 12 + .../autoscaler/autoscaler_reconciler.go | 131 +++++++++ .../autoscaler/autoscaler_reconciler_test.go | 74 ++++++ controllers/hpa/hpa_reconciler.go | 195 ++++++++++++++ controllers/hpa/hpa_reconciler_test.go | 158 +++++++++++ controllers/servingruntime_controller.go | 59 +++- docs/developer.md | 20 ++ docs/install/install-script.md | 10 +- docs/production-use/scaling.md | 32 +++ docs/quickstart.md | 24 +- fvt/README.md | 2 +- fvt/fvtclient.go | 82 ++++++ fvt/globals.go | 1 + fvt/helpers.go | 1 + fvt/hpa/hpa_suite_test.go | 118 ++++++++ fvt/hpa/hpa_test.go | 251 ++++++++++++++++++ fvt/utils.go | 5 + go.mod | 2 +- main.go | 11 + scripts/install.sh | 74 +++++- scripts/self-signed-ca.sh | 153 +++++++++++ 40 files changed, 1914 insertions(+), 77 deletions(-) create mode 100644 apis/serving/v1alpha1/servingruntime_webhook.go create mode 100644 
apis/serving/v1alpha1/servingruntime_webhook_test.go create mode 100644 config/rbac/common/networkpolicy-webhook.yaml create mode 100644 config/webhook/kustomization.yaml create mode 100644 config/webhook/kustomizeconfig.yaml create mode 100644 config/webhook/manifests.yaml create mode 100644 config/webhook/service.yaml create mode 100644 controllers/autoscaler/autoscaler_reconciler.go create mode 100644 controllers/autoscaler/autoscaler_reconciler_test.go create mode 100644 controllers/hpa/hpa_reconciler.go create mode 100644 controllers/hpa/hpa_reconciler_test.go create mode 100644 fvt/hpa/hpa_suite_test.go create mode 100644 fvt/hpa/hpa_test.go create mode 100755 scripts/self-signed-ca.sh diff --git a/.github/workflows/run-fvt.yml b/.github/workflows/run-fvt.yml index 65f754bbf..c9b1f71f2 100644 --- a/.github/workflows/run-fvt.yml +++ b/.github/workflows/run-fvt.yml @@ -20,14 +20,16 @@ jobs: - uses: actions/checkout@v2 - uses: actions/setup-go@v2 with: - go-version: '1.18.7' - - name: Setup Minikube - run: | - wget --no-verbose https://github.com/kubernetes/minikube/releases/download/v1.25.1/minikube-linux-amd64 - sudo cp minikube-linux-amd64 /usr/local/bin/minikube - sudo chmod 755 /usr/local/bin/minikube - sudo apt-get install -y conntrack socat - minikube start --driver=none --kubernetes-version v1.22.10 + go-version: '1.18.7' + - name: Start Minikube + uses: medyagh/setup-minikube@latest + id: minikube + with: + minikube-version: 1.27.1 + container-runtime: docker + kubernetes-version: v1.25.2 + cpus: max + memory: max - name: Check pods run: | sleep 30 @@ -55,6 +57,7 @@ jobs: echo -e '\n disabled: true' >> config/runtimes/torchserve-0.x.yaml - name: Build Controller image run: | + eval $(minikube -p minikube docker-env) make build.develop ./scripts/build_docker.sh --target runtime --tag ${{ env.IMAGE_TAG }} - name: Install ModelMesh Serving @@ -69,6 +72,7 @@ jobs: df -h - name: Pre-pull runtime images run: | + eval $(minikube -p minikube docker-env) docker 
pull nvcr.io/nvidia/tritonserver:21.06.1-py3 docker pull seldonio/mlserver:0.5.2 docker pull openvino/model_server:2022.1 @@ -86,4 +90,4 @@ jobs: export PATH=/root/go/bin/:$PATH export NAMESPACE=modelmesh-serving export NAMESPACESCOPEMODE=false - make fvt + make fvt \ No newline at end of file diff --git a/.gitignore b/.gitignore index 33a31d267..ebda98b64 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ public/ target/ +vendor/ # Binaries for programs and plugins *.exe @@ -32,4 +33,4 @@ bin # Modelmesh development related artifacts devbuild .develop_image_name -.dev/ +.dev/ \ No newline at end of file diff --git a/Makefile b/Makefile index 29027257f..4910886c3 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,7 @@ test: # Run fvt tests. This requires an etcd, kubernetes connection, and model serving installation. Ginkgo CLI is used to run them in parallel fvt: - ginkgo -v -p -progress --fail-fast fvt/predictor fvt/scaleToZero --timeout=40m + ginkgo -v -p -progress --fail-fast fvt/predictor fvt/scaleToZero fvt/hpa --timeout=50m # Command to regenerate the grpc go files from the proto files fvt-protoc: @@ -87,6 +87,15 @@ deploy-release: deploy-release-dev-mode: ./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging +deploy-release-dev-mode-fvt: +ifdef MODELMESH_SERVING_IMAGE + $(eval extra_options += --modelmesh-serving-image ${MODELMESH_SERVING_IMAGE}) +endif +ifdef NAMESPACE_SCOPE_MODE + $(eval extra_options += --namespace-scope-mode) +endif + ./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging --fvt ${extra_options} + delete: oc-login ./scripts/delete.sh --namespace ${NAMESPACE} --local-config-path config diff --git a/apis/serving/v1alpha1/servingruntime_webhook.go b/apis/serving/v1alpha1/servingruntime_webhook.go new file mode 100644 index 000000000..7c16b7ee9 --- /dev/null +++ b/apis/serving/v1alpha1/servingruntime_webhook.go @@ -0,0 +1,189 @@ +/* +Copyright 2021 IBM 
Corporation + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package v1alpha1 + +import ( + "context" + "fmt" + "net/http" + "strconv" + + kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/modelmesh-serving/controllers/autoscaler" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" +) + +//+kubebuilder:webhook:path=/validate-serving-modelmesh-io-v1alpha1-servingruntime,mutating=false,failurePolicy=fail,sideEffects=None,groups=serving.kserve.io,resources=servingruntimes;clusterservingruntimes,verbs=create;update,versions=v1alpha1,name=servingruntime.modelmesh-webhook-server.default,admissionReviewVersions=v1 +type ServingRuntimeWebhook struct { + Client client.Client + decoder *admission.Decoder +} + +func (s *ServingRuntimeWebhook) Handle(ctx context.Context, req admission.Request) admission.Response { + var srAnnotations map[string]string + + if req.Kind.Kind == "ServingRuntime" { + servingRuntime := &kservev1alpha.ServingRuntime{} + err := s.decoder.Decode(req, servingRuntime) + if err != nil { + return admission.Errored(http.StatusBadRequest, err) + } + srAnnotations = servingRuntime.ObjectMeta.Annotations + } else { + clusterServingRuntime := &kservev1alpha.ClusterServingRuntime{} + err := s.decoder.Decode(req, clusterServingRuntime) + if err != nil { + return admission.Errored(http.StatusBadRequest, err) + } + srAnnotations = 
clusterServingRuntime.ObjectMeta.Annotations + } + + if err := validateServingRuntimeAutoscaler(srAnnotations); err != nil { + return admission.Denied(err.Error()) + } + + if err := validateAutoscalerTargetUtilizationPercentage(srAnnotations); err != nil { + return admission.Denied(err.Error()) + } + + if err := validateAutoScalingReplicas(srAnnotations); err != nil { + return admission.Denied(err.Error()) + } + + return admission.Allowed("Passed all validation checks for ServingRuntime") +} + +// InjectDecoder injects the decoder. +func (s *ServingRuntimeWebhook) InjectDecoder(d *admission.Decoder) error { + s.decoder = d + return nil +} + +// Validation of servingruntime autoscaler class +func validateServingRuntimeAutoscaler(annotations map[string]string) error { + value, ok := annotations[constants.AutoscalerClass] + class := constants.AutoscalerClassType(value) + if ok { + for _, item := range constants.AutoscalerAllowedClassList { + if class == item { + switch class { + case constants.AutoscalerClassHPA: + if metric, ok := annotations[constants.AutoscalerMetrics]; ok { + return validateHPAMetrics(constants.AutoscalerMetricsType(metric)) + } else { + return nil + } + default: + return fmt.Errorf("unknown autoscaler class [%s]", class) + } + } + } + return fmt.Errorf("[%s] is not a supported autoscaler class type.\n", value) + } + + return nil +} + +// Validate of autoscaler targetUtilizationPercentage +func validateAutoscalerTargetUtilizationPercentage(annotations map[string]string) error { + if value, ok := annotations[constants.TargetUtilizationPercentage]; ok { + t, err := strconv.Atoi(value) + if err != nil { + return fmt.Errorf("The target utilization percentage should be a [1-100] integer.") + } else { + if t < 1 || t > 100 { + return fmt.Errorf("The target utilization percentage should be a [1-100] integer.") + } + } + } + + return nil +} + +// Validate scaling options +func validateAutoScalingReplicas(annotations map[string]string) error { + 
autoscalerClassType := autoscaler.AutoscalerClassNone + if value, ok := annotations[constants.AutoscalerClass]; ok { + autoscalerClassType = value + } + + switch autoscalerClassType { + case string(constants.AutoscalerClassHPA): + return validateScalingHPA(annotations) + default: + return nil + } +} + +func validateScalingHPA(annotations map[string]string) error { + metric := constants.AutoScalerMetricsCPU + if value, ok := annotations[constants.AutoscalerMetrics]; ok { + metric = constants.AutoscalerMetricsType(value) + } + + minReplicas := 1 + if value, ok := annotations[constants.MinScaleAnnotationKey]; ok { + if valueInt, err := strconv.Atoi(value); err != nil { + return fmt.Errorf("The min replicas should be a integer.") + } else if valueInt < 1 { + return fmt.Errorf("The min replicas should be more than 0") + } else { + minReplicas = valueInt + } + } + + maxReplicas := 1 + if value, ok := annotations[constants.MaxScaleAnnotationKey]; ok { + if valueInt, err := strconv.Atoi(value); err != nil { + return fmt.Errorf("The max replicas should be a integer.") + } else { + maxReplicas = valueInt + } + } + + if minReplicas > maxReplicas { + return fmt.Errorf("The max replicas should be same or bigger than min replicas.") + } + + err := validateHPAMetrics(metric) + if err != nil { + return err + } + + if value, ok := annotations[constants.TargetUtilizationPercentage]; ok { + t, err := strconv.Atoi(value) + if err != nil { + return fmt.Errorf("The target utilization percentage should be a [1-100] integer.") + } else if metric == constants.AutoScalerMetricsMemory && t < 1 { + return fmt.Errorf("The target memory should be greater than 1 MiB") + } + } + + return nil +} + +// Validate of autoscaler HPA metrics +func validateHPAMetrics(metric constants.AutoscalerMetricsType) error { + for _, item := range constants.AutoscalerAllowedMetricsList { + if item == metric { + return nil + } + } + return fmt.Errorf("[%s] is not a supported metric.\n", metric) + +} diff --git 
a/apis/serving/v1alpha1/servingruntime_webhook_test.go b/apis/serving/v1alpha1/servingruntime_webhook_test.go new file mode 100644 index 000000000..fc3330217 --- /dev/null +++ b/apis/serving/v1alpha1/servingruntime_webhook_test.go @@ -0,0 +1,100 @@ +/* +Copyright 2021 IBM Corporation + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package v1alpha1 + +import ( + // "fmt" + "testing" + + "github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + "github.com/kserve/kserve/pkg/constants" +) + +func makeTestRawServingRuntime() kservev1alpha.ServingRuntime { + servingRuntime := kservev1alpha.ServingRuntime{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foo", + Namespace: "default", + Annotations: map[string]string{ + "serving.kserve.io/autoscalerClass": "hpa", + "serving.kserve.io/metrics": "cpu", + "serving.kserve.io/targetUtilizationPercentage": "75", + "autoscaling.knative.dev/min-scale": "2", + "autoscaling.knative.dev/max-scale": "3", + }, + }, + } + + return servingRuntime +} + +func TestValidAutoscalerTypeAndHPAMetrics(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + g.Expect(validateServingRuntimeAutoscaler(sr.Annotations)).Should(gomega.Succeed()) +} +func TestInvalidAutoscalerClassType(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[constants.AutoscalerClass] = "test" + 
g.Expect(validateServingRuntimeAutoscaler(sr.Annotations)).ShouldNot(gomega.Succeed()) +} + +func TestInvalidAutoscalerTargetUtilizationPercentageLowValue(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[constants.TargetUtilizationPercentage] = "-1" + g.Expect(validateAutoscalerTargetUtilizationPercentage(sr.Annotations)).ShouldNot(gomega.Succeed()) +} + +func TestInvalidAutoscalerTargetUtilizationPercentageHighValue(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[constants.TargetUtilizationPercentage] = "101" + g.Expect(validateAutoscalerTargetUtilizationPercentage(sr.Annotations)).ShouldNot(gomega.Succeed()) +} + +func TestInvalidAutoscalerLowMinReplicas(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[constants.MinScaleAnnotationKey] = "0" + g.Expect(validateScalingHPA(sr.Annotations)).ShouldNot(gomega.Succeed()) +} + +func TestInvalidAutoscalerMaxReplicasMustBiggerThanMixReplicas(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[constants.MinScaleAnnotationKey] = "4" + sr.ObjectMeta.Annotations[constants.MaxScaleAnnotationKey] = "3" + g.Expect(validateAutoScalingReplicas(sr.Annotations)).ShouldNot(gomega.Succeed()) +} + +func TestValidAutoscalerMetricsType(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[constants.AutoscalerMetrics] = "memory" + g.Expect(validateHPAMetrics(constants.AutoscalerMetricsType("memory"))).Should(gomega.Succeed()) +} + +func TestInvalidAutoscalerMetricsType(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[constants.AutoscalerMetrics] = "conccurrency" + g.Expect(validateHPAMetrics(constants.AutoscalerMetricsType("conccurrency"))).ShouldNot(gomega.Succeed()) +} diff 
--git a/config/certmanager/certificate.yaml b/config/certmanager/certificate.yaml index 6bd4ae7ba..100bd5f5a 100644 --- a/config/certmanager/certificate.yaml +++ b/config/certmanager/certificate.yaml @@ -29,11 +29,11 @@ metadata: name: serving-cert # this name should match the one appeared in kustomizeconfig.yaml namespace: system spec: - # $(SERVICE_NAME) and $(SERVICE_NAMESPACE) will be substituted by kustomize + # SERVICE_NAME_PLACEHOLDER and SERVICE_NAMESPACE_PLACEHOLDER will be substituted by kustomize dnsNames: - - $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc - - $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc.cluster.local + - SERVICE_NAME_PLACEHOLDER.SERVICE_NAMESPACE_PLACEHOLDER.svc + - SERVICE_NAME_PLACEHOLDER.SERVICE_NAMESPACE_PLACEHOLDER.svc.cluster.local issuerRef: kind: Issuer name: selfsigned-issuer - secretName: webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize + secretName: modelmesh-webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index 8001c1860..0050347a0 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -49,34 +49,73 @@ #- webhookcainjection_patch.yaml # the following config is for teaching kustomize how to do var substitution -#vars: -# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. 
-#- name: CERTIFICATE_NAMESPACE # namespace of the certificate CR -# objref: -# kind: Certificate -# group: cert-manager.io -# version: v1alpha2 -# name: serving-cert # this name should match the one in certificate.yaml -# fieldref: -# fieldpath: metadata.namespace -#- name: CERTIFICATE_NAME -# objref: -# kind: Certificate -# group: cert-manager.io -# version: v1alpha2 -# name: serving-cert # this name should match the one in certificate.yaml -#- name: SERVICE_NAMESPACE # namespace of the service -# objref: -# kind: Service -# version: v1 -# name: webhook-service -# fieldref: -# fieldpath: metadata.namespace -#- name: SERVICE_NAME -# objref: -# kind: Service -# version: v1 -# name: webhook-service +replacements: + - source: + fieldPath: metadata.namespace + kind: Certificate + name: serving-cert + - source: + kind: Certificate + name: serving-cert + - source: + fieldPath: metadata.namespace + kind: Service + name: modelmesh-webhook-server-service + targets: + - fieldPaths: + - |- + spec.# $(SERVICE_NAME) and $(SERVICE_NAMESPACE) will be substituted by kustomize + dnsNames.0 + options: + delimiter: . + index: 1 + select: + group: cert-manager.io + kind: Certificate + name: serving-cert + namespace: system + version: v1 + - fieldPaths: + - |- + spec.# $(SERVICE_NAME) and $(SERVICE_NAMESPACE) will be substituted by kustomize + dnsNames.1 + options: + delimiter: . + index: 1 + select: + group: cert-manager.io + kind: Certificate + name: serving-cert + namespace: system + version: v1 + - source: + kind: Service + name: modelmesh-webhook-server-service + targets: + - fieldPaths: + - |- + spec.# $(SERVICE_NAME) and SERVICE_NAMESPACE_PLACEHOLDER will be substituted by kustomize + dnsNames.0 + options: + delimiter: . 
+ select: + group: cert-manager.io + kind: Certificate + name: serving-cert + namespace: system + version: v1 + - fieldPaths: + - |- + spec.# $(SERVICE_NAME) and SERVICE_NAMESPACE_PLACEHOLDER will be substituted by kustomize + dnsNames.1 + options: + delimiter: . + select: + group: cert-manager.io + kind: Certificate + name: serving-cert + namespace: system + version: v1 configMapGenerator: - files: @@ -93,3 +132,9 @@ kind: Kustomization resources: - ../crd - ../manager + - ../webhook + - ../certmanager + +patchesStrategicMerge: + - manager_webhook_patch.yaml + - webhookcainjection_patch.yaml diff --git a/config/default/manager_webhook_patch.yaml b/config/default/manager_webhook_patch.yaml index b094527fa..a2e09d0ee 100644 --- a/config/default/manager_webhook_patch.yaml +++ b/config/default/manager_webhook_patch.yaml @@ -14,7 +14,7 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: controller-manager + name: modelmesh-controller namespace: system spec: template: @@ -23,14 +23,14 @@ spec: - name: manager ports: - containerPort: 9443 - name: webhook-server + name: webhook protocol: TCP volumeMounts: - mountPath: /tmp/k8s-webhook-server/serving-certs - name: cert + name: modelmesh-webhook-server-cert readOnly: true volumes: - - name: cert + - name: modelmesh-webhook-server-cert secret: defaultMode: 420 - secretName: webhook-server-cert + secretName: modelmesh-webhook-server-cert diff --git a/config/default/webhookcainjection_patch.yaml b/config/default/webhookcainjection_patch.yaml index 60e4bc91e..511a8d9c1 100644 --- a/config/default/webhookcainjection_patch.yaml +++ b/config/default/webhookcainjection_patch.yaml @@ -13,16 +13,16 @@ # limitations under the License. # This patch add annotation to admission webhook config and # the variables $(CERTIFICATE_NAMESPACE) and $(CERTIFICATE_NAME) will be substituted by kustomize. 
-apiVersion: admissionregistration.k8s.io/v1 -kind: MutatingWebhookConfiguration -metadata: - name: mutating-webhook-configuration - annotations: - cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) ---- +# apiVersion: admissionregistration.k8s.io/v1 +# kind: MutatingWebhookConfiguration +# metadata: +# name: mutating-webhook-configuration +# annotations: +# cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) +# --- apiVersion: admissionregistration.k8s.io/v1 kind: ValidatingWebhookConfiguration metadata: - name: validating-webhook-configuration + name: servingruntime.serving.kserve.io annotations: cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) diff --git a/config/dependencies/fvt.yaml b/config/dependencies/fvt.yaml index 4ae036424..ccad5ed6d 100644 --- a/config/dependencies/fvt.yaml +++ b/config/dependencies/fvt.yaml @@ -43,6 +43,8 @@ spec: containers: - command: - etcd + - --data-dir # use data directory under /tmp for read/write access by non-root user on OpenShift + - /tmp/etcd.data - --listen-client-urls - http://0.0.0.0:2379 - --advertise-client-urls diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 4a05ced9b..73b082476 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -15,6 +15,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: modelmesh-controller + namespace: system labels: control-plane: modelmesh-controller spec: diff --git a/config/namespace-runtimes/kustomization.yaml b/config/namespace-runtimes/kustomization.yaml index 657e278f9..e361106f7 100644 --- a/config/namespace-runtimes/kustomization.yaml +++ b/config/namespace-runtimes/kustomization.yaml @@ -2,7 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization bases: - ../runtimes -patchesJson6902: +patches: - target: group: serving.kserve.io version: v1alpha1 diff --git a/config/rbac/cluster-scope/role.yaml b/config/rbac/cluster-scope/role.yaml 
index 25a7bd758..86a8ee81c 100644 --- a/config/rbac/cluster-scope/role.yaml +++ b/config/rbac/cluster-scope/role.yaml @@ -196,3 +196,15 @@ rules: - get - patch - update + - apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + - horizontalpodautoscalers/status + verbs: + - get + - list + - watch + - create + - delete + - update diff --git a/config/rbac/common/kustomization.yaml b/config/rbac/common/kustomization.yaml index 577e5b558..3168c7c7d 100644 --- a/config/rbac/common/kustomization.yaml +++ b/config/rbac/common/kustomization.yaml @@ -24,6 +24,7 @@ resources: - modelmesh-service-account.yaml - networkpolicy-controller.yaml - networkpolicy-runtimes.yaml + - networkpolicy-webhook.yaml # Comment the following 4 lines if you want to disable # the auth proxy (https://github.com/brancz/kube-rbac-proxy) # which protects your /metrics endpoint. diff --git a/config/rbac/common/networkpolicy-webhook.yaml b/config/rbac/common/networkpolicy-webhook.yaml new file mode 100644 index 000000000..8c337ac27 --- /dev/null +++ b/config/rbac/common/networkpolicy-webhook.yaml @@ -0,0 +1,29 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: modelmesh-webhook +spec: + podSelector: + matchLabels: + app.kubernetes.io/managed-by: modelmesh-controller + control-plane: modelmesh-controller + ingress: + # exposed for webhook + - ports: + - port: 9443 + protocol: TCP + policyTypes: + - Ingress diff --git a/config/rbac/namespace-scope/role.yaml b/config/rbac/namespace-scope/role.yaml index 946692d3a..4abc8c973 100644 --- a/config/rbac/namespace-scope/role.yaml +++ b/config/rbac/namespace-scope/role.yaml @@ -164,3 +164,15 @@ rules: - get - patch - update + - apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + - horizontalpodautoscalers/status + verbs: + - get + - list + - watch + - create + - delete + - update diff --git a/config/webhook/kustomization.yaml b/config/webhook/kustomization.yaml new file mode 100644 index 000000000..9b1813655 --- /dev/null +++ b/config/webhook/kustomization.yaml @@ -0,0 +1,10 @@ +--- +resources: + - manifests.yaml + - service.yaml + +# Adds namespace to all resources. +namespace: modelmesh-serving + +configurations: + - kustomizeconfig.yaml diff --git a/config/webhook/kustomizeconfig.yaml b/config/webhook/kustomizeconfig.yaml new file mode 100644 index 000000000..919027219 --- /dev/null +++ b/config/webhook/kustomizeconfig.yaml @@ -0,0 +1,18 @@ +# the following config is for teaching kustomize where to look at when substituting vars. +# It requires kustomize v2.1.0 or newer to work properly. 
+nameReference: + - kind: Service + version: v1 + fieldSpecs: + - kind: ValidatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/name + +namespace: + - kind: ValidatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/namespace + create: true + +varReference: + - path: metadata/annotations diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml new file mode 100644 index 000000000..1bee1ea09 --- /dev/null +++ b/config/webhook/manifests.yaml @@ -0,0 +1,29 @@ +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + creationTimestamp: null + name: servingruntime.serving.kserve.io +webhooks: + - admissionReviewVersions: + - v1 + clientConfig: + caBundle: Cg== + service: + name: modelmesh-webhook-server-service + namespace: system + path: /validate-serving-modelmesh-io-v1alpha1-servingruntime + port: 9443 + failurePolicy: Fail + name: servingruntime.modelmesh-webhook-server.default + rules: + - apiGroups: + - serving.kserve.io + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - clusterservingruntimes + - servingruntimes + sideEffects: None diff --git a/config/webhook/service.yaml b/config/webhook/service.yaml new file mode 100644 index 000000000..9197435fb --- /dev/null +++ b/config/webhook/service.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Service +metadata: + name: modelmesh-webhook-server-service + namespace: system +spec: + ports: + - port: 9443 + protocol: TCP + targetPort: webhook + selector: + control-plane: modelmesh-controller diff --git a/controllers/autoscaler/autoscaler_reconciler.go b/controllers/autoscaler/autoscaler_reconciler.go new file mode 100644 index 000000000..82f240080 --- /dev/null +++ b/controllers/autoscaler/autoscaler_reconciler.go @@ -0,0 +1,131 @@ +/* +Copyright 2021 The KServe Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package autoscaler + +import ( + "fmt" + + "github.com/pkg/errors" + + kserveapi "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/modelmesh-serving/controllers/hpa" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +const ( + AutoscalerClassNone = "none" +) + +type Autoscaler struct { + AutoscalerClass constants.AutoscalerClassType + HPA *hpa.HPAReconciler +} + +// AutoscalerReconciler is the struct of Raw K8S Object +type AutoscalerReconciler struct { + client client.Client + scheme *runtime.Scheme + Autoscaler *Autoscaler +} + +func NewAutoscalerReconciler(client client.Client, + scheme *runtime.Scheme, + servingRuntime interface{}, mmDeploymentName string, mmNamespace string) (*AutoscalerReconciler, error) { + + as, err := createAutoscaler(client, scheme, servingRuntime, mmDeploymentName, mmNamespace) + if err != nil { + return nil, err + } + return &AutoscalerReconciler{ + client: client, + scheme: scheme, + Autoscaler: as, + }, err +} + +func getAutoscalerClass(metadata metav1.ObjectMeta) constants.AutoscalerClassType { + annotations := metadata.Annotations + if value, ok := annotations[constants.AutoscalerClass]; ok { + return constants.AutoscalerClassType(value) + } else { + return AutoscalerClassNone + } +} + 
+func createAutoscaler(client client.Client, + scheme *runtime.Scheme, servingRuntime interface{}, mmDeploymentName string, mmNamespace string) (*Autoscaler, error) { + var runtimeMeta metav1.ObjectMeta + isSR := false + + sr, ok := servingRuntime.(*kserveapi.ServingRuntime) + if ok { + runtimeMeta = sr.ObjectMeta + isSR = true + } + csr, ok := servingRuntime.(*kserveapi.ClusterServingRuntime) + if ok { + runtimeMeta = csr.ObjectMeta + } + + as := &Autoscaler{} + ac := getAutoscalerClass(runtimeMeta) + as.AutoscalerClass = ac + + switch ac { + case constants.AutoscalerClassHPA: + as.HPA = hpa.NewHPAReconciler(client, scheme, runtimeMeta, mmDeploymentName, mmNamespace) + if isSR { + if err := controllerutil.SetControllerReference(sr, as.HPA.HPA, scheme); err != nil { + return nil, fmt.Errorf("fails to set HPA owner reference for ServingRuntime: %w", err) + } + } else { + if err := controllerutil.SetControllerReference(csr, as.HPA.HPA, scheme); err != nil { + return nil, fmt.Errorf("fails to set HPA owner reference for ClusterServingRuntime: %w", err) + } + } + case AutoscalerClassNone: + // Set HPA reconciler even though AutoscalerClass is None to delete existing hpa + as.HPA = hpa.NewHPAReconciler(client, scheme, runtimeMeta, mmDeploymentName, mmNamespace) + return as, nil + default: + return nil, errors.New("unknown autoscaler class type.") + } + return as, nil +} + +// Reconcile ... 
+func (r *AutoscalerReconciler) Reconcile(scaleToZero bool) (*Autoscaler, error) { + //reconcile Autoscaler + //In the case of a new autoscaler plugin, it checks AutoscalerClassType + if r.Autoscaler.AutoscalerClass == constants.AutoscalerClassHPA || r.Autoscaler.AutoscalerClass == AutoscalerClassNone { + _, err := r.Autoscaler.HPA.Reconcile(scaleToZero) + if err != nil { + return nil, err + } + } + + if scaleToZero { + r.Autoscaler.HPA.HPA = nil + } + + return r.Autoscaler, nil +} diff --git a/controllers/autoscaler/autoscaler_reconciler_test.go b/controllers/autoscaler/autoscaler_reconciler_test.go new file mode 100644 index 000000000..bc83693f8 --- /dev/null +++ b/controllers/autoscaler/autoscaler_reconciler_test.go @@ -0,0 +1,74 @@ +/* +Copyright 2021 IBM Corporation + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package autoscaler + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + + "github.com/kserve/kserve/pkg/constants" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestGetAutoscalerClass(t *testing.T) { + servingRuntimeName := "my-model" + namespace := "test" + + testCases := []struct { + name string + servingRuntimeMetaData *metav1.ObjectMeta + expectedAutoScalerType constants.AutoscalerClassType + }{ + { + name: "Return default AutoScaler, if the autoscalerClass annotation is not set", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{}, + }, + expectedAutoScalerType: AutoscalerClassNone, + }, + { + name: "Return none AutoScaler, if the autoscalerClass annotation set none", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{"serving.kserve.io/autoscalerClass": "none"}, + }, + expectedAutoScalerType: AutoscalerClassNone, + }, + { + name: "Return hpa AutoScaler, if the autoscalerClass annotation set hpa", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{"serving.kserve.io/autoscalerClass": "hpa"}, + }, + expectedAutoScalerType: constants.AutoscalerClassHPA, + }, + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + result := getAutoscalerClass(*tt.servingRuntimeMetaData) + if diff := cmp.Diff(tt.expectedAutoScalerType, result); diff != "" { + t.Errorf("Test %q unexpected result (-want +got): %v", t.Name(), diff) + } + }) + } +} diff --git a/controllers/hpa/hpa_reconciler.go b/controllers/hpa/hpa_reconciler.go new file mode 100644 index 000000000..46899d7ed --- /dev/null +++ b/controllers/hpa/hpa_reconciler.go @@ -0,0 +1,195 @@ +/* +Copyright 2021 The KServe Authors. 
+Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package hpa + +import ( + "context" + "strconv" + + "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/kserve/pkg/utils" + v2beta2 "k8s.io/api/autoscaling/v2beta2" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/equality" + apierr "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + logf "sigs.k8s.io/controller-runtime/pkg/log" +) + +var log = logf.Log.WithName("HPAReconciler") + +// HPAReconciler is the struct of Raw K8S Object +type HPAReconciler struct { + client client.Client + scheme *runtime.Scheme + HPA *v2beta2.HorizontalPodAutoscaler +} + +func NewHPAReconciler(client client.Client, + scheme *runtime.Scheme, runtimeMeta metav1.ObjectMeta, mmDeploymentName string, mmNamespace string) *HPAReconciler { + return &HPAReconciler{ + client: client, + scheme: scheme, + HPA: createHPA(runtimeMeta, mmDeploymentName, mmNamespace), + } +} + +func getHPAMetrics(metadata metav1.ObjectMeta) []v2beta2.MetricSpec { + var metrics []v2beta2.MetricSpec + var utilization int32 = constants.DefaultCPUUtilization + + annotations := metadata.Annotations + resourceName := corev1.ResourceCPU + + if value, ok := annotations[constants.TargetUtilizationPercentage]; ok { + utilizationInt, _ := strconv.Atoi(value) + utilization = 
int32(utilizationInt) + } + + if value, ok := annotations[constants.AutoscalerMetrics]; ok { + resourceName = corev1.ResourceName(value) + } + + metricTarget := v2beta2.MetricTarget{ + Type: "Utilization", + AverageUtilization: &utilization, + } + + ms := v2beta2.MetricSpec{ + Type: v2beta2.ResourceMetricSourceType, + Resource: &v2beta2.ResourceMetricSource{ + Name: resourceName, + Target: metricTarget, + }, + } + + metrics = append(metrics, ms) + return metrics +} + +func createHPA(runtimeMeta metav1.ObjectMeta, mmDeploymentName string, mmNamespace string) *v2beta2.HorizontalPodAutoscaler { + var minReplicas int32 + minReplicas = int32(constants.DefaultMinReplicas) + maxReplicas := int32(constants.DefaultMinReplicas) + annotations := runtimeMeta.Annotations + + if value, ok := annotations[constants.MinScaleAnnotationKey]; ok { + minReplicasInt, _ := strconv.Atoi(value) + minReplicas = int32(minReplicasInt) + + } + if value, ok := annotations[constants.MaxScaleAnnotationKey]; ok { + maxReplicasInt, _ := strconv.Atoi(value) + maxReplicas = int32(maxReplicasInt) + } + + if maxReplicas < minReplicas { + maxReplicas = minReplicas + } + + metrics := getHPAMetrics(runtimeMeta) + + hpaObjectMeta := metav1.ObjectMeta{ + Name: mmDeploymentName, + Namespace: mmNamespace, + Labels: utils.Union(runtimeMeta.Labels, map[string]string{ + constants.InferenceServicePodLabelKey: runtimeMeta.Name, + constants.KServiceComponentLabel: string(v1beta1.PredictorComponent), + }), + Annotations: runtimeMeta.Annotations, + } + + hpa := &v2beta2.HorizontalPodAutoscaler{ + ObjectMeta: hpaObjectMeta, + Spec: v2beta2.HorizontalPodAutoscalerSpec{ + ScaleTargetRef: v2beta2.CrossVersionObjectReference{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: hpaObjectMeta.Name, + }, + MinReplicas: &minReplicas, + MaxReplicas: maxReplicas, + + Metrics: metrics, + Behavior: &v2beta2.HorizontalPodAutoscalerBehavior{}, + }, + } + return hpa +} + +// checkHPAExist checks if the hpa exists? 
+func (r *HPAReconciler) checkHPAExist(client client.Client) (constants.CheckResultType, *v2beta2.HorizontalPodAutoscaler, error) { + existingHPA := &v2beta2.HorizontalPodAutoscaler{} + err := client.Get(context.TODO(), types.NamespacedName{ + Namespace: r.HPA.ObjectMeta.Namespace, + Name: r.HPA.ObjectMeta.Name, + }, existingHPA) + if err != nil { + if apierr.IsNotFound(err) { + return constants.CheckResultCreate, nil, nil + } + return constants.CheckResultUnknown, nil, err + } + + //existed, check equivalent + if semanticHPAEquals(r.HPA, existingHPA) { + return constants.CheckResultExisted, existingHPA, nil + } + return constants.CheckResultUpdate, existingHPA, nil +} + +func semanticHPAEquals(desired, existing *v2beta2.HorizontalPodAutoscaler) bool { + return equality.Semantic.DeepEqual(desired.Spec.Metrics, existing.Spec.Metrics) && + equality.Semantic.DeepEqual(desired.Spec.MaxReplicas, existing.Spec.MaxReplicas) && + equality.Semantic.DeepEqual(*desired.Spec.MinReplicas, *existing.Spec.MinReplicas) +} + +// Reconcile ... 
+func (r *HPAReconciler) Reconcile(scaleToZero bool) (*v2beta2.HorizontalPodAutoscaler, error) { + //reconcile + checkResult, existingHPA, err := r.checkHPAExist(r.client) + log.Info("service reconcile", "checkResult", checkResult, "err", err) + if err != nil { + return nil, err + } + + // when scaleToZero is true, delete HPA if it exist + if existingHPA != nil && scaleToZero { + if err = r.client.Delete(context.TODO(), existingHPA, &client.DeleteOptions{}); err != nil { + return nil, err + } + } + + if checkResult == constants.CheckResultCreate && !scaleToZero { + err = r.client.Create(context.TODO(), r.HPA) + if err != nil { + return nil, err + } else { + return r.HPA, nil + } + } else if checkResult == constants.CheckResultUpdate { //CheckResultUpdate + err = r.client.Update(context.TODO(), r.HPA) + if err != nil { + return nil, err + } else { + return r.HPA, nil + } + } else { + return existingHPA, nil + } +} diff --git a/controllers/hpa/hpa_reconciler_test.go b/controllers/hpa/hpa_reconciler_test.go new file mode 100644 index 000000000..d127a5be8 --- /dev/null +++ b/controllers/hpa/hpa_reconciler_test.go @@ -0,0 +1,158 @@ +/* +Copyright 2021 IBM Corporation + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package hpa + +import ( + "fmt" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/kserve/kserve/pkg/constants" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestGetHPAMetrics(t *testing.T) { + servingRuntimeName := "my-model" + namespace := "test" + + testCases := []struct { + name string + servingRuntimeMetaData *metav1.ObjectMeta + expectedTargetUtilizationPercentage int32 + expectedAutoscalerMetrics corev1.ResourceName + }{ + { + name: "Check default HPAMetrics", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{}, + }, + expectedTargetUtilizationPercentage: int32(80), + expectedAutoscalerMetrics: corev1.ResourceName("cpu"), + }, + { + name: "Check HPAMetrics if annotations has " + constants.AutoscalerMetrics, + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{constants.AutoscalerMetrics: "memory"}, + }, + expectedTargetUtilizationPercentage: int32(80), + expectedAutoscalerMetrics: corev1.ResourceName("memory"), + }, + { + name: "Check HPAMetrics if annotations has " + constants.TargetUtilizationPercentage, + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{constants.TargetUtilizationPercentage: "50"}, + }, + expectedTargetUtilizationPercentage: int32(50), + expectedAutoscalerMetrics: corev1.ResourceName("cpu"), + }, + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + result := getHPAMetrics(*tt.servingRuntimeMetaData) + if diff := cmp.Diff(tt.expectedTargetUtilizationPercentage, *result[0].Resource.Target.AverageUtilization); diff != "" { + t.Errorf("Test %q unexpected result (-want +got): %v", t.Name(), diff) + } + if diff := cmp.Diff(tt.expectedAutoscalerMetrics, result[0].Resource.Name); diff != "" { + t.Errorf("Test %q unexpected 
result (-want +got): %v", t.Name(), diff) + } + }) + } +} + +func TestCreateHPA(t *testing.T) { + servingRuntimeName := "my-model" + namespace := "test" + deploymentName := fmt.Sprintf("%s-%s", servingRuntimeName, namespace) + + testCases := []struct { + name string + servingRuntimeMetaData *metav1.ObjectMeta + mmDeploymentName *string + mmNamespace *string + expectedMinReplicas int32 + expectedMaxReplicas int32 + }{ + { + name: "Check default HPA replicas", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{}, + }, + mmDeploymentName: &deploymentName, + mmNamespace: &namespace, + expectedMinReplicas: int32(1), + expectedMaxReplicas: int32(1), + }, + { + name: "Check HPA replicas if annotations has " + constants.MaxScaleAnnotationKey, + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{constants.MaxScaleAnnotationKey: "2"}, + }, + mmDeploymentName: &deploymentName, + mmNamespace: &namespace, + expectedMinReplicas: int32(1), + expectedMaxReplicas: int32(2), + }, + { + name: "Check HPA replicas if annotations has " + constants.MinScaleAnnotationKey + ". 
max replicas should be the same as min replicas", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{constants.MinScaleAnnotationKey: "2"}, + }, + mmDeploymentName: &deploymentName, + mmNamespace: &namespace, + expectedMinReplicas: int32(2), + expectedMaxReplicas: int32(2), + }, + { + name: "Check HPA replicas if annotations set min/max replicas both", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{constants.MinScaleAnnotationKey: "2", constants.MaxScaleAnnotationKey: "3"}, + }, + mmDeploymentName: &deploymentName, + mmNamespace: &namespace, + expectedMinReplicas: int32(2), + expectedMaxReplicas: int32(3), + }, + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + hpa := createHPA(*tt.servingRuntimeMetaData, *tt.mmDeploymentName, *tt.mmNamespace) + if diff := cmp.Diff(tt.expectedMinReplicas, *hpa.Spec.MinReplicas); diff != "" { + t.Errorf("Test %q unexpected result (-want +got): %v", t.Name(), diff) + } + if diff := cmp.Diff(tt.expectedMaxReplicas, hpa.Spec.MaxReplicas); diff != "" { + t.Errorf("Test %q unexpected result (-want +got): %v", t.Name(), diff) + } + }) + } +} diff --git a/controllers/servingruntime_controller.go b/controllers/servingruntime_controller.go index 1c262a87e..66e7a0247 100644 --- a/controllers/servingruntime_controller.go +++ b/controllers/servingruntime_controller.go @@ -35,31 +35,30 @@ import ( "sync" "time" + "github.com/go-logr/logr" + mf "github.com/manifestival/manifestival" + + kserveapi "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + api "github.com/kserve/modelmesh-serving/apis/serving/v1alpha1" + "github.com/kserve/modelmesh-serving/controllers/autoscaler" + "github.com/kserve/modelmesh-serving/controllers/modelmesh" "github.com/kserve/modelmesh-serving/pkg/config" - 
"github.com/kserve/modelmesh-serving/pkg/mmesh" "github.com/kserve/modelmesh-serving/pkg/predictor_source" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/event" - "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" - "github.com/go-logr/logr" - mf "github.com/manifestival/manifestival" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/source" - - kserveapi "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" - api "github.com/kserve/modelmesh-serving/apis/serving/v1alpha1" - "github.com/kserve/modelmesh-serving/controllers/modelmesh" ) const ( @@ -196,6 +195,7 @@ func (r *ServingRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Reque // Reconcile this serving runtime rt := &kserveapi.ServingRuntime{} + crt := &kserveapi.ClusterServingRuntime{} var owner mf.Owner var spec *kserveapi.ServingRuntimeSpec @@ -209,7 +209,6 @@ func (r *ServingRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Reque return r.removeRuntimeFromInfoMap(req) } // try to find the runtime in cluster ServingRuntimes - crt := &kserveapi.ClusterServingRuntime{} if err = r.Client.Get(ctx, types.NamespacedName{Name: req.Name}, crt); err == nil { spec = &crt.Spec owner = crt @@ -281,11 +280,45 @@ func (r *ServingRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{}, nil } - replicas, requeueDuration, err := r.determineReplicasAndRequeueDuration(ctx, log, cfg, spec, req.NamespacedName) + var requeueDuration time.Duration + mmDeploymentName := fmt.Sprintf("%s-%s", mmDeployment.ServiceName, mmDeployment.Name) + + var as *autoscaler.AutoscalerReconciler + if 
crt.GetName() != "" { + as, err = autoscaler.NewAutoscalerReconciler(r.Client, r.Client.Scheme(), crt, mmDeploymentName, mmDeployment.Namespace) + } else { + as, err = autoscaler.NewAutoscalerReconciler(r.Client, r.Client.Scheme(), rt, mmDeploymentName, mmDeployment.Namespace) + } + + if err != nil { + log.Error(err, "fails to create an autoscaler controller: %w", "skip to create HPA") + } + + replicas, requeueDurationRuntime, err := r.determineReplicasAndRequeueDuration(ctx, log, cfg, spec, req.NamespacedName) if err != nil { return RequeueResult, fmt.Errorf("could not determine replicas: %w", err) } + requeueDuration = requeueDurationRuntime mmDeployment.Replicas = replicas + + //ScaleToZero or None autoscaler case + if replicas == uint16(0) || as.Autoscaler.AutoscalerClass == autoscaler.AutoscalerClassNone { + if _, err = as.Reconcile(true); err != nil { + return ctrl.Result{}, fmt.Errorf("HPA reconcile error: %w", err) + } + } else { + //Autoscaler case + if as.Autoscaler != nil { + //scaleUp + mmDeployment.Replicas = uint16(*(as.Autoscaler.HPA.HPA).Spec.MinReplicas) + } + + //Create or Update HPA + if _, err = as.Reconcile(false); err != nil { + return ctrl.Result{}, fmt.Errorf("HPA reconcile error: %w", err) + } + } + if err = mmDeployment.Apply(ctx); err != nil { if errors.IsConflict(err) { // this can occur during normal operations if the deployment was updated diff --git a/docs/developer.md b/docs/developer.md index 524c97f12..7a3abb5b1 100644 --- a/docs/developer.md +++ b/docs/developer.md @@ -6,6 +6,12 @@ This document outlines some of the development practices with ModelMesh Serving. Local Kubernetes clusters can easily be set up using tools like [kind](https://kind.sigs.k8s.io/) and [minikube](https://minikube.sigs.k8s.io/docs/). +Modelmesh controller is using webhook that requires certificate. We suggest using [cert manager](https://github.com/cert-manager/cert-manager) for provisioning the certificates for the webhook server. 
Other solutions should also work as long as they put the certificates in the desired location. You can follow [the cert manager documentation](https://cert-manager.io/docs/installation/) to install it. + +If you don't want to install cert manager, you can set the `--enable-self-signed-ca`. It will execute a script to create a self-signed CA and patch it to the webhook config. + +_(Note)_ The `--fvt` option automatically sets `--enable-self-signed-ca`, so you do not need to set it explicitly. + For example, using `kind`: ```shell @@ -58,6 +64,20 @@ you will need to restart the controller pod. This can be done through the follow kubectl rollout restart deploy modelmesh-controller ``` +## Deploy a custom controller image + +If you have a custom controller image in your repository, you simply set `MODELMESH_SERVING_IMAGE` to deploy it. The following Makefile command will deploy the controller image with fvt dependencies. + +For example: + +```shell +NAMESPACE=modelmesh-serving \ +MODELMESH_SERVING_IMAGE=quay.io/$org/modelmesh-controller:custom \ +make deploy-release-dev-mode-fvt +``` + +This command will deploy your custom controller image `quay.io/$org/modelmesh-controller:custom` under `modelmesh-serving` namespace. + ## Building the developer image A dockerized development environment is provided to help set up dependencies for testing, linting, and code generating. 
diff --git a/docs/install/install-script.md b/docs/install/install-script.md index 42ed6532c..7f791a552 100644 --- a/docs/install/install-script.md +++ b/docs/install/install-script.md @@ -57,7 +57,7 @@ The `--quickstart` option can be specified to install and configure supporting d ```shell kubectl create namespace modelmesh-serving -./scripts/install.sh --namespace modelmesh-serving --quickstart +./scripts/install.sh --namespace modelmesh-serving --quickstart --enable-self-signed-ca ``` See the installation help below for detail: @@ -72,9 +72,11 @@ Flags: -d, --delete Delete any existing instances of ModelMesh Serving in Kube namespace before running install, including CRDs, RBACs, controller, older CRD with serving.kserve.io api group name, etc. -u, --user-namespaces Kubernetes namespaces to enable for ModelMesh Serving --quickstart Install and configure required supporting datastores in the same namespace (etcd and MinIO) - for experimentation/development - --fvt Install and configure required supporting datastores in the same namespace (etcd and MinIO) - for development with fvt enabled + --fvt Install and configure required supporting datastores in the same namespace (etcd and MinIO) and also sets enable-self-signed-ca - for development with fvt enabled -dev, --dev-mode-logging Enable dev mode logging (stacktraces on warning and no sampling) --namespace-scope-mode Run ModelMesh Serving in namespace scope mode + --modelmesh-serving-image Set a custom modelmesh serving image + --enable-self-signed-ca Enable self-signed-ca, if you don't have cert-manager in the cluster Installs ModelMesh Serving CRDs, controller, and built-in runtimes into specified Kubernetes namespaces. @@ -91,6 +93,10 @@ The installation will create a secret named `storage-config` if it does not alre The `--namespace-scope-mode` will deploy `ServingRuntime`s confined to the same namespace, instead of the default cluster-scoped runtimes `ClusterServingRuntime`s. 
These serving runtimes are accessible to any user/namespace in the cluster. +You can optionally provide a custom modelmesh serving image URL with `--modelmesh-serving-image`. If not specified, it will pull the latest modelmesh image. + +The ModelMesh controller uses a webhook that requires certificates. We suggest using [cert manager](https://github.com/cert-manager/cert-manager) for provisioning the certificates for the webhook server. Other solutions should also work as long as they put the certificates in the desired location. You can follow [the cert manager documentation](https://cert-manager.io/docs/installation/) to install it. If you don't want to install cert manager, you can set `--enable-self-signed-ca`. It will execute a script to create a self-signed CA and patch it to the webhook config. + +## Setup additional namespaces To enable additional namespaces for ModelMesh after the initial installation, you need to add a label named `modelmesh-enabled`, and optionally setup the storage secret `storage-config` and built-in runtimes, in the user namespaces. diff --git a/docs/production-use/scaling.md b/docs/production-use/scaling.md index 1b2c8c9a1..59f17d911 100644 --- a/docs/production-use/scaling.md +++ b/docs/production-use/scaling.md @@ -14,3 +14,35 @@ Increasing the number of runtime replicas has two important effects: If a given `ServingRuntime` has no `InferenceService`s that it supports, the `Deployment` for that runtime can safely be scaled to 0 replicas to save on resources. By enabling `ScaleToZero` in the configuration, ModelMesh Serving will perform this scaling automatically. If an `InferenceService` is later added that requires the runtime, it will be scaled back up. To prevent unnecessary churn, the `ScaleToZero` behavior has a grace period that delays scaling down after the last `InferenceService` required by the runtime is deleted. If a new `InferenceService` is created in that window there will be no change to the scale.
+ +### Autoscaler + +In addition to the Scale to Zero feature, runtime pods can be autoscaled through HPA. This feature is disabled by default, but it can be enabled at any time by annotating each ServingRuntime/ClusterServingRuntime. +To enable Autoscaler feature, add the following annotation. + +```shell +apiVersion: serving.kserve.io/v1alpha1 +kind: ServingRuntime +metadata: + annotations: + serving.kserve.io/autoscalerClass: hpa +``` + +Additional annotations: + +```shell +metadata: + annotations: + serving.kserve.io/autoscalerClass: hpa + serving.kserve.io/targetUtilizationPercentage: "75" + serving.kserve.io/metrics: "cpu" + autoscaling.knative.dev/min-scale: "2" + autoscaling.knative.dev/max-scale: "3" +``` + +You can disable the Autoscaler feature even if a runtime pod created based on that ServingRuntime is running. + +**NOTE** + +- If `serving.kserve.io/autoscalerClass: hpa` is not set, the other annotations would be ignored. +- If `ScaleToZero` is enabled and there are no `InferenceService`, HPA will be deleted and the ServingRuntime deployment will be scaled down to 0. diff --git a/docs/quickstart.md b/docs/quickstart.md index a75f70e94..c3bc83a76 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -8,6 +8,28 @@ To quickly get started using ModelMesh Serving, here is a brief guide. - [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl) and [kustomize](https://kubectl.docs.kubernetes.io/installation/kustomize/) (v3.2.0+) - At least 4 vCPU and 8 GB memory. For more details, please see [here](install/README.md#deployed-components). +## Recommandation + +- [cert-manager](https://github.com/cert-manager/cert-manager) + + ```shell + CERT_MANAGER_VERSION="v1.11.0" + + echo "Installing cert manager ..." + kubectl create namespace cert-manager + sleep 2 + kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.11.0/cert-manager.yaml + + echo "Waiting for cert manager started ..." 
+ kubectl wait --for=condition=ready pod -l 'app in (cert-manager,webhook)' --timeout=180s -n cert-manager + ``` + + - Modelmesh controller is using webhook that requires certificate. We suggest using [cert manager](https://github.com/cert-manager/cert-manager) for provisioning the certificates for the webhook server. Other solutions should also work as long as they put the certificates in the desired location. You can follow [the cert manager documentation](https://cert-manager.io/docs/installation/) to install it. + +If you don't want to install cert manager, you can set the `--enable-self-signed-ca`. It will execute a script to create a self-signed CA and patch it to the webhook config. + +_(Note)_ The `--fvt` option automatically sets `--enable-self-signed-ca`, so you do not need to set it explicitly. + ## 1. Install ModelMesh Serving ### Get the latest release @@ -22,7 +44,7 @@ cd modelmesh-serving ```shell kubectl create namespace modelmesh-serving -./scripts/install.sh --namespace-scope-mode --namespace modelmesh-serving --quickstart +./scripts/install.sh --namespace-scope-mode --namespace modelmesh-serving --quickstart --enable-self-signed-ca ``` This will install ModelMesh Serving in the `modelmesh-serving` namespace, along with an etcd and MinIO instances. diff --git a/fvt/README.md b/fvt/README.md index c3c312f17..486f38cd6 100644 --- a/fvt/README.md +++ b/fvt/README.md @@ -4,7 +4,7 @@ Functional Verification Test (FVT) suite for ModelMesh Serving using [Ginkgo](ht ## How the tests are structured -- The entry points for FVT suite are located in `predictor/predictor_suite_test.go` and `scaleToZero/scaleToZero_suite_test.go`. +- The entry points for FVT suite are located in `predictor/predictor_suite_test.go`, `scaleToZero/scaleToZero_suite_test.go` and `hpa/hpa_suite_test.go`. - Framework, utility, and helper functions for all suites are in the `fvt` package in this directory. 
- Manifests used to create predictors, inference services, and runtimes are in the `testdata` folder. diff --git a/fvt/fvtclient.go b/fvt/fvtclient.go index e6d1b5b73..f62b7e1ca 100644 --- a/fvt/fvtclient.go +++ b/fvt/fvtclient.go @@ -39,6 +39,7 @@ import ( "google.golang.org/grpc/credentials" appsv1 "k8s.io/api/apps/v1" + v2beta2 "k8s.io/api/autoscaling/v2beta2" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -230,6 +231,11 @@ var ( Version: "v1", Resource: "endpoints", // this must be the plural form } + gvrHPA = schema.GroupVersionResource{ + Group: "autoscaling", + Version: "v2", + Resource: "horizontalpodautoscalers", // this must be the plural form + } ) func (fvt *FVTClient) CreatePredictorExpectSuccess(resource *unstructured.Unstructured) *unstructured.Unstructured { @@ -267,6 +273,57 @@ func (fvt *FVTClient) ApplyPredictorExpectSuccess(predictor *unstructured.Unstru return obj } +func (fvt *FVTClient) ApplyServingRuntimeExpectSuccess(servingRuntime *unstructured.Unstructured) *unstructured.Unstructured { + // use server-side-apply with Patch + servingRuntime.SetManagedFields(nil) + patch, err := yaml.Marshal(servingRuntime) + Expect(err).ToNot(HaveOccurred()) + + obj, err := fvt.Resource(gvrRuntime).Namespace(fvt.namespace).Patch(context.TODO(), servingRuntime.GetName(), types.ApplyPatchType, patch, applyPatchOptions) + Expect(err).ToNot(HaveOccurred()) + Expect(obj).ToNot(BeNil()) + Expect(obj.GetKind()).To(Equal(ServingRuntimeKind)) + return obj +} + +func (fvt *FVTClient) ApplyClusterServingRuntimeExpectSuccess(clusterServingRuntime *unstructured.Unstructured) *unstructured.Unstructured { + // use server-side-apply with Patch + clusterServingRuntime.SetManagedFields(nil) + patch, err := yaml.Marshal(clusterServingRuntime) + Expect(err).ToNot(HaveOccurred()) + + obj, err := fvt.Resource(gvrCRuntime).Patch(context.TODO(), clusterServingRuntime.GetName(), types.ApplyPatchType, 
patch, applyPatchOptions) + Expect(err).ToNot(HaveOccurred()) + Expect(obj).ToNot(BeNil()) + Expect(obj.GetKind()).To(Equal(ClusterServingRuntimeKind)) + return obj +} + +func (fvt *FVTClient) SetServingRuntimeAnnotation(expectedRuntimeName string, annotations map[string]interface{}) { + fvt.log.Info("Set annotations for a runtime: "+expectedRuntimeName, "annotations", annotations) + + var srObject *unstructured.Unstructured + if NameSpaceScopeMode { + srObject = FVTClientInstance.GetServingRuntime(expectedRuntimeName) + } else { + srObject = FVTClientInstance.GetClusterServingRuntime(expectedRuntimeName) + } + + SetMap(srObject, annotations, "metadata", "annotations") + + if NameSpaceScopeMode { + FVTClientInstance.ApplyServingRuntimeExpectSuccess(srObject) + } else { + FVTClientInstance.ApplyClusterServingRuntimeExpectSuccess(srObject) + } +} + +func (fvt *FVTClient) GetClusterServingRuntime(name string) *unstructured.Unstructured { + obj, err := fvt.Resource(gvrCRuntime).Get(context.TODO(), name, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + return obj +} + func (fvt *FVTClient) GetServingRuntime(name string) *unstructured.Unstructured { obj, err := fvt.Resource(gvrRuntime).Namespace(fvt.namespace).Get(context.TODO(), name, metav1.GetOptions{}) Expect(err).ToNot(HaveOccurred()) @@ -367,6 +424,13 @@ func (fvt *FVTClient) PrintPredictors() { } } +func (fvt *FVTClient) PrintHPAs() { + err := fvt.RunKubectl("get", "hpas") + if err != nil { + fvt.log.Error(err, "Error running get hpa command") + } +} + func (fvt *FVTClient) PrintIsvcs() { err := fvt.RunKubectl("get", "inferenceservices") if err != nil { @@ -678,6 +742,24 @@ func (fvt *FVTClient) StartWatchingDeploys() watch.Interface { return deployWatcher } +func (fvt *FVTClient) ListHPAs() v2beta2.HorizontalPodAutoscalerList { + var err error + + listOptions := metav1.ListOptions{LabelSelector: "app.kubernetes.io/managed-by=modelmesh-controller", TimeoutSeconds: &DefaultTimeout} + u, err := 
fvt.Resource(gvrHPA).Namespace(fvt.namespace).List(context.TODO(), listOptions) + Expect(err).ToNot(HaveOccurred()) + + var hpaList v2beta2.HorizontalPodAutoscalerList + for _, uh := range u.Items { + var h v2beta2.HorizontalPodAutoscaler + err = runtime.DefaultUnstructuredConverter.FromUnstructured(uh.Object, &h) + Expect(err).ToNot(HaveOccurred()) + hpaList.Items = append(hpaList.Items, h) + } + + return hpaList +} + func (fvt *FVTClient) ListDeploys() appsv1.DeploymentList { var err error diff --git a/fvt/globals.go b/fvt/globals.go index 2f89b6061..8b61858a7 100644 --- a/fvt/globals.go +++ b/fvt/globals.go @@ -59,6 +59,7 @@ var MutualTLSConfig = map[string]interface{}{ const ( ServingRuntimeKind = "ServingRuntime" + ClusterServingRuntimeKind = "ClusterServingRuntime" PredictorKind = "Predictor" IsvcKind = "InferenceService" ConfigMapKind = "ConfigMap" diff --git a/fvt/helpers.go b/fvt/helpers.go index e586bc46b..aac883406 100644 --- a/fvt/helpers.go +++ b/fvt/helpers.go @@ -352,6 +352,7 @@ func WaitForDeployStatus(watcher watch.Interface, timeToStabilize time.Duration) } } } + Expect(targetStateReached).To(BeTrue(), "Timeout before deploy '%s' ready(last state was replicas: '%v' updatedReplicas: '%v' availableReplicas: '%v')", deployName, replicas, updatedReplicas, availableReplicas) } diff --git a/fvt/hpa/hpa_suite_test.go b/fvt/hpa/hpa_suite_test.go new file mode 100644 index 000000000..beb5b73e2 --- /dev/null +++ b/fvt/hpa/hpa_suite_test.go @@ -0,0 +1,118 @@ +// Copyright 2022 IBM Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +package hpa + +import ( + "os" + "testing" + "time" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + . "github.com/kserve/modelmesh-serving/fvt" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestScaleToZeroSuite(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "HPA autoscaler suite") +} + +var _ = SynchronizedBeforeSuite(func() []byte { + // runs *only* on process #1 + return nil +}, func(_ []byte) { + // runs on *all* processes + Log = zap.New(zap.UseDevMode(true), zap.WriteTo(GinkgoWriter)) + Log.Info("Initializing test suite") + + namespace := os.Getenv("NAMESPACE") + if namespace == "" { + namespace = DefaultTestNamespace + } + serviceName := os.Getenv("SERVICENAME") + if serviceName == "" { + serviceName = DefaultTestServiceName + } + controllerNamespace := os.Getenv("CONTROLLERNAMESPACE") + if controllerNamespace == "" { + controllerNamespace = DefaultControllerNamespace + } + NameSpaceScopeMode = os.Getenv("NAMESPACESCOPEMODE") == "true" + Log.Info("Using environment variables", "NAMESPACE", namespace, "SERVICENAME", serviceName, + "CONTROLLERNAMESPACE", controllerNamespace, "NAMESPACESCOPEMODE", NameSpaceScopeMode) + + var err error + FVTClientInstance, err = GetFVTClient(Log, namespace, serviceName, controllerNamespace) + Expect(err).ToNot(HaveOccurred()) + Expect(FVTClientInstance).ToNot(BeNil()) + Log.Info("FVTClientInstance created", "client", FVTClientInstance) + + // confirm 4 cluster serving runtimes or serving runtimes + var list *unstructured.UnstructuredList + if NameSpaceScopeMode { + list, err = FVTClientInstance.ListServingRuntimes(metav1.ListOptions{}) + } else { + list, err = FVTClientInstance.ListClusterServingRuntimes(metav1.ListOptions{}) + } + Expect(err).ToNot(HaveOccurred()) + 
Expect(list.Items).To(HaveLen(4)) + + config := map[string]interface{}{ + "scaleToZero": map[string]interface{}{ + "enabled": true, + "gracePeriodSeconds": 5, + }, + "podsPerRuntime": 1, + } + FVTClientInstance.ApplyUserConfigMap(config) + + // cleanup any predictors and inference services if they exist + FVTClientInstance.DeleteAllPredictors() + FVTClientInstance.DeleteAllIsvcs() + + Log.Info("Setup completed") +}) + +var _ = SynchronizedAfterSuite(func() { + // runs on *all* processes + // ensure we cleanup any port-forward + FVTClientInstance.DisconnectFromModelServing() +}, func() { + // runs *only* on process #1 + // cleanup any predictors and inference services if they exist + FVTClientInstance.DeleteAllPredictors() + FVTClientInstance.DeleteAllIsvcs() +}) + +// register handlers for a failed test case to print info to the console +var startTime string +var _ = JustBeforeEach(func() { + startTime = time.Now().Format("2006-01-02T15:04:05Z") +}) +var _ = JustAfterEach(func() { + if CurrentSpecReport().Failed() { + FVTClientInstance.PrintPredictors() + FVTClientInstance.PrintIsvcs() + FVTClientInstance.PrintHPAs() + FVTClientInstance.PrintPods() + FVTClientInstance.PrintDescribeNodes() + FVTClientInstance.PrintEvents() + FVTClientInstance.TailPodLogs(startTime) + } +}) diff --git a/fvt/hpa/hpa_test.go b/fvt/hpa/hpa_test.go new file mode 100644 index 000000000..edcad383a --- /dev/null +++ b/fvt/hpa/hpa_test.go @@ -0,0 +1,251 @@ +// Copyright 2021 IBM Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +package hpa + +import ( + "strings" + "time" + + "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + "github.com/kserve/kserve/pkg/constants" + v2beta2 "k8s.io/api/autoscaling/v2beta2" + "k8s.io/apimachinery/pkg/runtime" + + . "github.com/kserve/modelmesh-serving/fvt" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" +) + +var ( + schemeSR = runtime.NewScheme() +) + +func init() { + _ = v1alpha1.AddToScheme(schemeSR) +} + +var _ = Describe("Scaling of runtime deployments with HPA Autoscaler", Ordered, func() { + // constants + testPredictorObject := NewPredictorForFVT("mlserver-sklearn-predictor.yaml") + // runtime expected to serve the test predictor + expectedRuntimeName := "mlserver-0.x" + + // checkDeploymentState returns the replicas value for the expected runtime + // and expects others to be scaled to zero + checkDeploymentState := func() int32 { + deployments := FVTClientInstance.ListDeploys() + var replicas int32 + for _, d := range deployments.Items { + Log.Info("Checking deployment scale", "name", d.ObjectMeta.Name) + // the service prefix may change + if strings.HasSuffix(d.ObjectMeta.Name, expectedRuntimeName) { + // since we list existing deploys Replicas should never be nil + replicas = *d.Spec.Replicas + } else { + Expect(*d.Spec.Replicas).To(BeEquivalentTo(int32(0))) + } + } + return replicas + } + expectScaledToTargetReplicas := func(targetReplicas int32) { + replicas := checkDeploymentState() + Expect(replicas).To(BeEquivalentTo(targetReplicas)) + } + + expectScaledToZero := func() { + replicas := checkDeploymentState() + Expect(replicas).To(BeEquivalentTo(int32(0))) + } + + checkHPAState := func() *v2beta2.HorizontalPodAutoscaler { + hpaList := FVTClientInstance.ListHPAs() + + var hpa *v2beta2.HorizontalPodAutoscaler + if len(hpaList.Items) == 0 { + hpa = nil + } else { + for _, d := range 
hpaList.Items { + Log.Info("Checking if HPA exist", "name", d.ObjectMeta.Name) + // the service prefix may change + if strings.HasSuffix(d.ObjectMeta.Name, expectedRuntimeName) { + hpa = &d + } + } + } + return hpa + } + + expectHPAExist := func(exist bool) { + hpa := checkHPAState() + if exist { + Expect(hpa).NotTo(BeNil()) + } else { + Expect(hpa).To(BeNil()) + } + } + + expectHPAMinReplicas := func(minReplicas int32) { + hpa := checkHPAState + Expect(*hpa().Spec.MinReplicas).To(Equal(minReplicas)) + } + + expectHPAMaxReplicas := func(maxReplicas int32) { + hpa := checkHPAState + Expect(hpa().Spec.MaxReplicas).To(Equal(maxReplicas)) + } + + expectHPATargetUtilizationPercentage := func(targetUtilizationPercentage int32) { + hpa := checkHPAState + Expect(*hpa().Spec.Metrics[0].Resource.Target.AverageUtilization).To(Equal(targetUtilizationPercentage)) + } + + expectHPAResourceName := func(resourceName corev1.ResourceName) { + hpa := checkHPAState + Expect(hpa().Spec.Metrics[0].Resource.Name).To(Equal(resourceName)) + } + + deployTestPredictorAndCheckDefaultHPA := func() { + CreatePredictorAndWaitAndExpectLoaded(testPredictorObject) + expectScaledToTargetReplicas(int32(constants.DefaultMinReplicas)) + + // check HPA object + expectHPAExist(true) + expectHPAMinReplicas(1) + expectHPAMaxReplicas(1) + expectHPATargetUtilizationPercentage(80) + expectHPAResourceName(corev1.ResourceCPU) + } + BeforeAll(func() { + srAnnotations := make(map[string]interface{}) + srAnnotations[constants.AutoscalerClass] = string(constants.AutoscalerClassHPA) + + FVTClientInstance.SetServingRuntimeAnnotation(expectedRuntimeName, srAnnotations) + }) + + BeforeEach(func() { + FVTClientInstance.DeleteAllPredictors() + // ensure a stable deploy state + WaitForStableActiveDeployState() + }) + + AfterAll(func() { + FVTClientInstance.DeleteAllPredictors() + + annotations := make(map[string]interface{}) + FVTClientInstance.SetServingRuntimeAnnotation(expectedRuntimeName, annotations) + }) + + 
Context("when there are no predictors", func() { + It("Scale all runtimes down", func() { + // check that all runtimes are scaled to zero + By("Check ScaleToZero and No HPA") + expectScaledToZero() + expectHPAExist(false) + }) + It("Scale all runtimes down after a created test predictor is deleted", func() { + By("Creating a test predictor for one Runtime") + deployTestPredictorAndCheckDefaultHPA() + + By("Delete all predictors") + FVTClientInstance.DeleteAllPredictors() + time.Sleep(10 * time.Second) + + By("Check ScaleToZero and No HPA") + expectScaledToZero() + expectHPAExist(false) + }) + }) + Context("when there are predictors", func() { + It("Creating a predictor should create an HPA and scale up the runtime to minReplicas of HPA", func() { + By("Creating a test predictor for one Runtime") + deployTestPredictorAndCheckDefaultHPA() + }) + + It("Scaleup/Scaledown and Change targetUtilizationPercentage by an annotation in ServingRuntime", func() { + By("Creating a test predictor for one Runtime") + deployTestPredictorAndCheckDefaultHPA() + + // ScaleUp Test + By("ScaleUp to min(2)/max(4): " + constants.MinScaleAnnotationKey) + By("Increase TargetUtilizationPercentage to 90: " + constants.TargetUtilizationPercentage) + By("Change Metrics to memory: " + constants.TargetUtilizationPercentage) + srAnnotationsScaleUp := make(map[string]interface{}) + srAnnotationsScaleUp[constants.AutoscalerClass] = string(constants.AutoscalerClassHPA) + srAnnotationsScaleUp[constants.MinScaleAnnotationKey] = "2" + srAnnotationsScaleUp[constants.MaxScaleAnnotationKey] = "4" + srAnnotationsScaleUp[constants.TargetUtilizationPercentage] = "90" + srAnnotationsScaleUp[constants.AutoscalerMetrics] = "memory" + + // set modified annotations + FVTClientInstance.SetServingRuntimeAnnotation(expectedRuntimeName, srAnnotationsScaleUp) + + time.Sleep(10 * time.Second) + + // check that the expected runtime is scaled up to the minReplicas of its HPA (all other runtimes stay at zero) + expectScaledToTargetReplicas(2) + + // 
check HPA + expectHPAExist(true) + expectHPAMinReplicas(2) + expectHPAMaxReplicas(4) + expectHPATargetUtilizationPercentage(90) + expectHPAResourceName(corev1.ResourceMemory) + + // ScaleDown Test + By("ScaleDown to min(1)/max(1): " + constants.MinScaleAnnotationKey) + By("Decrease TargetUtilizationPercentage to 80: " + constants.TargetUtilizationPercentage) + By("Change Metrics to cpu: " + constants.TargetUtilizationPercentage) + srAnnotationsScaleDown := make(map[string]interface{}) + srAnnotationsScaleDown[constants.AutoscalerClass] = string(constants.AutoscalerClassHPA) + srAnnotationsScaleDown[constants.MinScaleAnnotationKey] = "1" + srAnnotationsScaleDown[constants.MaxScaleAnnotationKey] = "1" + srAnnotationsScaleDown[constants.TargetUtilizationPercentage] = "80" + srAnnotationsScaleDown[constants.AutoscalerMetrics] = "cpu" + + // set modified annotations + FVTClientInstance.SetServingRuntimeAnnotation(expectedRuntimeName, srAnnotationsScaleDown) + + time.Sleep(10 * time.Second) + // check that all runtimes except the one are scaled up to minimum replicas of HPA + expectScaledToTargetReplicas(1) + + // check HPA object + expectHPAExist(true) + expectHPAMinReplicas(1) + expectHPAMaxReplicas(1) + expectHPATargetUtilizationPercentage(80) + expectHPAResourceName(corev1.ResourceCPU) + }) + + }) + // This test must be the last because it will remove hpa annotation from servingruntime/clusterservingruntime + Context("When the model does not need autoscaler anymore", func() { + It("Disable autoscaler", func() { + deployTestPredictorAndCheckDefaultHPA() + + // set modified annotations + By("Deleting this annotation: " + constants.AutoscalerClass) + srAnnotationsNone := make(map[string]interface{}) + FVTClientInstance.SetServingRuntimeAnnotation(expectedRuntimeName, srAnnotationsNone) + + time.Sleep(10 * time.Second) + // check that all runtimes except the one are scaled up to servingRuntime default replicas + expectScaledToTargetReplicas(1) + + // check if HPA deleted 
+ expectHPAExist(false) + }) + }) +}) diff --git a/fvt/utils.go b/fvt/utils.go index 204524855..787c8d170 100644 --- a/fvt/utils.go +++ b/fvt/utils.go @@ -93,6 +93,11 @@ func GetMap(obj *unstructured.Unstructured, fieldPath ...string) map[string]inte return value } +func SetMap(obj *unstructured.Unstructured, value map[string]interface{}, fieldPath ...string) { + err := unstructured.SetNestedMap(obj.Object, value, fieldPath...) + Expect(err).ToNot(HaveOccurred()) +} + func SetString(obj *unstructured.Unstructured, value string, fieldPath ...string) { err := unstructured.SetNestedField(obj.Object, value, fieldPath...) Expect(err).ToNot(HaveOccurred()) diff --git a/go.mod b/go.mod index 4ace4b1ba..7e90c62a7 100644 --- a/go.mod +++ b/go.mod @@ -14,6 +14,7 @@ require ( github.com/onsi/ginkgo/v2 v2.1.3 github.com/onsi/gomega v1.18.1 github.com/operator-framework/operator-lib v0.10.0 + github.com/pkg/errors v0.9.1 github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.55.0 github.com/spf13/viper v1.10.1 github.com/stretchr/testify v1.8.0 @@ -81,7 +82,6 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pelletier/go-toml v1.9.4 // indirect - github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_golang v1.14.0 // indirect github.com/prometheus/client_model v0.3.0 // indirect diff --git a/main.go b/main.go index 3ce4518c3..6aede8edb 100644 --- a/main.go +++ b/main.go @@ -55,6 +55,8 @@ import ( "github.com/kserve/modelmesh-serving/controllers" "github.com/kserve/modelmesh-serving/controllers/modelmesh" "github.com/kserve/modelmesh-serving/pkg/mmesh" + + "sigs.k8s.io/controller-runtime/pkg/webhook" // +kubebuilder:scaffold:imports ) @@ -259,6 +261,15 @@ func main() { os.Exit(1) } + // Setup servingruntime validating webhook + hookServer := mgr.GetWebhookServer() + servingRuntimeWebhook := 
&webhook.Admission{ + Handler: &servingv1alpha1.ServingRuntimeWebhook{ + Client: mgr.GetClient(), + }, + } + hookServer.Register("/validate-serving-modelmesh-io-v1alpha1-servingruntime", servingRuntimeWebhook) + _, err = mmesh.InitGrpcResolver(ControllerNamespace, mgr) if err != nil { setupLog.Error(err, "Failed to Initialize Grpc Resolver, exit") diff --git a/scripts/install.sh b/scripts/install.sh index a73ac028f..6d23e6f1c 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -26,6 +26,8 @@ quickstart=false fvt=false user_ns_array= namespace_scope_mode=false # change to true to run in namespace scope +modelmesh_serving_image= +enable_self_signed_ca=false function showHelp() { echo "usage: $0 [flags]" @@ -39,6 +41,8 @@ function showHelp() { echo " --fvt Install and configure required supporting datastores in the same namespace (etcd and MinIO) - for development with fvt enabled" echo " -dev, --dev-mode-logging Enable dev mode logging (stacktraces on warning and no sampling)" echo " --namespace-scope-mode Run ModelMesh Serving in namespace scope mode" + echo " --modelmesh-serving-image Set a custom modelmesh serving image" + echo " --enable-self-signed-ca Enable self-signed-ca, if you don't have cert-manager in the cluster" echo echo "Installs ModelMesh Serving CRDs, controller, and built-in runtimes into specified" echo "Kubernetes namespaces." @@ -166,10 +170,18 @@ while (($# > 0)); do ;; --fvt) fvt=true + enable_self_signed_ca=true ;; --namespace-scope-mode) namespace_scope_mode=true ;; + --modelmesh-serving-image) + shift + modelmesh_serving_image="$1" + ;; + --enable-self-signed-ca ) + enable_self_signed_ca=true + ;; -*) die "Unknown option: '${1}'" ;; @@ -290,6 +302,41 @@ else fi info "Installing ModelMesh Serving CRDs and controller" +if [[ ! -z $modelmesh_serving_image ]]; then + info "Custom ModelMesh Serving Image: $modelmesh_serving_image" + if [[ ! 
-f manager/kustomization.yaml.ori ]]; then + cp manager/kustomization.yaml manager/kustomization.yaml.ori + fi + cd manager; kustomize edit set image modelmesh-controller=${modelmesh_serving_image} ; cd ../ +fi + +if [[ $enable_self_signed_ca == "true" ]]; then + info "Enabled Self Signed CA: Update manifest" + if [[ ! -f certmanager/kustomization.yaml.ori ]]; then + cp certmanager/kustomization.yaml certmanager/kustomization.yaml.ori + fi + cd certmanager; kustomize edit remove resource certificate.yaml; cd ../ + + if [[ ! -f default/kustomization.yaml.ori ]]; then + cp default/kustomization.yaml default/kustomization.yaml.ori + fi + cd default; kustomize edit remove resource ../certmanager; cd ../ + + # comment out vars + configMapGeneratorStartLine=$(grep -n configMapGenerator ./default/kustomization.yaml |cut -d':' -f1) + configMapGeneratorBeforeLine=$((configMapGeneratorStartLine-1)) + sed "1,${configMapGeneratorBeforeLine}s/^/#/g" -i default/kustomization.yaml + + # remove webhookcainjection_patch.yaml + sed 's+- webhookcainjection_patch.yaml++g' -i default/kustomization.yaml + + # create dummy secret 'modelmesh-webhook-server-cert' + secretExist=$(kubectl get secret modelmesh-webhook-server-cert --ignore-not-found|wc -l) + if [[ $secretExist == 0 ]]; then + kubectl create secret generic modelmesh-webhook-server-cert + fi +fi + kustomize build default | kubectl apply -f - if [[ $dev_mode_logging == "true" ]]; then @@ -305,8 +352,19 @@ if [[ $namespace_scope_mode == "true" ]]; then rm crd/kustomization.yaml.bak fi -info "Waiting for ModelMesh Serving controller pod to be up..." -wait_for_pods_ready "-l control-plane=modelmesh-controller" +if [[ ! 
-z $modelmesh_serving_image ]]; then + cp manager/kustomization.yaml.ori manager/kustomization.yaml + rm manager/kustomization.yaml.ori +fi + +if [[ $enable_self_signed_ca == "true" ]]; then + cp certmanager/kustomization.yaml.ori certmanager/kustomization.yaml + cp default/kustomization.yaml.ori default/kustomization.yaml + rm certmanager/kustomization.yaml.ori default/kustomization.yaml.ori +else + info "Waiting for ModelMesh Serving controller pod to be up..." + wait_for_pods_ready "-l control-plane=modelmesh-controller" +fi # Older versions of kustomize have different load restrictor flag formats. # Can be removed once Kubeflow installation stops requiring v3.2. @@ -318,6 +376,18 @@ elif [[ -n "$kustomize_version" && "$kustomize_version" < "4.0.1" ]]; then kustomize_load_restrictor_arg="--load_restrictor LoadRestrictionsNone" fi +if [[ $enable_self_signed_ca == "true" ]]; then + info "Enabled Self Signed CA: Generate certificates and restart controller" + + # Delete dummy secret for webhook server + kubectl delete secret modelmesh-webhook-server-cert + + ../scripts/self-signed-ca.sh --namespace $namespace + + info "Restarting ModelMesh Serving controller pod..." 
+ wait_for_pods_ready "-l control-plane=modelmesh-controller" +fi + info "Installing ModelMesh Serving built-in runtimes" if [[ $namespace_scope_mode == "true" ]]; then kustomize build namespace-runtimes ${kustomize_load_restrictor_arg} | kubectl apply -f - diff --git a/scripts/self-signed-ca.sh b/scripts/self-signed-ca.sh new file mode 100755 index 000000000..6adad816b --- /dev/null +++ b/scripts/self-signed-ca.sh @@ -0,0 +1,153 @@ +#!/bin/bash + +set -e + +usage() { + cat <> ${tmpdir}/csr.conf +[req] +req_extensions = v3_req +distinguished_name = req_distinguished_name +[req_distinguished_name] +[ v3_req ] +basicConstraints = CA:FALSE +keyUsage = nonRepudiation, digitalSignature, keyEncipherment +extendedKeyUsage = serverAuth +subjectAltName = @alt_names +[alt_names] +DNS.1 = ${service} +DNS.2 = ${service}.${namespace} +DNS.3 = ${service}.${namespace}.svc +DNS.4 = ${service}.${namespace}.svc.cluster +DNS.5 = ${service}.${namespace}.svc.cluster.local + +EOF +# Create CA and Server key/certificate +openssl genrsa -out ${tmpdir}/ca.key 2048 +openssl req -x509 -newkey rsa:2048 -key ${tmpdir}/ca.key -out ${tmpdir}/ca.crt -days 365 -nodes -subj "/CN=${service}.${namespace}.svc" + +openssl genrsa -out ${tmpdir}/server.key 2048 +openssl req -new -key ${tmpdir}/server.key -subj "/CN=${service}.${namespace}.svc" -out ${tmpdir}/server.csr -config ${tmpdir}/csr.conf + +# Self sign +openssl x509 -extensions v3_req -req -days 365 -in ${tmpdir}/server.csr -CA ${tmpdir}/ca.crt -CAkey ${tmpdir}/ca.key -CAcreateserial -out ${tmpdir}/server.crt -extfile ${tmpdir}/csr.conf +# create the secret with server cert/key +kubectl create secret generic ${secret} \ + --from-file=tls.key=${tmpdir}/server.key \ + --from-file=tls.crt=${tmpdir}/server.crt \ + --dry-run -o yaml | + kubectl -n ${namespace} apply -f - +# Webhook pod needs to be restarted so that the service reload the secret +# http://github.com/kueflow/kubeflow/issues/3227 +webhookPod=$(kubectl get pods -n ${namespace} |grep 
${webhookDeploymentName} |awk '{print $1;}') +# ignore error if webhook pod does not exist +kubectl delete pod ${webhookPod} -n ${namespace} 2>/dev/null || true +echo "webhook ${webhookPod} is restarted to utilize the new secret" + +echo "CA Certificate:" +cat ${tmpdir}/ca.crt + +# -a means base64 encode +caBundle=$(cat ${tmpdir}/ca.crt | openssl enc -a -A) +echo "Encoded CA:" +echo -e "${caBundle} \n" + +# check if jq is installed +if [ ! -x "$(command -v jq)" ]; then + echo "jq not found" + exit 1 +fi +# # Patch CA Certificate to mutatingWebhook +# mutatingWebhookCount=$(kubectl get mutatingwebhookconfiguration ${webhookConfigName} -ojson | jq -r '.webhooks' | jq length) +# # build patchstring based on webhook counts +# mutatingPatchString='[' +# for i in $(seq 0 $(($mutatingWebhookCount-1))) +# do +# mutatingPatchString=$mutatingPatchString'{"op": "replace", "path": "/webhooks/'$i'/clientConfig/caBundle", "value":"{{CA_BUNDLE}}"}, ' +# done +# # strip ', ' +# mutatingPatchString=${mutatingPatchString%, }']' +# mutatingPatchString=$(echo ${mutatingPatchString} | sed "s|{{CA_BUNDLE}}|${caBundle}|g") + +# echo "patching ca bundle for mutating webhook configuration..." +# kubectl patch mutatingwebhookconfiguration ${webhookConfigName} \ +# --type='json' -p="${mutatingPatchString}" + +# Patch CA Certificate to validatingWebhook +validatingWebhookCount=$(kubectl get validatingwebhookconfiguration ${webhookConfigName} -ojson | jq -r '.webhooks' | jq length) +validatingPatchString='[' +for i in $(seq 0 $(($validatingWebhookCount-1))) +do + validatingPatchString=$validatingPatchString'{"op": "replace", "path": "/webhooks/'$i'/clientConfig/caBundle", "value":"{{CA_BUNDLE}}"}, ' +done +validatingPatchString=${validatingPatchString%, }']' +validatingPatchString=$(echo ${validatingPatchString} | sed "s|{{CA_BUNDLE}}|${caBundle}|g") + +echo "patching ca bundle for validating webhook configuration..." 
+kubectl patch validatingwebhookconfiguration ${webhookConfigName} \ + --type='json' -p="${validatingPatchString}"