From dd0229fe52d6c90703eabeecfdee287dd3ac55fd Mon Sep 17 00:00:00 2001
From: Jooho Lee
Date: Thu, 27 Apr 2023 17:07:13 -0400
Subject: [PATCH] feat: Enable Horizontal Pod Autoscaling for ServingRuntime/ClusterServingRuntime (#342)

Enable Horizontal Pod Autoscaling for ServingRuntime/ClusterServingRuntime by adding annotation `serving.kserve.io/autoscalerClass: hpa`

- Add auto-scaling, HPA controller
- Add ServingRuntime Webhook
- Update deployment manifests
- Add script to generate self-signed certificate
- Add option to enable self-signed certificate to install script
- Add deploy-release-dev-mode-fvt target to Makefile
- Add FVT and unit tests
- Upgrade FVT minikube version from 1.25 to 1.27
- Enable FVT deployment on OpenShift (etcd --data-dir)
- Update Docs

Resolves #329

Signed-off-by: Jooho Lee
---
 .github/workflows/run-fvt.yml | 22 +-
 .gitignore | 1 +
 Dockerfile.develop | 1 +
 Makefile | 12 +-
 .../v1alpha1/servingruntime_webhook.go | 216 +++++++++++++++
 .../v1alpha1/servingruntime_webhook_test.go | 106 ++++++++
 .../serving/v1alpha1/zz_generated.deepcopy.go | 15 ++
 config/certmanager/certificate.yaml | 10 +-
 config/crd/patches/webhook_in_predictors.yaml | 1 -
 .../patches/webhook_in_servingruntimes.yaml | 1 -
 config/default/kustomization.yaml | 100 +++----
 config/default/manager_auth_proxy_patch.yaml | 1 -
 config/default/manager_webhook_patch.yaml | 11 +-
 config/default/webhookcainjection_patch.yaml | 13 +-
 config/dependencies/fvt.yaml | 2 +
 config/namespace-runtimes/kustomization.yaml | 2 +-
 config/prometheus/monitor.yaml | 1 -
 config/rbac/cluster-scope/kustomization.yaml | 2 +
 config/rbac/cluster-scope/role.yaml | 12 +
 config/rbac/common/kustomization.yaml | 1 +
 config/rbac/common/networkpolicy-webhook.yaml | 29 ++
 .../rbac/namespace-scope/kustomization.yaml | 2 +
 config/rbac/namespace-scope/role.yaml | 12 +
 config/webhook/kustomization.yaml | 20 ++
 config/webhook/kustomizeconfig.yaml | 31 +++
 config/webhook/manifests.yaml | 40 +++
 config/webhook/service.yaml | 25 ++
 .../autoscaler/autoscaler_reconciler.go | 129 +++++++++
 .../autoscaler/autoscaler_reconciler_test.go | 72 +++++
 controllers/hpa/hpa_reconciler.go | 193 ++++++++++++++
 controllers/hpa/hpa_reconciler_test.go | 157 +++++++++++
 controllers/servingruntime_controller.go | 72 ++++-
 docs/developer.md | 12 +
 docs/install/install-script.md | 24 +-
 docs/production-use/scaling.md | 32 +++
 docs/quickstart.md | 4 +-
 docs/release-process.md | 1 -
 fvt/README.md | 2 +-
 fvt/fvtclient.go | 82 ++++++
 fvt/globals.go | 1 +
 fvt/helpers.go | 1 +
 fvt/hpa/hpa_suite_test.go | 118 +++++++++
 fvt/hpa/hpa_test.go | 249 ++++++++++++++++++
 fvt/utils.go | 5 +
 go.mod | 2 +-
 main.go | 11 +
 pkg/constants/constants.go | 21 ++
 scripts/install.sh | 68 ++++-
 scripts/self-signed-ca.sh | 169 ++++++++++++
 49 files changed, 1994 insertions(+), 120 deletions(-)
 create mode 100644 apis/serving/v1alpha1/servingruntime_webhook.go
 create mode 100644 apis/serving/v1alpha1/servingruntime_webhook_test.go
 create mode 100644 config/rbac/common/networkpolicy-webhook.yaml
 create mode 100644 config/webhook/kustomization.yaml
 create mode 100644 config/webhook/kustomizeconfig.yaml
 create mode 100644 config/webhook/manifests.yaml
 create mode 100644 config/webhook/service.yaml
 create mode 100644 controllers/autoscaler/autoscaler_reconciler.go
 create mode 100644 controllers/autoscaler/autoscaler_reconciler_test.go
 create mode 100644 controllers/hpa/hpa_reconciler.go
 create mode 100644 controllers/hpa/hpa_reconciler_test.go
 create mode 100644 fvt/hpa/hpa_suite_test.go
 create mode 100644
fvt/hpa/hpa_test.go create mode 100644 pkg/constants/constants.go create mode 100755 scripts/self-signed-ca.sh diff --git a/.github/workflows/run-fvt.yml b/.github/workflows/run-fvt.yml index 0b1987d8..ddc13024 100644 --- a/.github/workflows/run-fvt.yml +++ b/.github/workflows/run-fvt.yml @@ -20,14 +20,16 @@ jobs: - uses: actions/checkout@v2 - uses: actions/setup-go@v2 with: - go-version: '1.18.7' - - name: Setup Minikube - run: | - wget --no-verbose https://github.com/kubernetes/minikube/releases/download/v1.25.1/minikube-linux-amd64 - sudo cp minikube-linux-amd64 /usr/local/bin/minikube - sudo chmod 755 /usr/local/bin/minikube - sudo apt-get install -y conntrack socat - minikube start --driver=none --kubernetes-version v1.22.10 + go-version: '1.18.7' + - name: Start Minikube + uses: medyagh/setup-minikube@v0.0.11 + id: minikube + with: + minikube-version: 1.27.1 + container-runtime: docker + kubernetes-version: v1.25.2 + cpus: max + memory: max - name: Check pods run: | sleep 30 @@ -55,6 +57,7 @@ jobs: echo -e '\n disabled: true' >> config/runtimes/torchserve-0.x.yaml - name: Build Controller image run: | + eval $(minikube -p minikube docker-env) make build.develop ./scripts/build_docker.sh --target runtime --tag ${{ env.IMAGE_TAG }} - name: Install ModelMesh Serving @@ -63,12 +66,14 @@ jobs: ./scripts/install.sh --namespace modelmesh-serving --fvt --dev-mode-logging - name: Free up disk space run: | + eval $(minikube -p minikube docker-env) echo "Pruning images" docker image prune -a -f docker system df df -h - name: Pre-pull runtime images run: | + eval $(minikube -p minikube docker-env) docker pull nvcr.io/nvidia/tritonserver:21.06.1-py3 docker pull seldonio/mlserver:0.5.2 docker pull openvino/model_server:2022.2 @@ -78,6 +83,7 @@ jobs: docker pull kserve/modelmesh - name: Check installation run: | + eval $(minikube -p minikube docker-env) docker images kubectl get pods kubectl get clusterservingruntimes diff --git a/.gitignore b/.gitignore index 33a31d26..a5e66818 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ public/ target/ +vendor/ # Binaries for programs and plugins *.exe diff --git a/Dockerfile.develop b/Dockerfile.develop index e47f285d..ecb7a3f8 100644 --- a/Dockerfile.develop +++ b/Dockerfile.develop @@ -40,6 +40,7 @@ RUN microdnf install \ tar \ vim \ git \ + jq \ python38 \ nodejs && \ pip3 install pre-commit && \ diff --git a/Makefile b/Makefile index eabe70d5..7e995622 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,8 @@ test: # Run fvt tests. This requires an etcd, kubernetes connection, and model serving installation. 
Ginkgo CLI is used to run them in parallel fvt: - ginkgo -v -procs=2 --progress --fail-fast fvt/predictor fvt/scaleToZero fvt/storage --timeout=50m + ginkgo -v -procs=2 --progress --fail-fast fvt/predictor fvt/scaleToZero fvt/storage fvt/hpa --timeout=50m + # Command to regenerate the grpc go files from the proto files fvt-protoc: @@ -87,6 +88,15 @@ deploy-release: deploy-release-dev-mode: ./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging +deploy-release-dev-mode-fvt: +ifdef MODELMESH_SERVING_IMAGE + $(eval extra_options += --modelmesh-serving-image ${MODELMESH_SERVING_IMAGE}) +endif +ifdef NAMESPACE_SCOPE_MODE + $(eval extra_options += --namespace-scope-mode) +endif + ./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging --fvt ${extra_options} + delete: oc-login ./scripts/delete.sh --namespace ${NAMESPACE} --local-config-path config diff --git a/apis/serving/v1alpha1/servingruntime_webhook.go b/apis/serving/v1alpha1/servingruntime_webhook.go new file mode 100644 index 00000000..3303f290 --- /dev/null +++ b/apis/serving/v1alpha1/servingruntime_webhook.go @@ -0,0 +1,216 @@ +//Copyright 2021 IBM Corporation +// +//Licensed under the Apache License, Version 2.0 (the "License"); +//you may not use this file except in compliance with the License. +//You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +//Unless required by applicable law or agreed to in writing, software +//distributed under the License is distributed on an "AS IS" BASIS, +//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//See the License for the specific language governing permissions and +//limitations under the License. +// +package v1alpha1 + +import ( + "context" + "fmt" + "math" + "net/http" + "strconv" + + kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/modelmesh-serving/controllers/autoscaler" + mmcontstant "github.com/kserve/modelmesh-serving/pkg/constants" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" +) + +//+kubebuilder:webhook:path=/validate-serving-modelmesh-io-v1alpha1-servingruntime,mutating=false,failurePolicy=fail,sideEffects=None,groups=serving.kserve.io,resources=servingruntimes;clusterservingruntimes,verbs=create;update,versions=v1alpha1,name=servingruntime.modelmesh-webhook-server.default,admissionReviewVersions=v1 +type ServingRuntimeWebhook struct { + Client client.Client + decoder *admission.Decoder +} + +func (s *ServingRuntimeWebhook) Handle(ctx context.Context, req admission.Request) admission.Response { + var srAnnotations map[string]string + srReplicas := uint16(math.MaxUint16) + multiModel := false + + if req.Kind.Kind == "ServingRuntime" { + servingRuntime := &kservev1alpha.ServingRuntime{} + err := s.decoder.Decode(req, servingRuntime) + if err != nil { + return admission.Errored(http.StatusBadRequest, err) + } + srAnnotations = servingRuntime.ObjectMeta.Annotations + + if (*servingRuntime).Spec.Replicas != nil { + srReplicas = uint16(*servingRuntime.Spec.Replicas) + } + + if (*servingRuntime).Spec.MultiModel != nil { + multiModel = *servingRuntime.Spec.MultiModel + } + + } else { + clusterServingRuntime := &kservev1alpha.ClusterServingRuntime{} + err := s.decoder.Decode(req, clusterServingRuntime) + if err != nil { + return admission.Errored(http.StatusBadRequest, err) + } + srAnnotations = 
clusterServingRuntime.ObjectMeta.Annotations + + if (*clusterServingRuntime).Spec.Replicas != nil { + srReplicas = uint16(*clusterServingRuntime.Spec.Replicas) + } + + if (*clusterServingRuntime).Spec.MultiModel != nil { + multiModel = *clusterServingRuntime.Spec.MultiModel + } + } + + if !multiModel { + return admission.Allowed("Not validating ServingRuntime because it is not ModelMesh compatible") + } + + if err := validateServingRuntimeAutoscaler(srAnnotations); err != nil { + return admission.Denied(err.Error()) + } + + if err := validateAutoscalerTargetUtilizationPercentage(srAnnotations); err != nil { + return admission.Denied(err.Error()) + } + + if err := validateAutoScalingReplicas(srAnnotations, srReplicas); err != nil { + return admission.Denied(err.Error()) + } + + return admission.Allowed("Passed all validation checks for ServingRuntime") +} + +// InjectDecoder injects the decoder. +func (s *ServingRuntimeWebhook) InjectDecoder(d *admission.Decoder) error { + s.decoder = d + return nil +} + +// Validation of servingruntime autoscaler class +func validateServingRuntimeAutoscaler(annotations map[string]string) error { + value, ok := annotations[constants.AutoscalerClass] + class := constants.AutoscalerClassType(value) + if ok { + for _, item := range constants.AutoscalerAllowedClassList { + if class == item { + switch class { + case constants.AutoscalerClassHPA: + if metric, ok := annotations[constants.AutoscalerMetrics]; ok { + return validateHPAMetrics(constants.AutoscalerMetricsType(metric)) + } else { + return nil + } + default: + return fmt.Errorf("unknown autoscaler class [%s]", class) + } + } + } + return fmt.Errorf("[%s] is not a supported autoscaler class type.\n", value) + } + + return nil +} + +// Validate of autoscaler targetUtilizationPercentage +func validateAutoscalerTargetUtilizationPercentage(annotations map[string]string) error { + if value, ok := annotations[constants.TargetUtilizationPercentage]; ok { + t, err := strconv.Atoi(value) + if err != nil { + return fmt.Errorf("The target utilization percentage should be a [1-100] integer.") + } else { + if t < 1 || t > 100 { + return fmt.Errorf("The target utilization percentage should be a [1-100] integer.") + } + } + } + + return nil +} + +// Validate scaling options +func validateAutoScalingReplicas(annotations map[string]string, srReplicas uint16) error { + autoscalerClassType := autoscaler.AutoscalerClassNone + if value, ok := annotations[constants.AutoscalerClass]; ok { + autoscalerClassType = value + } + + switch autoscalerClassType { + case string(constants.AutoscalerClassHPA): + if srReplicas != math.MaxUint16 { + return fmt.Errorf("Autoscaler is enabled and also replicas variable set. 
You can not set both.") + } + return validateScalingHPA(annotations) + default: + return nil + } +} + +func validateScalingHPA(annotations map[string]string) error { + metric := constants.AutoScalerMetricsCPU + if value, ok := annotations[constants.AutoscalerMetrics]; ok { + metric = constants.AutoscalerMetricsType(value) + } + + minReplicas := 1 + if value, ok := annotations[mmcontstant.MinScaleAnnotationKey]; ok { + if valueInt, err := strconv.Atoi(value); err != nil { + return fmt.Errorf("The min replicas should be a integer.") + } else if valueInt < 1 { + return fmt.Errorf("The min replicas should be more than 0") + } else { + minReplicas = valueInt + } + } + + maxReplicas := 1 + if value, ok := annotations[mmcontstant.MaxScaleAnnotationKey]; ok { + if valueInt, err := strconv.Atoi(value); err != nil { + return fmt.Errorf("The max replicas should be a integer.") + } else { + maxReplicas = valueInt + } + } + + if minReplicas > maxReplicas { + return fmt.Errorf("The max replicas should be same or bigger than min replicas.") + } + + err := validateHPAMetrics(metric) + if err != nil { + return err + } + + if value, ok := annotations[constants.TargetUtilizationPercentage]; ok { + t, err := strconv.Atoi(value) + if err != nil { + return fmt.Errorf("The target utilization percentage should be a [1-100] integer.") + } else if metric == constants.AutoScalerMetricsMemory && t < 1 { + return fmt.Errorf("The target memory should be greater than 1 MiB") + } + } + + return nil +} + +// Validate of autoscaler HPA metrics +func validateHPAMetrics(metric constants.AutoscalerMetricsType) error { + for _, item := range constants.AutoscalerAllowedMetricsList { + if item == metric { + return nil + } + } + return fmt.Errorf("[%s] is not a supported metric.\n", metric) + +} diff --git a/apis/serving/v1alpha1/servingruntime_webhook_test.go b/apis/serving/v1alpha1/servingruntime_webhook_test.go new file mode 100644 index 00000000..27e4b9bc --- /dev/null +++ b/apis/serving/v1alpha1/servingruntime_webhook_test.go @@ -0,0 +1,106 @@ +/* +Copyright 2021 IBM Corporation + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
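Taken together, the validators above admit a ModelMesh-enabled runtime only when the autoscaler class is `hpa`, the metric is on the allowed list, the target utilization is an integer in [1, 100], min-scale is at least 1 and no greater than max-scale, and `spec.replicas` is left unset. As a compact reference, a sketch (not part of the patch) of an annotation set that passes every check; the literal keys mirror the test fixture that follows:

```go
// Package example is an illustrative sketch only: the annotation set that
// opts a (Cluster)ServingRuntime in to HPA autoscaling and passes the three
// validators above. spec.replicas must be left unset alongside these.
package example

var hpaAnnotations = map[string]string{
	"serving.kserve.io/autoscalerClass":             "hpa", // selects the HPA autoscaler class
	"serving.kserve.io/metrics":                     "cpu", // "cpu" or "memory" (the metrics the tests exercise)
	"serving.kserve.io/targetUtilizationPercentage": "75",  // integer in [1, 100]
	"serving.kserve.io/min-scale":                   "2",   // must be >= 1
	"serving.kserve.io/max-scale":                   "4",   // must be >= min-scale
}
```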
+*/ +package v1alpha1 + +import ( + "math" + "testing" + + "github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + "github.com/kserve/kserve/pkg/constants" + mmcontstant "github.com/kserve/modelmesh-serving/pkg/constants" +) + +func makeTestRawServingRuntime() kservev1alpha.ServingRuntime { + servingRuntime := kservev1alpha.ServingRuntime{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foo", + Namespace: "default", + Annotations: map[string]string{ + "serving.kserve.io/autoscalerClass": "hpa", + "serving.kserve.io/metrics": "cpu", + "serving.kserve.io/targetUtilizationPercentage": "75", + "serving.kserve.io/min-scale": "2", + "serving.kserve.io/max-scale": "3", + }, + }, + } + + return servingRuntime +} + +func TestValidAutoscalerTypeAndHPAMetrics(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + g.Expect(validateServingRuntimeAutoscaler(sr.Annotations)).Should(gomega.Succeed()) +} +func TestInvalidAutoscalerClassType(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[constants.AutoscalerClass] = "test" + g.Expect(validateServingRuntimeAutoscaler(sr.Annotations)).ShouldNot(gomega.Succeed()) +} + +func TestInvalidAutoscalerTargetUtilizationPercentageLowValue(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[constants.TargetUtilizationPercentage] = "-1" + g.Expect(validateAutoscalerTargetUtilizationPercentage(sr.Annotations)).ShouldNot(gomega.Succeed()) +} + +func TestInvalidAutoscalerTargetUtilizationPercentageHighValue(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[constants.TargetUtilizationPercentage] = "101" + g.Expect(validateAutoscalerTargetUtilizationPercentage(sr.Annotations)).ShouldNot(gomega.Succeed()) +} + +func TestInvalidAutoscalerLowMinReplicas(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[mmcontstant.MinScaleAnnotationKey] = "0" + g.Expect(validateScalingHPA(sr.Annotations)).ShouldNot(gomega.Succeed()) +} + +func TestInvalidAutoscalerMaxReplicasMustBiggerThanMixReplicas(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[mmcontstant.MinScaleAnnotationKey] = "4" + sr.ObjectMeta.Annotations[mmcontstant.MaxScaleAnnotationKey] = "3" + g.Expect(validateAutoScalingReplicas(sr.Annotations, math.MaxUint16)).ShouldNot(gomega.Succeed()) +} +func TestDuplicatedReplicas(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + g.Expect(validateAutoScalingReplicas(sr.Annotations, 1)).ShouldNot(gomega.Succeed()) +} + +func TestValidAutoscalerMetricsType(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[constants.AutoscalerMetrics] = "memory" + g.Expect(validateHPAMetrics(constants.AutoscalerMetricsType("memory"))).Should(gomega.Succeed()) +} + +func TestInvalidAutoscalerMetricsType(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[constants.AutoscalerMetrics] = "conccurrency" + g.Expect(validateHPAMetrics(constants.AutoscalerMetricsType("conccurrency"))).ShouldNot(gomega.Succeed()) +} diff --git a/apis/serving/v1alpha1/zz_generated.deepcopy.go b/apis/serving/v1alpha1/zz_generated.deepcopy.go index 
73421bf5..c2c0e74f 100644 --- a/apis/serving/v1alpha1/zz_generated.deepcopy.go +++ b/apis/serving/v1alpha1/zz_generated.deepcopy.go @@ -260,6 +260,21 @@ func (in *S3StorageSource) DeepCopy() *S3StorageSource { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServingRuntimeWebhook) DeepCopyInto(out *ServingRuntimeWebhook) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServingRuntimeWebhook. +func (in *ServingRuntimeWebhook) DeepCopy() *ServingRuntimeWebhook { + if in == nil { + return nil + } + out := new(ServingRuntimeWebhook) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Storage) DeepCopyInto(out *Storage) { *out = *in diff --git a/config/certmanager/certificate.yaml b/config/certmanager/certificate.yaml index 6bd4ae7b..5782dfe3 100644 --- a/config/certmanager/certificate.yaml +++ b/config/certmanager/certificate.yaml @@ -26,14 +26,14 @@ spec: apiVersion: cert-manager.io/v1 kind: Certificate metadata: - name: serving-cert # this name should match the one appeared in kustomizeconfig.yaml + name: modelmesh-webhook-server-cert # this name should match the one appeared in kustomizeconfig.yaml namespace: system spec: - # $(SERVICE_NAME) and $(SERVICE_NAMESPACE) will be substituted by kustomize + # SERVICE_NAME_PLACEHOLDER and SERVICE_NAMESPACE_PLACEHOLDER will be substituted by kustomize dnsNames: - - $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc - - $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc.cluster.local + - $(SERVICE_NAME_PLACEHOLDER).$(SERVICE_NAMESPACE_PLACEHOLDER).svc + - $(SERVICE_NAME_PLACEHOLDER).$(SERVICE_NAMESPACE_PLACEHOLDER).svc.cluster.local issuerRef: kind: Issuer name: selfsigned-issuer - secretName: webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize + secretName: modelmesh-webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize diff --git a/config/crd/patches/webhook_in_predictors.yaml b/config/crd/patches/webhook_in_predictors.yaml index c3daebf5..792421ae 100644 --- a/config/crd/patches/webhook_in_predictors.yaml +++ b/config/crd/patches/webhook_in_predictors.yaml @@ -27,6 +27,5 @@ spec: # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager) caBundle: Cg== service: - namespace: system name: webhook-service path: /convert diff --git a/config/crd/patches/webhook_in_servingruntimes.yaml b/config/crd/patches/webhook_in_servingruntimes.yaml index 704b6026..30724f90 100644 --- a/config/crd/patches/webhook_in_servingruntimes.yaml +++ b/config/crd/patches/webhook_in_servingruntimes.yaml @@ -27,6 +27,5 @@ spec: # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager) caBundle: Cg== service: - namespace: system name: webhook-service path: /convert diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index 8001c186..b5467fc6 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -11,72 +11,36 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# Adds namespace to all resources. 
-#namespace: model-serving -# Value of this field is prepended to the -# names of all resources, e.g. a deployment named -# "wordpress" becomes "alices-wordpress". -# Note that it should also match with the prefix (text before '-') of the namespace -# field above. -#namePrefix: model-serving- - -# Labels to add to all resources and selectors. -#commonLabels: -# someName: someValue - -# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in -# crd/kustomization.yaml -#- ../webhook -# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. -#- ../certmanager -# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. -#- ../prometheus - -#patchesStrategicMerge: -# Protect the /metrics endpoint by putting it behind auth. -# If you want your controller-manager to expose the /metrics -# endpoint w/o any authn/z, please comment the following line. -#- manager_auth_proxy_patch.yaml - -# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in -# crd/kustomization.yaml -#- manager_webhook_patch.yaml - -# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. -# Uncomment 'CERTMANAGER' sections in crd/kustomization.yaml to enable the CA injection in the admission webhooks. -# 'CERTMANAGER' needs to be enabled to use ca injection -#- webhookcainjection_patch.yaml - -# the following config is for teaching kustomize how to do var substitution -#vars: -# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. -#- name: CERTIFICATE_NAMESPACE # namespace of the certificate CR -# objref: -# kind: Certificate -# group: cert-manager.io -# version: v1alpha2 -# name: serving-cert # this name should match the one in certificate.yaml -# fieldref: -# fieldpath: metadata.namespace -#- name: CERTIFICATE_NAME -# objref: -# kind: Certificate -# group: cert-manager.io -# version: v1alpha2 -# name: serving-cert # this name should match the one in certificate.yaml -#- name: SERVICE_NAMESPACE # namespace of the service -# objref: -# kind: Service -# version: v1 -# name: webhook-service -# fieldref: -# fieldpath: metadata.namespace -#- name: SERVICE_NAME -# objref: -# kind: Service -# version: v1 -# name: webhook-service +vars: + - fieldref: + fieldPath: metadata.namespace + name: CERTIFICATE_NAMESPACE_PLACEHOLDER + objref: + group: cert-manager.io + kind: Certificate + name: modelmesh-webhook-server-cert + version: v1 + - fieldref: {} + name: CERTIFICATE_NAME_PLACEHOLDER + objref: + group: cert-manager.io + kind: Certificate + name: modelmesh-webhook-server-cert + version: v1 + - fieldref: + fieldPath: metadata.namespace + name: SERVICE_NAMESPACE_PLACEHOLDER + objref: + kind: Service + name: modelmesh-webhook-server-service + version: v1 + - fieldref: {} + name: SERVICE_NAME_PLACEHOLDER + objref: + kind: Service + name: modelmesh-webhook-server-service + version: v1 configMapGenerator: - files: @@ -93,3 +57,9 @@ kind: Kustomization resources: - ../crd - ../manager + - ../webhook + - ../certmanager + +patches: + - path: manager_webhook_patch.yaml + - path: webhookcainjection_patch.yaml diff --git a/config/default/manager_auth_proxy_patch.yaml b/config/default/manager_auth_proxy_patch.yaml index dbb1175c..5e40f99e 100644 --- a/config/default/manager_auth_proxy_patch.yaml +++ b/config/default/manager_auth_proxy_patch.yaml @@ -17,7 +17,6 @@ apiVersion: apps/v1 kind: Deployment metadata: name: 
controller-manager - namespace: system spec: template: spec: diff --git a/config/default/manager_webhook_patch.yaml b/config/default/manager_webhook_patch.yaml index b094527f..57279dbf 100644 --- a/config/default/manager_webhook_patch.yaml +++ b/config/default/manager_webhook_patch.yaml @@ -14,8 +14,7 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: controller-manager - namespace: system + name: modelmesh-controller spec: template: spec: @@ -23,14 +22,14 @@ spec: - name: manager ports: - containerPort: 9443 - name: webhook-server + name: webhook protocol: TCP volumeMounts: - mountPath: /tmp/k8s-webhook-server/serving-certs - name: cert + name: modelmesh-webhook-server-cert readOnly: true volumes: - - name: cert + - name: modelmesh-webhook-server-cert secret: defaultMode: 420 - secretName: webhook-server-cert + secretName: modelmesh-webhook-server-cert diff --git a/config/default/webhookcainjection_patch.yaml b/config/default/webhookcainjection_patch.yaml index 60e4bc91..33e8bcba 100644 --- a/config/default/webhookcainjection_patch.yaml +++ b/config/default/webhookcainjection_patch.yaml @@ -12,17 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # This patch add annotation to admission webhook config and -# the variables $(CERTIFICATE_NAMESPACE) and $(CERTIFICATE_NAME) will be substituted by kustomize. -apiVersion: admissionregistration.k8s.io/v1 -kind: MutatingWebhookConfiguration -metadata: - name: mutating-webhook-configuration - annotations: - cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) ---- +# the string CERTIFICATE_NAMESPACE_PLACEHOLDER and CERTIFICATE_NAME_PLACEHOLDER will be replaced by kustomize. apiVersion: admissionregistration.k8s.io/v1 kind: ValidatingWebhookConfiguration metadata: - name: validating-webhook-configuration + name: servingruntime.serving.kserve.io annotations: - cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) + cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE_PLACEHOLDER)/$(CERTIFICATE_NAME_PLACEHOLDER) diff --git a/config/dependencies/fvt.yaml b/config/dependencies/fvt.yaml index db6942d5..42859f50 100644 --- a/config/dependencies/fvt.yaml +++ b/config/dependencies/fvt.yaml @@ -43,6 +43,8 @@ spec: containers: - command: - etcd + - --data-dir # use data directory under /tmp for read/write access by non-root user on OpenShift + - /tmp/etcd.data - --listen-client-urls - http://0.0.0.0:2379 - --advertise-client-urls diff --git a/config/namespace-runtimes/kustomization.yaml b/config/namespace-runtimes/kustomization.yaml index 657e278f..e361106f 100644 --- a/config/namespace-runtimes/kustomization.yaml +++ b/config/namespace-runtimes/kustomization.yaml @@ -2,7 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization bases: - ../runtimes -patchesJson6902: +patches: - target: group: serving.kserve.io version: v1alpha1 diff --git a/config/prometheus/monitor.yaml b/config/prometheus/monitor.yaml index 53db59a4..a5dcccc7 100644 --- a/config/prometheus/monitor.yaml +++ b/config/prometheus/monitor.yaml @@ -18,7 +18,6 @@ metadata: labels: control-plane: controller-manager name: controller-manager-metrics-monitor - namespace: system spec: endpoints: - path: /metrics diff --git a/config/rbac/cluster-scope/kustomization.yaml b/config/rbac/cluster-scope/kustomization.yaml index 3cf2dadc..53af0645 100644 --- a/config/rbac/cluster-scope/kustomization.yaml +++ b/config/rbac/cluster-scope/kustomization.yaml @@ -15,3 +15,5 @@ 
resources: - ../common - role.yaml - role_binding.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization diff --git a/config/rbac/cluster-scope/role.yaml b/config/rbac/cluster-scope/role.yaml index e0110393..84cffb27 100644 --- a/config/rbac/cluster-scope/role.yaml +++ b/config/rbac/cluster-scope/role.yaml @@ -197,3 +197,15 @@ rules: - get - patch - update + - apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + - horizontalpodautoscalers/status + verbs: + - get + - list + - watch + - create + - delete + - update diff --git a/config/rbac/common/kustomization.yaml b/config/rbac/common/kustomization.yaml index 577e5b55..3168c7c7 100644 --- a/config/rbac/common/kustomization.yaml +++ b/config/rbac/common/kustomization.yaml @@ -24,6 +24,7 @@ resources: - modelmesh-service-account.yaml - networkpolicy-controller.yaml - networkpolicy-runtimes.yaml + - networkpolicy-webhook.yaml # Comment the following 4 lines if you want to disable # the auth proxy (https://github.com/brancz/kube-rbac-proxy) # which protects your /metrics endpoint. diff --git a/config/rbac/common/networkpolicy-webhook.yaml b/config/rbac/common/networkpolicy-webhook.yaml new file mode 100644 index 00000000..8c337ac2 --- /dev/null +++ b/config/rbac/common/networkpolicy-webhook.yaml @@ -0,0 +1,29 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: modelmesh-webhook +spec: + podSelector: + matchLabels: + app.kubernetes.io/managed-by: modelmesh-controller + control-plane: modelmesh-controller + ingress: + # exposed for webhook + - ports: + - port: 9443 + protocol: TCP + policyTypes: + - Ingress diff --git a/config/rbac/namespace-scope/kustomization.yaml b/config/rbac/namespace-scope/kustomization.yaml index 3cf2dadc..53af0645 100644 --- a/config/rbac/namespace-scope/kustomization.yaml +++ b/config/rbac/namespace-scope/kustomization.yaml @@ -15,3 +15,5 @@ resources: - ../common - role.yaml - role_binding.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization diff --git a/config/rbac/namespace-scope/role.yaml b/config/rbac/namespace-scope/role.yaml index 8624c152..238c7b51 100644 --- a/config/rbac/namespace-scope/role.yaml +++ b/config/rbac/namespace-scope/role.yaml @@ -165,3 +165,15 @@ rules: - get - patch - update + - apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + - horizontalpodautoscalers/status + verbs: + - get + - list + - watch + - create + - delete + - update diff --git a/config/webhook/kustomization.yaml b/config/webhook/kustomization.yaml new file mode 100644 index 00000000..79e0fef7 --- /dev/null +++ b/config/webhook/kustomization.yaml @@ -0,0 +1,20 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +resources: + - manifests.yaml + - service.yaml + +configurations: + - kustomizeconfig.yaml diff --git a/config/webhook/kustomizeconfig.yaml b/config/webhook/kustomizeconfig.yaml new file mode 100644 index 00000000..487da1e6 --- /dev/null +++ b/config/webhook/kustomizeconfig.yaml @@ -0,0 +1,31 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# the following config is for teaching kustomize where to look at when substituting vars. +# It requires kustomize v2.1.0 or newer to work properly. +nameReference: + - kind: Service + version: v1 + fieldSpecs: + - kind: ValidatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/name + +namespace: + - kind: ValidatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/namespace + create: true + +varReference: + - path: metadata/annotations diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml new file mode 100644 index 00000000..5cd4b89d --- /dev/null +++ b/config/webhook/manifests.yaml @@ -0,0 +1,40 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
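The ValidatingWebhookConfiguration below points the API server at the path declared by the kubebuilder marker on `ServingRuntimeWebhook`. The corresponding main.go change appears only in the diffstat; a registration along these lines (a sketch against the controller-runtime webhook server, not the literal hunk) is what ties the handler to that path:

```go
// Sketch only — the actual main.go hunk is not included in this excerpt.
package sketch

import (
	servingv1alpha1 "github.com/kserve/modelmesh-serving/apis/serving/v1alpha1"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/webhook"
)

// registerServingRuntimeWebhook wires the validating handler to the path used
// in the manifest below; the admission decoder is injected by controller-runtime.
func registerServingRuntimeWebhook(mgr ctrl.Manager) {
	mgr.GetWebhookServer().Register(
		"/validate-serving-modelmesh-io-v1alpha1-servingruntime",
		&webhook.Admission{Handler: &servingv1alpha1.ServingRuntimeWebhook{Client: mgr.GetClient()}},
	)
}
```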
+apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + name: servingruntime.serving.kserve.io +webhooks: + - admissionReviewVersions: + - v1 + clientConfig: + caBundle: Cg== + service: + name: modelmesh-webhook-server-service + path: /validate-serving-modelmesh-io-v1alpha1-servingruntime + port: 9443 + failurePolicy: Fail + name: servingruntime.modelmesh-webhook-server.default + rules: + - apiGroups: + - serving.kserve.io + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - clusterservingruntimes + - servingruntimes + sideEffects: None diff --git a/config/webhook/service.yaml b/config/webhook/service.yaml new file mode 100644 index 00000000..b1f4d3db --- /dev/null +++ b/config/webhook/service.yaml @@ -0,0 +1,25 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +apiVersion: v1 +kind: Service +metadata: + name: modelmesh-webhook-server-service + namespace: system +spec: + ports: + - port: 9443 + protocol: TCP + targetPort: webhook + selector: + control-plane: modelmesh-controller diff --git a/controllers/autoscaler/autoscaler_reconciler.go b/controllers/autoscaler/autoscaler_reconciler.go new file mode 100644 index 00000000..95909b71 --- /dev/null +++ b/controllers/autoscaler/autoscaler_reconciler.go @@ -0,0 +1,129 @@ +//Copyright 2021 IBM Corporation +// +//Licensed under the Apache License, Version 2.0 (the "License"); +//you may not use this file except in compliance with the License. +//You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +//Unless required by applicable law or agreed to in writing, software +//distributed under the License is distributed on an "AS IS" BASIS, +//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//See the License for the specific language governing permissions and +//limitations under the License. 
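The next file introduces the autoscaler selection layer: it reads the `serving.kserve.io/autoscalerClass` annotation and, for both the `hpa` and `none` classes, builds an HPA reconciler (for `none` only so a leftover HPA can be deleted). A sketch of the call pattern, matching the signatures below and the servingruntime_controller.go hunk later in this patch:

```go
package sketch

import (
	kserveapi "github.com/kserve/kserve/pkg/apis/serving/v1alpha1"
	"github.com/kserve/modelmesh-serving/controllers/autoscaler"
	"k8s.io/apimachinery/pkg/runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// reconcileAutoscaling sketches how a caller drives the new package: the
// ServingRuntime controller passes scaleToZero=true for scale-to-zero and for
// the "none" class, which deletes any existing HPA; otherwise the HPA is
// created or updated to match the runtime's annotations.
func reconcileAutoscaling(cl client.Client, scheme *runtime.Scheme, rt *kserveapi.ServingRuntime,
	mmDeploymentName, mmNamespace string, scaleToZero bool) error {
	asr, err := autoscaler.NewAutoscalerReconciler(cl, scheme, rt, mmDeploymentName, mmNamespace)
	if err != nil {
		return err
	}
	_, err = asr.Reconcile(scaleToZero)
	return err
}
```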
+ +package autoscaler + +import ( + "fmt" + + "github.com/pkg/errors" + + kserveapi "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/modelmesh-serving/controllers/hpa" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +const ( + AutoscalerClassNone = "none" +) + +type Autoscaler struct { + AutoscalerClass constants.AutoscalerClassType + HPA *hpa.HPAReconciler +} + +// AutoscalerReconciler is the struct of Raw K8S Object +type AutoscalerReconciler struct { + client client.Client + scheme *runtime.Scheme + Autoscaler *Autoscaler +} + +func NewAutoscalerReconciler(client client.Client, + scheme *runtime.Scheme, + servingRuntime interface{}, mmDeploymentName string, mmNamespace string) (*AutoscalerReconciler, error) { + + as, err := createAutoscaler(client, scheme, servingRuntime, mmDeploymentName, mmNamespace) + if err != nil { + return nil, err + } + return &AutoscalerReconciler{ + client: client, + scheme: scheme, + Autoscaler: as, + }, err +} + +func getAutoscalerClass(metadata metav1.ObjectMeta) constants.AutoscalerClassType { + annotations := metadata.Annotations + if value, ok := annotations[constants.AutoscalerClass]; ok { + return constants.AutoscalerClassType(value) + } else { + return AutoscalerClassNone + } +} + +func createAutoscaler(client client.Client, + scheme *runtime.Scheme, servingRuntime interface{}, mmDeploymentName string, mmNamespace string) (*Autoscaler, error) { + var runtimeMeta metav1.ObjectMeta + isSR := false + + sr, ok := servingRuntime.(*kserveapi.ServingRuntime) + if ok { + runtimeMeta = sr.ObjectMeta + isSR = true + } + csr, ok := servingRuntime.(*kserveapi.ClusterServingRuntime) + if ok { + runtimeMeta = csr.ObjectMeta + } + + as := &Autoscaler{} + ac := getAutoscalerClass(runtimeMeta) + as.AutoscalerClass = ac + + switch ac { + case constants.AutoscalerClassHPA: + as.HPA = hpa.NewHPAReconciler(client, scheme, runtimeMeta, mmDeploymentName, mmNamespace) + if isSR { + if err := controllerutil.SetControllerReference(sr, as.HPA.HPA, scheme); err != nil { + return nil, fmt.Errorf("fails to set HPA owner reference for ServingRuntime: %w", err) + } + } else { + if err := controllerutil.SetControllerReference(csr, as.HPA.HPA, scheme); err != nil { + return nil, fmt.Errorf("fails to set HPA owner reference for ClusterServingRuntime: %w", err) + } + } + case AutoscalerClassNone: + // Set HPA reconciler even though AutoscalerClass is None to delete existing hpa + as.HPA = hpa.NewHPAReconciler(client, scheme, runtimeMeta, mmDeploymentName, mmNamespace) + return as, nil + default: + return nil, errors.New("unknown autoscaler class type.") + } + return as, nil +} + +// Reconcile ... 
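// For the hpa and none classes this delegates to the HPA reconciler, which
// creates or updates the HPA (or deletes it when scaleToZero is true); the
// cached HPA object is then cleared on scale-to-zero.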
+func (r *AutoscalerReconciler) Reconcile(scaleToZero bool) (*Autoscaler, error) { + //reconcile Autoscaler + //In the case of a new autoscaler plugin, it checks AutoscalerClassType + if r.Autoscaler.AutoscalerClass == constants.AutoscalerClassHPA || r.Autoscaler.AutoscalerClass == AutoscalerClassNone { + _, err := r.Autoscaler.HPA.Reconcile(scaleToZero) + if err != nil { + return nil, err + } + } + + if scaleToZero { + r.Autoscaler.HPA.HPA = nil + } + + return r.Autoscaler, nil +} diff --git a/controllers/autoscaler/autoscaler_reconciler_test.go b/controllers/autoscaler/autoscaler_reconciler_test.go new file mode 100644 index 00000000..8de8c1ba --- /dev/null +++ b/controllers/autoscaler/autoscaler_reconciler_test.go @@ -0,0 +1,72 @@ +//Copyright 2021 IBM Corporation +// +//Licensed under the Apache License, Version 2.0 (the "License"); +//you may not use this file except in compliance with the License. +//You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +//Unless required by applicable law or agreed to in writing, software +//distributed under the License is distributed on an "AS IS" BASIS, +//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//See the License for the specific language governing permissions and +//limitations under the License. + +package autoscaler + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + + "github.com/kserve/kserve/pkg/constants" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestGetAutoscalerClass(t *testing.T) { + servingRuntimeName := "my-model" + namespace := "test" + + testCases := []struct { + name string + servingRuntimeMetaData *metav1.ObjectMeta + expectedAutoScalerType constants.AutoscalerClassType + }{ + { + name: "Return default AutoScaler, if the autoscalerClass annotation is not set", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{}, + }, + expectedAutoScalerType: AutoscalerClassNone, + }, + { + name: "Return none AutoScaler, if the autoscalerClass annotation set none", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{"serving.kserve.io/autoscalerClass": "none"}, + }, + expectedAutoScalerType: AutoscalerClassNone, + }, + { + name: "Return hpa AutoScaler, if the autoscalerClass annotation set hpa", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{"serving.kserve.io/autoscalerClass": "hpa"}, + }, + expectedAutoScalerType: constants.AutoscalerClassHPA, + }, + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + result := getAutoscalerClass(*tt.servingRuntimeMetaData) + if diff := cmp.Diff(tt.expectedAutoScalerType, result); diff != "" { + t.Errorf("Test %q unexpected result (-want +got): %v", t.Name(), diff) + } + }) + } +} diff --git a/controllers/hpa/hpa_reconciler.go b/controllers/hpa/hpa_reconciler.go new file mode 100644 index 00000000..1158dd8e --- /dev/null +++ b/controllers/hpa/hpa_reconciler.go @@ -0,0 +1,193 @@ +//Copyright 2021 IBM Corporation +// +//Licensed under the Apache License, Version 2.0 (the "License"); +//you may not use this file except in compliance with the License. 
+//You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +//Unless required by applicable law or agreed to in writing, software +//distributed under the License is distributed on an "AS IS" BASIS, +//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//See the License for the specific language governing permissions and +//limitations under the License. + +package hpa + +import ( + "context" + "strconv" + + "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/kserve/pkg/utils" + mmcontstant "github.com/kserve/modelmesh-serving/pkg/constants" + v2beta2 "k8s.io/api/autoscaling/v2beta2" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/equality" + apierr "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + logf "sigs.k8s.io/controller-runtime/pkg/log" +) + +var log = logf.Log.WithName("HPAReconciler") + +// HPAReconciler is the struct of Raw K8S Object +type HPAReconciler struct { + client client.Client + scheme *runtime.Scheme + HPA *v2beta2.HorizontalPodAutoscaler +} + +func NewHPAReconciler(client client.Client, + scheme *runtime.Scheme, runtimeMeta metav1.ObjectMeta, mmDeploymentName string, mmNamespace string) *HPAReconciler { + return &HPAReconciler{ + client: client, + scheme: scheme, + HPA: createHPA(runtimeMeta, mmDeploymentName, mmNamespace), + } +} + +func getHPAMetrics(metadata metav1.ObjectMeta) []v2beta2.MetricSpec { + var metrics []v2beta2.MetricSpec + var utilization int32 = constants.DefaultCPUUtilization + + annotations := metadata.Annotations + resourceName := corev1.ResourceCPU + + if value, ok := annotations[constants.TargetUtilizationPercentage]; ok { + utilizationInt, _ := strconv.Atoi(value) + utilization = int32(utilizationInt) + } + + if value, ok := annotations[constants.AutoscalerMetrics]; ok { + resourceName = corev1.ResourceName(value) + } + + metricTarget := v2beta2.MetricTarget{ + Type: "Utilization", + AverageUtilization: &utilization, + } + + ms := v2beta2.MetricSpec{ + Type: v2beta2.ResourceMetricSourceType, + Resource: &v2beta2.ResourceMetricSource{ + Name: resourceName, + Target: metricTarget, + }, + } + + metrics = append(metrics, ms) + return metrics +} + +func createHPA(runtimeMeta metav1.ObjectMeta, mmDeploymentName string, mmNamespace string) *v2beta2.HorizontalPodAutoscaler { + minReplicas := int32(constants.DefaultMinReplicas) + maxReplicas := int32(constants.DefaultMinReplicas) + annotations := runtimeMeta.Annotations + + if value, ok := annotations[mmcontstant.MinScaleAnnotationKey]; ok { + minReplicasInt, _ := strconv.Atoi(value) + minReplicas = int32(minReplicasInt) + + } + if value, ok := annotations[mmcontstant.MaxScaleAnnotationKey]; ok { + maxReplicasInt, _ := strconv.Atoi(value) + maxReplicas = int32(maxReplicasInt) + } + + if maxReplicas < minReplicas { + maxReplicas = minReplicas + } + + metrics := getHPAMetrics(runtimeMeta) + + hpaObjectMeta := metav1.ObjectMeta{ + Name: mmDeploymentName, + Namespace: mmNamespace, + Labels: utils.Union(runtimeMeta.Labels, map[string]string{ + constants.InferenceServicePodLabelKey: runtimeMeta.Name, + constants.KServiceComponentLabel: string(v1beta1.PredictorComponent), + }), + Annotations: runtimeMeta.Annotations, + } + + hpa := &v2beta2.HorizontalPodAutoscaler{ + ObjectMeta: hpaObjectMeta, + Spec: 
v2beta2.HorizontalPodAutoscalerSpec{ + ScaleTargetRef: v2beta2.CrossVersionObjectReference{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: hpaObjectMeta.Name, + }, + MinReplicas: &minReplicas, + MaxReplicas: maxReplicas, + + Metrics: metrics, + Behavior: &v2beta2.HorizontalPodAutoscalerBehavior{}, + }, + } + return hpa +} + +// checkHPAExist checks if the hpa exists? +func (r *HPAReconciler) checkHPAExist(client client.Client) (constants.CheckResultType, *v2beta2.HorizontalPodAutoscaler, error) { + existingHPA := &v2beta2.HorizontalPodAutoscaler{} + err := client.Get(context.TODO(), types.NamespacedName{ + Namespace: r.HPA.ObjectMeta.Namespace, + Name: r.HPA.ObjectMeta.Name, + }, existingHPA) + if err != nil { + if apierr.IsNotFound(err) { + return constants.CheckResultCreate, nil, nil + } + return constants.CheckResultUnknown, nil, err + } + + //existed, check equivalent + if semanticHPAEquals(r.HPA, existingHPA) { + return constants.CheckResultExisted, existingHPA, nil + } + return constants.CheckResultUpdate, existingHPA, nil +} + +func semanticHPAEquals(desired, existing *v2beta2.HorizontalPodAutoscaler) bool { + return equality.Semantic.DeepEqual(desired.Spec.Metrics, existing.Spec.Metrics) && + equality.Semantic.DeepEqual(desired.Spec.MaxReplicas, existing.Spec.MaxReplicas) && + equality.Semantic.DeepEqual(*desired.Spec.MinReplicas, *existing.Spec.MinReplicas) +} + +// Reconcile ... +func (r *HPAReconciler) Reconcile(scaleToZero bool) (*v2beta2.HorizontalPodAutoscaler, error) { + //reconcile + checkResult, existingHPA, err := r.checkHPAExist(r.client) + log.Info("service reconcile", "checkResult", checkResult, "scaleToZero", scaleToZero, "err", err) + if err != nil { + return nil, err + } + + if checkResult == constants.CheckResultCreate && !scaleToZero { + if err = r.client.Create(context.TODO(), r.HPA); err != nil { + return nil, err + } + return r.HPA, nil + + } else if checkResult == constants.CheckResultUpdate { //CheckResultUpdate + if err = r.client.Update(context.TODO(), r.HPA); err != nil { + return nil, err + } + return r.HPA, nil + + } else if checkResult == constants.CheckResultExisted && scaleToZero { + // when scaleToZero is true, delete HPA if it exist + if err = r.client.Delete(context.TODO(), existingHPA, &client.DeleteOptions{}); err != nil { + return nil, err + } + return nil, nil + } else { + return existingHPA, nil + } +} diff --git a/controllers/hpa/hpa_reconciler_test.go b/controllers/hpa/hpa_reconciler_test.go new file mode 100644 index 00000000..bdb857e9 --- /dev/null +++ b/controllers/hpa/hpa_reconciler_test.go @@ -0,0 +1,157 @@ +//Copyright 2021 IBM Corporation +// +//Licensed under the Apache License, Version 2.0 (the "License"); +//you may not use this file except in compliance with the License. +//You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +//Unless required by applicable law or agreed to in writing, software +//distributed under the License is distributed on an "AS IS" BASIS, +//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//See the License for the specific language governing permissions and +//limitations under the License. 
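Before the unit tests, a compact sketch (not part of the patch) of the mapping `createHPA` performs: min/max-scale annotations become MinReplicas/MaxReplicas and the HPA targets the ModelMesh Deployment by name. It uses the exported annotation-key constants rather than assuming their string values, and the expected output follows from the defaults verified in the tests below:

```go
package hpa

import (
	"fmt"

	mmconstant "github.com/kserve/modelmesh-serving/pkg/constants"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Example_createHPA illustrates the annotations-to-HPA translation.
func Example_createHPA() {
	meta := metav1.ObjectMeta{
		Name:      "my-runtime",
		Namespace: "modelmesh-serving",
		Annotations: map[string]string{
			mmconstant.MinScaleAnnotationKey: "2",
			mmconstant.MaxScaleAnnotationKey: "4",
		},
	}
	hpa := createHPA(meta, "modelmesh-serving-my-runtime", "modelmesh-serving")
	fmt.Println(*hpa.Spec.MinReplicas, hpa.Spec.MaxReplicas, hpa.Spec.ScaleTargetRef.Name)
	// Output: 2 4 modelmesh-serving-my-runtime
}
```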
+ +package hpa + +import ( + "fmt" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/kserve/kserve/pkg/constants" + mmcontstant "github.com/kserve/modelmesh-serving/pkg/constants" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestGetHPAMetrics(t *testing.T) { + servingRuntimeName := "my-model" + namespace := "test" + + testCases := []struct { + name string + servingRuntimeMetaData *metav1.ObjectMeta + expectedTargetUtilizationPercentage int32 + expectedAutoscalerMetrics corev1.ResourceName + }{ + { + name: "Check default HPAMetrics", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{}, + }, + expectedTargetUtilizationPercentage: int32(80), + expectedAutoscalerMetrics: corev1.ResourceName("cpu"), + }, + { + name: "Check HPAMetrics if annotations has " + constants.AutoscalerMetrics, + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{constants.AutoscalerMetrics: "memory"}, + }, + expectedTargetUtilizationPercentage: int32(80), + expectedAutoscalerMetrics: corev1.ResourceName("memory"), + }, + { + name: "Check HPAMetrics if annotations has " + constants.TargetUtilizationPercentage, + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{constants.TargetUtilizationPercentage: "50"}, + }, + expectedTargetUtilizationPercentage: int32(50), + expectedAutoscalerMetrics: corev1.ResourceName("cpu"), + }, + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + result := getHPAMetrics(*tt.servingRuntimeMetaData) + if diff := cmp.Diff(tt.expectedTargetUtilizationPercentage, *result[0].Resource.Target.AverageUtilization); diff != "" { + t.Errorf("Test %q unexpected result (-want +got): %v", t.Name(), diff) + } + if diff := cmp.Diff(tt.expectedAutoscalerMetrics, result[0].Resource.Name); diff != "" { + t.Errorf("Test %q unexpected result (-want +got): %v", t.Name(), diff) + } + }) + } +} + +func TestCreateHPA(t *testing.T) { + servingRuntimeName := "my-model" + namespace := "test" + deploymentName := fmt.Sprintf("%s-%s", servingRuntimeName, namespace) + + testCases := []struct { + name string + servingRuntimeMetaData *metav1.ObjectMeta + mmDeploymentName *string + mmNamespace *string + expectedMinReplicas int32 + expectedMaxReplicas int32 + }{ + { + name: "Check default HPA replicas", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{}, + }, + mmDeploymentName: &deploymentName, + mmNamespace: &namespace, + expectedMinReplicas: int32(1), + expectedMaxReplicas: int32(1), + }, + { + name: "Check HPA replicas if annotations has " + mmcontstant.MaxScaleAnnotationKey, + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{mmcontstant.MaxScaleAnnotationKey: "2"}, + }, + mmDeploymentName: &deploymentName, + mmNamespace: &namespace, + expectedMinReplicas: int32(1), + expectedMaxReplicas: int32(2), + }, + { + name: "Check HPA replicas if annotations has " + mmcontstant.MinScaleAnnotationKey + ". 
max replicas should be the same as min replicas", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{mmcontstant.MinScaleAnnotationKey: "2"}, + }, + mmDeploymentName: &deploymentName, + mmNamespace: &namespace, + expectedMinReplicas: int32(2), + expectedMaxReplicas: int32(2), + }, + { + name: "Check HPA replicas if annotations set min/max replicas both", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{mmcontstant.MinScaleAnnotationKey: "2", mmcontstant.MaxScaleAnnotationKey: "3"}, + }, + mmDeploymentName: &deploymentName, + mmNamespace: &namespace, + expectedMinReplicas: int32(2), + expectedMaxReplicas: int32(3), + }, + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + hpa := createHPA(*tt.servingRuntimeMetaData, *tt.mmDeploymentName, *tt.mmNamespace) + if diff := cmp.Diff(tt.expectedMinReplicas, *hpa.Spec.MinReplicas); diff != "" { + t.Errorf("Test %q unexpected result (-want +got): %v", t.Name(), diff) + } + if diff := cmp.Diff(tt.expectedMaxReplicas, hpa.Spec.MaxReplicas); diff != "" { + t.Errorf("Test %q unexpected result (-want +got): %v", t.Name(), diff) + } + }) + } +} diff --git a/controllers/servingruntime_controller.go b/controllers/servingruntime_controller.go index 7fdefac8..adbfd46a 100644 --- a/controllers/servingruntime_controller.go +++ b/controllers/servingruntime_controller.go @@ -23,31 +23,30 @@ import ( "sync" "time" + "github.com/go-logr/logr" + mf "github.com/manifestival/manifestival" + + kserveapi "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + api "github.com/kserve/modelmesh-serving/apis/serving/v1alpha1" + "github.com/kserve/modelmesh-serving/controllers/autoscaler" + "github.com/kserve/modelmesh-serving/controllers/modelmesh" "github.com/kserve/modelmesh-serving/pkg/config" - "github.com/kserve/modelmesh-serving/pkg/mmesh" "github.com/kserve/modelmesh-serving/pkg/predictor_source" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/event" - "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" - "github.com/go-logr/logr" - mf "github.com/manifestival/manifestival" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/source" - - kserveapi "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" - api "github.com/kserve/modelmesh-serving/apis/serving/v1alpha1" - "github.com/kserve/modelmesh-serving/controllers/modelmesh" ) const ( @@ -184,6 +183,7 @@ func (r *ServingRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Reque // Reconcile this serving runtime rt := &kserveapi.ServingRuntime{} + crt := &kserveapi.ClusterServingRuntime{} var owner mf.Owner var spec *kserveapi.ServingRuntimeSpec @@ -197,7 +197,6 @@ func (r *ServingRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Reque return r.removeRuntimeFromInfoMap(req) } // try to find the runtime in cluster ServingRuntimes - crt := &kserveapi.ClusterServingRuntime{} if err = r.Client.Get(ctx, types.NamespacedName{Name: req.Name}, crt); err == nil { spec = &crt.Spec owner = crt @@ 
-270,11 +269,58 @@ func (r *ServingRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{}, nil } + // At the moment, ModelMesh deployment name is the combined of ServingRuntime and deploymentObject name. + // TO-DO: refactor the mmDeploymentName to use mmDeployment object name. + mmDeploymentName := fmt.Sprintf("%s-%s", mmDeployment.ServiceName, mmDeployment.Name) + + var as *autoscaler.AutoscalerReconciler + if crt.GetName() != "" { + as, err = autoscaler.NewAutoscalerReconciler(r.Client, r.Client.Scheme(), crt, mmDeploymentName, mmDeployment.Namespace) + } else { + as, err = autoscaler.NewAutoscalerReconciler(r.Client, r.Client.Scheme(), rt, mmDeploymentName, mmDeployment.Namespace) + } + + if err != nil { + log.Error(err, "fails to create an autoscaler controller: %w", "skip to create HPA") + } + replicas, requeueDuration, err := r.determineReplicasAndRequeueDuration(ctx, log, cfg, spec, req.NamespacedName) if err != nil { return RequeueResult, fmt.Errorf("could not determine replicas: %w", err) } - mmDeployment.Replicas = replicas + + //ScaleToZero or None autoscaler case + if replicas == uint16(0) || as.Autoscaler.AutoscalerClass == autoscaler.AutoscalerClassNone { + mmDeployment.Replicas = replicas + if _, err = as.Reconcile(true); err != nil { + return ctrl.Result{}, fmt.Errorf("HPA reconcile error: %w", err) + } + } else { + //Autoscaler case + if as.Autoscaler != nil { + + // To-Do Skip changing replicas when the replicas of the runtime deployment is bigger than 0 + // Workaround - if deployment replica is 0, set HPA minReplicas. Else, it sets the same replicas of the deployment + existingDeployment := &appsv1.Deployment{} + if err = r.Client.Get(ctx, types.NamespacedName{ + Name: mmDeploymentName, + Namespace: req.Namespace, + }, existingDeployment); err != nil { + return ctrl.Result{}, fmt.Errorf("Could not get the deployment for the servingruntime : %w", err) + } + if *existingDeployment.Spec.Replicas == int32(0) { + mmDeployment.Replicas = uint16(*(as.Autoscaler.HPA.HPA).Spec.MinReplicas) + } else { + mmDeployment.Replicas = uint16(*(existingDeployment.Spec.Replicas)) + } + } + + //Create or Update HPA + if _, err = as.Reconcile(false); err != nil { + return ctrl.Result{}, fmt.Errorf("HPA reconcile error: %w", err) + } + } + if err = mmDeployment.Apply(ctx); err != nil { if errors.IsConflict(err) { // this can occur during normal operations if the deployment was updated diff --git a/docs/developer.md b/docs/developer.md index 524c97f1..8f4623b2 100644 --- a/docs/developer.md +++ b/docs/developer.md @@ -6,6 +6,8 @@ This document outlines some of the development practices with ModelMesh Serving. Local Kubernetes clusters can easily be set up using tools like [kind](https://kind.sigs.k8s.io/) and [minikube](https://minikube.sigs.k8s.io/docs/). +_(Note)_ Regarding webhook certificates, there are 2 options: `cert-manager` and `self-signed-certificate`. Please refer to [install help](install/install-script.md#installation-1). + For example, using `kind`: ```shell @@ -58,6 +60,16 @@ you will need to restart the controller pod. This can be done through the follow kubectl rollout restart deploy modelmesh-controller ``` +## Deploying a custom controller image + +If you have a custom controller image in your repository, set `MODELMESH_SERVING_IMAGE` to deploy it. 
The following example deploys the custom controller image `quay.io/$org/modelmesh-controller:custom` in the `modelmesh-serving` namespace with `fvt` dependencies: + +```shell +NAMESPACE=modelmesh-serving \ +MODELMESH_SERVING_IMAGE=quay.io/$org/modelmesh-controller:custom \ +make deploy-release-dev-mode-fvt +``` + ## Building the developer image A dockerized development environment is provided to help set up dependencies for testing, linting, and code generating. diff --git a/docs/install/install-script.md b/docs/install/install-script.md index fc33249d..1b525c3b 100644 --- a/docs/install/install-script.md +++ b/docs/install/install-script.md @@ -58,7 +58,7 @@ The `--quickstart` option can be specified to install and configure supporting d ```shell kubectl create namespace modelmesh-serving -./scripts/install.sh --namespace modelmesh-serving --quickstart +./scripts/install.sh --namespace modelmesh-serving --quickstart --enable-self-signed-ca ``` See the installation help below for detail: @@ -73,9 +73,11 @@ Flags: -d, --delete Delete any existing instances of ModelMesh Serving in Kube namespace before running install, including CRDs, RBACs, controller, older CRD with serving.kserve.io api group name, etc. -u, --user-namespaces Kubernetes namespaces to enable for ModelMesh Serving --quickstart Install and configure required supporting datastores in the same namespace (etcd and MinIO) - for experimentation/development - --fvt Install and configure required supporting datastores in the same namespace (etcd and MinIO) - for development with fvt enabled + --fvt Install and configure required supporting datastores in the same namespace (etcd and MinIO) and set `enable-self-signed-ca` - for development with fvt enabled -dev, --dev-mode-logging Enable dev mode logging (stacktraces on warning and no sampling) --namespace-scope-mode Run ModelMesh Serving in namespace scope mode + --modelmesh-serving-image Set a custom ModelMesh serving image + --enable-self-signed-ca Enable self-signed-ca, if the cluster doesn't have `cert-manager` installed Installs ModelMesh Serving CRDs, controller, and built-in runtimes into specified Kubernetes namespaces. @@ -92,6 +94,24 @@ The installation will create a secret named `storage-config` if it does not alre The `--namespace-scope-mode` will deploy `ServingRuntime`s confined to the same namespace, instead of the default cluster-scoped runtimes `ClusterServingRuntime`s. These serving runtimes are accessible to any user/namespace in the cluster. +You can optionally provide a custom ModelMesh Serving image with `--modelmesh-serving-image`. If not specified, it will pull the latest image. + +The ModelMesh controller uses a webhook that requires a certificate. We suggest using [cert-manager](https://github.com/cert-manager/cert-manager) to provision the certificates for the webhook server. Other solutions should also work as long as they put the certificates in the desired location. You can follow [the cert-manager documentation](https://cert-manager.io/docs/installation/) to install it. If you don't want to install `cert-manager`, use the `--enable-self-signed-ca` flag. It will execute a script to create a self-signed CA and patch it to the webhook config. + +- [cert-manager latest version](https://github.com/cert-manager/cert-manager/releases/latest) + + ```shell + CERT_MANAGER_VERSION="v1.11.0" # Use the latest version + + echo "Installing cert manager ..." 
+  kubectl create namespace cert-manager
+  sleep 2
+  kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/${CERT_MANAGER_VERSION}/cert-manager.yaml
+
+  echo "Waiting for cert manager to start ..."
+  kubectl wait --for=condition=ready pod -l 'app in (cert-manager,webhook)' --timeout=180s -n cert-manager
+  ```
+
 ## Setup additional namespaces
 
 To enable additional namespaces for ModelMesh after the initial installation, you need to add a label named `modelmesh-enabled`, and optionally setup the storage secret `storage-config` and built-in runtimes, in the user namespaces.
diff --git a/docs/production-use/scaling.md b/docs/production-use/scaling.md
index 1b2c8c9a..46c55ce7 100644
--- a/docs/production-use/scaling.md
+++ b/docs/production-use/scaling.md
@@ -14,3 +14,35 @@ Increasing the number of runtime replicas has two important effects:
 If a given `ServingRuntime` has no `InferenceService`s that it supports, the `Deployment` for that runtime can safely be scaled to 0 replicas to save on resources. By enabling `ScaleToZero` in the configuration, ModelMesh Serving will perform this scaling automatically. If an `InferenceService` is later added that requires the runtime, it will be scaled back up.
 
 To prevent unnecessary churn, the `ScaleToZero` behavior has a grace period that delays scaling down after the last `InferenceService` required by the runtime is deleted. If a new `InferenceService` is created in that window there will be no change to the scale.
+
+### Autoscaler
+
+In addition to the `ScaleToZero` feature, runtime pods can be autoscaled with a Horizontal Pod Autoscaler (HPA). This feature is disabled by default, but it can be enabled at any time by annotating a ServingRuntime or ClusterServingRuntime.
+To enable the Autoscaler feature, add the following annotation:
+
+```yaml
+apiVersion: serving.kserve.io/v1alpha1
+kind: ServingRuntime
+metadata:
+  annotations:
+    serving.kserve.io/autoscalerClass: hpa
+```
+
+Additional annotations:
+
+```yaml
+metadata:
+  annotations:
+    serving.kserve.io/autoscalerClass: hpa
+    serving.kserve.io/targetUtilizationPercentage: "75"
+    serving.kserve.io/metrics: "cpu"
+    serving.kserve.io/min-scale: "2"
+    serving.kserve.io/max-scale: "3"
+```
+
+You can disable the Autoscaler feature at any time, even while runtime pods created from that ServingRuntime are running.
+
+**NOTE**
+
+- If `serving.kserve.io/autoscalerClass: hpa` is not set, the other annotations will be ignored.
+- If `ScaleToZero` is enabled and there are no `InferenceService`s, the HPA will be deleted and the ServingRuntime deployment will be scaled down to 0.
diff --git a/docs/quickstart.md b/docs/quickstart.md
index 7eb52436..e0561cf8 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -8,6 +8,8 @@ To quickly get started using ModelMesh Serving, here is a brief guide.
 - [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl) and [kustomize](https://kubectl.docs.kubernetes.io/installation/kustomize/) (v3.2.0+)
 - At least 4 vCPU and 8 GB memory. For more details, please see [here](install/README.md#deployed-components).
 
+_(Note)_ Regarding webhook certificates, there are 2 options: `cert-manager` and `self-signed-certificate`. Please refer to [install help](install/install-script.md#installation-1).
+
 ## 1. 
Install ModelMesh Serving ### Get the latest release @@ -22,7 +24,7 @@ cd modelmesh-serving ```shell kubectl create namespace modelmesh-serving -./scripts/install.sh --namespace-scope-mode --namespace modelmesh-serving --quickstart +./scripts/install.sh --namespace-scope-mode --namespace modelmesh-serving --quickstart --enable-self-signed-ca ``` This will install ModelMesh Serving in the `modelmesh-serving` namespace, along with an etcd and MinIO instances. diff --git a/docs/release-process.md b/docs/release-process.md index 97af46c0..4822f260 100644 --- a/docs/release-process.md +++ b/docs/release-process.md @@ -141,7 +141,6 @@ with KServe. 1. Generate the release manifests on the `release-*` branch: ```Shell - kustomize build config/default > modelmesh.yaml kustomize build config/runtimes --load-restrictor LoadRestrictionsNone > modelmesh-runtimes.yaml cp config/dependencies/quickstart.yaml modelmesh-quickstart-dependencies.yaml ``` diff --git a/fvt/README.md b/fvt/README.md index ef557243..d6a9fe48 100644 --- a/fvt/README.md +++ b/fvt/README.md @@ -4,7 +4,7 @@ Functional Verification Test (FVT) suite for ModelMesh Serving using [Ginkgo](ht ## How the tests are structured -- The entry points for FVT suite are located in `predictor/predictor_suite_test.go` and `scaleToZero/scaleToZero_suite_test.go`. +- The entry points for FVT suite are located in `predictor/predictor_suite_test.go`, `scaleToZero/scaleToZero_suite_test.go` and `hpa/hpa_suite_test.go`. - Framework, utility, and helper functions for all suites are in the `fvt` package in this directory. - Manifests used to create predictors, inference services, and runtimes are in the `testdata` folder. diff --git a/fvt/fvtclient.go b/fvt/fvtclient.go index 97d50154..329072b1 100644 --- a/fvt/fvtclient.go +++ b/fvt/fvtclient.go @@ -39,6 +39,7 @@ import ( "google.golang.org/grpc/credentials" appsv1 "k8s.io/api/apps/v1" + hpav2beta2 "k8s.io/api/autoscaling/v2beta2" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -247,6 +248,11 @@ var ( Version: "v1", Resource: "pods", // this must be the plural form } + gvrHPA = schema.GroupVersionResource{ + Group: "autoscaling", + Version: "v2beta2", + Resource: "horizontalpodautoscalers", // this must be the plural form + } ) func (fvt *FVTClient) CreatePredictorExpectSuccess(resource *unstructured.Unstructured) *unstructured.Unstructured { @@ -284,6 +290,57 @@ func (fvt *FVTClient) ApplyPredictorExpectSuccess(predictor *unstructured.Unstru return obj } +func (fvt *FVTClient) ApplyServingRuntimeExpectSuccess(servingRuntime *unstructured.Unstructured) *unstructured.Unstructured { + // use server-side-apply with Patch + servingRuntime.SetManagedFields(nil) + patch, err := yaml.Marshal(servingRuntime) + Expect(err).ToNot(HaveOccurred()) + + obj, err := fvt.Resource(gvrRuntime).Namespace(fvt.namespace).Patch(context.TODO(), servingRuntime.GetName(), types.ApplyPatchType, patch, applyPatchOptions) + Expect(err).ToNot(HaveOccurred()) + Expect(obj).ToNot(BeNil()) + Expect(obj.GetKind()).To(Equal(ServingRuntimeKind)) + return obj +} + +func (fvt *FVTClient) ApplyClusterServingRuntimeExpectSuccess(clusterServingRuntime *unstructured.Unstructured) *unstructured.Unstructured { + // use server-side-apply with Patch + clusterServingRuntime.SetManagedFields(nil) + patch, err := yaml.Marshal(clusterServingRuntime) + Expect(err).ToNot(HaveOccurred()) + + obj, err := fvt.Resource(gvrCRuntime).Patch(context.TODO(), 
clusterServingRuntime.GetName(), types.ApplyPatchType, patch, applyPatchOptions) + Expect(err).ToNot(HaveOccurred()) + Expect(obj).ToNot(BeNil()) + Expect(obj.GetKind()).To(Equal(ClusterServingRuntimeKind)) + return obj +} + +func (fvt *FVTClient) SetServingRuntimeAnnotation(expectedRuntimeName string, annotations map[string]interface{}) { + fvt.log.Info("Set annotations for a runtime: "+expectedRuntimeName, "annotations", annotations) + + var srObject *unstructured.Unstructured + if NameSpaceScopeMode { + srObject = FVTClientInstance.GetServingRuntime(expectedRuntimeName) + } else { + srObject = FVTClientInstance.GetClusterServingRuntime(expectedRuntimeName) + } + + SetMap(srObject, annotations, "metadata", "annotations") + + if NameSpaceScopeMode { + FVTClientInstance.ApplyServingRuntimeExpectSuccess(srObject) + } else { + FVTClientInstance.ApplyClusterServingRuntimeExpectSuccess(srObject) + } +} + +func (fvt *FVTClient) GetClusterServingRuntime(name string) *unstructured.Unstructured { + obj, err := fvt.Resource(gvrCRuntime).Get(context.TODO(), name, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + return obj +} + func (fvt *FVTClient) GetServingRuntime(name string) *unstructured.Unstructured { obj, err := fvt.Resource(gvrRuntime).Namespace(fvt.namespace).Get(context.TODO(), name, metav1.GetOptions{}) Expect(err).ToNot(HaveOccurred()) @@ -382,6 +439,13 @@ func (fvt *FVTClient) PrintPredictors() { } } +func (fvt *FVTClient) PrintHPAs() { + err := fvt.RunKubectl("get", "hpa") + if err != nil { + fvt.log.Error(err, "Error running get hpa command") + } +} + func (fvt *FVTClient) PrintIsvcs() { err := fvt.RunKubectl("get", "inferenceservices") if err != nil { @@ -753,6 +817,24 @@ func (fvt *FVTClient) StartWatchingDeploys() watch.Interface { return deployWatcher } +func (fvt *FVTClient) ListHPAs() hpav2beta2.HorizontalPodAutoscalerList { + var err error + + listOptions := metav1.ListOptions{LabelSelector: "app.kubernetes.io/managed-by=modelmesh-controller", TimeoutSeconds: &DefaultTimeout} + u, err := fvt.Resource(gvrHPA).Namespace(fvt.namespace).List(context.TODO(), listOptions) + Expect(err).ToNot(HaveOccurred()) + + var hpaList hpav2beta2.HorizontalPodAutoscalerList + for _, uh := range u.Items { + var h hpav2beta2.HorizontalPodAutoscaler + err = runtime.DefaultUnstructuredConverter.FromUnstructured(uh.Object, &h) + Expect(err).ToNot(HaveOccurred()) + hpaList.Items = append(hpaList.Items, h) + } + + return hpaList +} + func (fvt *FVTClient) ListDeploys() appsv1.DeploymentList { var err error diff --git a/fvt/globals.go b/fvt/globals.go index e181993e..b4779b34 100644 --- a/fvt/globals.go +++ b/fvt/globals.go @@ -81,6 +81,7 @@ var MutualTLSConfig = map[string]interface{}{ const ( ServingRuntimeKind = "ServingRuntime" + ClusterServingRuntimeKind = "ClusterServingRuntime" PredictorKind = "Predictor" IsvcKind = "InferenceService" ConfigMapKind = "ConfigMap" diff --git a/fvt/helpers.go b/fvt/helpers.go index 84b4e6de..d978d610 100644 --- a/fvt/helpers.go +++ b/fvt/helpers.go @@ -437,6 +437,7 @@ func WaitForRuntimeDeploymentsToBeStable(timeToStabilize time.Duration, watcher } Expect(allReady).To(BeTrue(), fmt.Sprintf("Timed out before deployments were ready: %v", deploymentReady)) + } func logPredictorStatus(obj *unstructured.Unstructured) []interface{} { diff --git a/fvt/hpa/hpa_suite_test.go b/fvt/hpa/hpa_suite_test.go new file mode 100644 index 00000000..6f5aebd2 --- /dev/null +++ b/fvt/hpa/hpa_suite_test.go @@ -0,0 +1,118 @@ +// Copyright 2022 IBM Corporation +// +// Licensed 
under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package hpa + +import ( + "os" + "testing" + "time" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + . "github.com/kserve/modelmesh-serving/fvt" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestHPASuite(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "HPA autoscaler suite") +} + +var _ = SynchronizedBeforeSuite(func() []byte { + // runs *only* on process #1 + return nil +}, func(_ []byte) { + // runs on *all* processes + Log = zap.New(zap.UseDevMode(true), zap.WriteTo(GinkgoWriter)) + Log.Info("Initializing test suite") + + namespace := os.Getenv("NAMESPACE") + if namespace == "" { + namespace = DefaultTestNamespace + } + serviceName := os.Getenv("SERVICENAME") + if serviceName == "" { + serviceName = DefaultTestServiceName + } + controllerNamespace := os.Getenv("CONTROLLERNAMESPACE") + if controllerNamespace == "" { + controllerNamespace = DefaultControllerNamespace + } + NameSpaceScopeMode = os.Getenv("NAMESPACESCOPEMODE") == "true" + Log.Info("Using environment variables", "NAMESPACE", namespace, "SERVICENAME", serviceName, + "CONTROLLERNAMESPACE", controllerNamespace, "NAMESPACESCOPEMODE", NameSpaceScopeMode) + + var err error + FVTClientInstance, err = GetFVTClient(Log, namespace, serviceName, controllerNamespace) + Expect(err).ToNot(HaveOccurred()) + Expect(FVTClientInstance).ToNot(BeNil()) + Log.Info("FVTClientInstance created", "client", FVTClientInstance) + + // confirm 3 cluster serving runtimes or serving runtimes + var list *unstructured.UnstructuredList + if NameSpaceScopeMode { + list, err = FVTClientInstance.ListServingRuntimes(metav1.ListOptions{}) + } else { + list, err = FVTClientInstance.ListClusterServingRuntimes(metav1.ListOptions{}) + } + Expect(err).ToNot(HaveOccurred()) + Expect(list.Items).To(HaveLen(4)) + + config := map[string]interface{}{ + "scaleToZero": map[string]interface{}{ + "enabled": true, + "gracePeriodSeconds": 5, + }, + "podsPerRuntime": 1, + } + FVTClientInstance.ApplyUserConfigMap(config) + + // cleanup any predictors and inference services if they exist + FVTClientInstance.DeleteAllPredictors() + FVTClientInstance.DeleteAllIsvcs() + + Log.Info("Setup completed") +}) + +var _ = SynchronizedAfterSuite(func() { + // runs on *all* processes + // ensure we cleanup any port-forward + FVTClientInstance.DisconnectFromModelServing() +}, func() { + // runs *only* on process #1 + // cleanup any predictors and inference services if they exist + FVTClientInstance.DeleteAllPredictors() + FVTClientInstance.DeleteAllIsvcs() +}) + +// register handlers for a failed test case to print info to the console +var startTime string +var _ = JustBeforeEach(func() { + startTime = time.Now().Format("2006-01-02T15:04:05Z") +}) +var _ = JustAfterEach(func() { + if CurrentSpecReport().Failed() { + FVTClientInstance.PrintPredictors() + FVTClientInstance.PrintIsvcs() + FVTClientInstance.PrintHPAs() 
+ FVTClientInstance.PrintPods() + FVTClientInstance.PrintDescribeNodes() + FVTClientInstance.PrintEvents() + FVTClientInstance.TailPodLogs(startTime) + } +}) diff --git a/fvt/hpa/hpa_test.go b/fvt/hpa/hpa_test.go new file mode 100644 index 00000000..2fe632e0 --- /dev/null +++ b/fvt/hpa/hpa_test.go @@ -0,0 +1,249 @@ +// Copyright 2021 IBM Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package hpa + +import ( + "strings" + "time" + + "github.com/kserve/kserve/pkg/constants" + mmcontstant "github.com/kserve/modelmesh-serving/pkg/constants" + hpav2beta2 "k8s.io/api/autoscaling/v2beta2" + + . "github.com/kserve/modelmesh-serving/fvt" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" +) + +var _ = Describe("Scaling of runtime deployments with HPA Autoscaler", Ordered, func() { + // constants + testPredictorObject := NewPredictorForFVT("mlserver-sklearn-predictor.yaml") + // runtime expected to serve the test predictor + expectedRuntimeName := "mlserver-0.x" + + // checkDeploymentState returns the replicas value for the expected runtime + // and expects others to be scaled to zero + checkDeploymentState := func() int32 { + deployments := FVTClientInstance.ListDeploys() + var replicas int32 + for _, d := range deployments.Items { + Log.Info("Checking deployment scale", "name", d.ObjectMeta.Name) + // the service prefix may change + if strings.HasSuffix(d.ObjectMeta.Name, expectedRuntimeName) { + // since we list existing deploys Replicas should never be nil + replicas = *d.Spec.Replicas + } else { + Expect(*d.Spec.Replicas).To(BeEquivalentTo(int32(0))) + } + } + return replicas + } + expectScaledToTargetReplicas := func(targetReplicas int32) { + replicas := checkDeploymentState() + Expect(replicas).To(BeEquivalentTo(targetReplicas)) + } + + expectScaledToZero := func() { + replicas := checkDeploymentState() + Expect(replicas).To(BeEquivalentTo(int32(0))) + } + + checkHPAState := func() *hpav2beta2.HorizontalPodAutoscaler { + hpaList := FVTClientInstance.ListHPAs() + + var hpa *hpav2beta2.HorizontalPodAutoscaler + if len(hpaList.Items) == 0 { + hpa = nil + } else { + for _, d := range hpaList.Items { + Log.Info("Checking if HPA exist", "name", d.ObjectMeta.Name) + // the service prefix may change + if strings.HasSuffix(d.ObjectMeta.Name, expectedRuntimeName) { + hpa = &d + } + } + } + return hpa + } + + expectHPAExist := func(exist bool) { + hpa := checkHPAState() + if exist { + Expect(hpa).NotTo(BeNil()) + } else { + Expect(hpa).To(BeNil()) + } + } + + expectHPAMinReplicas := func(minReplicas int32) { + hpa := checkHPAState + Expect(*hpa().Spec.MinReplicas).To(Equal(minReplicas)) + } + + expectHPAMaxReplicas := func(maxReplicas int32) { + hpa := checkHPAState + Expect(hpa().Spec.MaxReplicas).To(Equal(maxReplicas)) + } + + expectHPATargetUtilizationPercentage := func(targetUtilizationPercentage int32) { + hpa := checkHPAState + Expect(*hpa().Spec.Metrics[0].Resource.Target.AverageUtilization).To(Equal(targetUtilizationPercentage)) + } + 
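+	// expectHPAResourceName asserts which resource metric (cpu or memory) the HPA
+	// created for the expected runtime is configured to scale on.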
+ expectHPAResourceName := func(resourceName corev1.ResourceName) { + hpa := checkHPAState + Expect(hpa().Spec.Metrics[0].Resource.Name).To(Equal(resourceName)) + } + + deployTestPredictorAndCheckDefaultHPA := func() { + CreatePredictorAndWaitAndExpectLoaded(testPredictorObject) + expectScaledToTargetReplicas(int32(constants.DefaultMinReplicas)) + + // check HPA object + expectHPAExist(true) + expectHPAMinReplicas(1) + expectHPAMaxReplicas(1) + expectHPATargetUtilizationPercentage(80) + expectHPAResourceName(corev1.ResourceCPU) + } + BeforeAll(func() { + srAnnotations := make(map[string]interface{}) + srAnnotations[constants.AutoscalerClass] = string(constants.AutoscalerClassHPA) + + FVTClientInstance.SetServingRuntimeAnnotation(expectedRuntimeName, srAnnotations) + }) + + BeforeEach(func() { + FVTClientInstance.DeleteAllPredictors() + // ensure a stable deploy state + WaitForStableActiveDeployState(10 * time.Second) + }) + + AfterAll(func() { + FVTClientInstance.DeleteAllPredictors() + + annotations := make(map[string]interface{}) + FVTClientInstance.SetServingRuntimeAnnotation(expectedRuntimeName, annotations) + }) + + Context("when there are no predictors", func() { + It("Scale all runtimes down", func() { + // check that all runtimes are scaled to zero + By("Check ScaleToZero and No HPA") + expectScaledToZero() + expectHPAExist(false) + }) + It("Scale all runtimes down after a created test predictor is deleted", func() { + By("Creating a test predictor for one Runtime") + deployTestPredictorAndCheckDefaultHPA() + + By("Delete all predictors") + FVTClientInstance.DeleteAllPredictors() + // ensure a stable deploy state + WaitForStableActiveDeployState(10 * time.Second) + + By("Check ScaleToZero and No HPA") + expectScaledToZero() + expectHPAExist(false) + }) + }) + Context("when there are predictors", func() { + It("Creating a predictor should create an HPA and scale up the runtime to minReplicas of HPA", func() { + By("Creating a test predictor for one Runtime") + deployTestPredictorAndCheckDefaultHPA() + }) + It("Scaleup/Scaledown and Change targetUtilizationPercentage by an annotation in ServingRuntime", func() { + By("Creating a test predictor for one Runtime") + deployTestPredictorAndCheckDefaultHPA() + + // ScaleUp Test + By("ScaleUp to min(2)/max(4): " + mmcontstant.MinScaleAnnotationKey) + By("Increase TargetUtilizationPercentage to 90: " + constants.TargetUtilizationPercentage) + By("Change Metrics to memory: " + constants.TargetUtilizationPercentage) + srAnnotationsScaleUp := make(map[string]interface{}) + srAnnotationsScaleUp[constants.AutoscalerClass] = string(constants.AutoscalerClassHPA) + srAnnotationsScaleUp[mmcontstant.MinScaleAnnotationKey] = "2" + srAnnotationsScaleUp[mmcontstant.MaxScaleAnnotationKey] = "4" + srAnnotationsScaleUp[constants.TargetUtilizationPercentage] = "90" + srAnnotationsScaleUp[constants.AutoscalerMetrics] = "memory" + + // set modified annotations + FVTClientInstance.SetServingRuntimeAnnotation(expectedRuntimeName, srAnnotationsScaleUp) + + // sleep to give time for changes to propagate to the deployment + time.Sleep(10 * time.Second) + WaitForStableActiveDeployState(time.Second * 30) + + // check that all runtimes except the one are scaled up to minimum replicas of HPA + expectScaledToTargetReplicas(2) + + // check HPA + expectHPAExist(true) + expectHPAMinReplicas(2) + expectHPAMaxReplicas(4) + expectHPATargetUtilizationPercentage(90) + expectHPAResourceName(corev1.ResourceMemory) + + // ScaleDown Test + By("ScaleDown to min(1)/max(1): " + 
mmcontstant.MinScaleAnnotationKey) + By("Decrease TargetUtilizationPercentage to 80: " + constants.TargetUtilizationPercentage) + By("Change Metrics to cpu: " + constants.TargetUtilizationPercentage) + srAnnotationsScaleDown := make(map[string]interface{}) + srAnnotationsScaleDown[constants.AutoscalerClass] = string(constants.AutoscalerClassHPA) + srAnnotationsScaleDown[mmcontstant.MinScaleAnnotationKey] = "1" + srAnnotationsScaleDown[mmcontstant.MaxScaleAnnotationKey] = "1" + srAnnotationsScaleDown[constants.TargetUtilizationPercentage] = "80" + srAnnotationsScaleDown[constants.AutoscalerMetrics] = "cpu" + + // set modified annotations + FVTClientInstance.SetServingRuntimeAnnotation(expectedRuntimeName, srAnnotationsScaleDown) + + // sleep to give time for changes to propagate to the deployment + time.Sleep(10 * time.Second) + WaitForStableActiveDeployState(time.Second * 30) + + // check that all runtimes except the one are scaled up to minimum replicas of HPA + expectScaledToTargetReplicas(1) + + // check HPA object + expectHPAExist(true) + expectHPAMinReplicas(1) + expectHPAMaxReplicas(1) + expectHPATargetUtilizationPercentage(80) + expectHPAResourceName(corev1.ResourceCPU) + }) + }) + // This test must be the last because it will remove hpa annotation from servingruntime/clusterservingruntime + Context("When the model does not need autoscaler anymore", func() { + It("Disable autoscaler", func() { + deployTestPredictorAndCheckDefaultHPA() + + // set modified annotations + By("Deleting this annotation: " + constants.AutoscalerClass) + srAnnotationsNone := make(map[string]interface{}) + FVTClientInstance.SetServingRuntimeAnnotation(expectedRuntimeName, srAnnotationsNone) + + // sleep to give time for changes to propagate to the deployment + time.Sleep(10 * time.Second) + WaitForStableActiveDeployState(time.Second * 30) + + // check that all runtimes except the one are scaled up to servingRuntime default replicas + expectScaledToTargetReplicas(1) + + // check if HPA deleted + expectHPAExist(false) + }) + }) +}) diff --git a/fvt/utils.go b/fvt/utils.go index efaedceb..82257e1f 100644 --- a/fvt/utils.go +++ b/fvt/utils.go @@ -122,6 +122,11 @@ func GetMap(obj *unstructured.Unstructured, fieldPath ...string) map[string]inte return value } +func SetMap(obj *unstructured.Unstructured, value map[string]interface{}, fieldPath ...string) { + err := unstructured.SetNestedMap(obj.Object, value, fieldPath...) + Expect(err).ToNot(HaveOccurred()) +} + func SetString(obj *unstructured.Unstructured, value string, fieldPath ...string) { err := unstructured.SetNestedField(obj.Object, value, fieldPath...) 
Expect(err).ToNot(HaveOccurred()) diff --git a/go.mod b/go.mod index 4ace4b1b..7e90c62a 100644 --- a/go.mod +++ b/go.mod @@ -14,6 +14,7 @@ require ( github.com/onsi/ginkgo/v2 v2.1.3 github.com/onsi/gomega v1.18.1 github.com/operator-framework/operator-lib v0.10.0 + github.com/pkg/errors v0.9.1 github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.55.0 github.com/spf13/viper v1.10.1 github.com/stretchr/testify v1.8.0 @@ -81,7 +82,6 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pelletier/go-toml v1.9.4 // indirect - github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_golang v1.14.0 // indirect github.com/prometheus/client_model v0.3.0 // indirect diff --git a/main.go b/main.go index 84eafcea..fc3c071d 100644 --- a/main.go +++ b/main.go @@ -54,6 +54,8 @@ import ( "github.com/kserve/modelmesh-serving/controllers" "github.com/kserve/modelmesh-serving/controllers/modelmesh" "github.com/kserve/modelmesh-serving/pkg/mmesh" + + "sigs.k8s.io/controller-runtime/pkg/webhook" // +kubebuilder:scaffold:imports ) @@ -258,6 +260,15 @@ func main() { os.Exit(1) } + // Setup servingruntime validating webhook + hookServer := mgr.GetWebhookServer() + servingRuntimeWebhook := &webhook.Admission{ + Handler: &servingv1alpha1.ServingRuntimeWebhook{ + Client: mgr.GetClient(), + }, + } + hookServer.Register("/validate-serving-modelmesh-io-v1alpha1-servingruntime", servingRuntimeWebhook) + _, err = mmesh.InitGrpcResolver(ControllerNamespace, mgr) if err != nil { setupLog.Error(err, "Failed to Initialize Grpc Resolver, exit") diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go new file mode 100644 index 00000000..6002a784 --- /dev/null +++ b/pkg/constants/constants.go @@ -0,0 +1,21 @@ +// Copyright 2021 IBM Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
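+
+// Package constants defines the ModelMesh-specific min-scale / max-scale annotation
+// keys (serving.kserve.io/min-scale, serving.kserve.io/max-scale) that the HPA
+// reconciler reads from ServingRuntime/ClusterServingRuntime metadata.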
+package constants + +import "github.com/kserve/kserve/pkg/constants" + +var ( + MinScaleAnnotationKey = constants.KServeAPIGroupName + "/min-scale" + MaxScaleAnnotationKey = constants.KServeAPIGroupName + "/max-scale" +) diff --git a/scripts/install.sh b/scripts/install.sh index f4662677..f5c65ef0 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -26,6 +26,8 @@ quickstart=false fvt=false user_ns_array= namespace_scope_mode=false # change to true to run in namespace scope +modelmesh_serving_image= +enable_self_signed_ca=false function showHelp() { echo "usage: $0 [flags]" @@ -39,6 +41,8 @@ function showHelp() { echo " --fvt Install and configure required supporting datastores in the same namespace (etcd and MinIO) - for development with fvt enabled" echo " -dev, --dev-mode-logging Enable dev mode logging (stacktraces on warning and no sampling)" echo " --namespace-scope-mode Run ModelMesh Serving in namespace scope mode" + echo " --modelmesh-serving-image Set a custom modelmesh serving image" + echo " --enable-self-signed-ca Enable self-signed-ca, if you don't have cert-manager in the cluster" echo echo "Installs ModelMesh Serving CRDs, controller, and built-in runtimes into specified" echo "Kubernetes namespaces." @@ -166,10 +170,18 @@ while (($# > 0)); do ;; --fvt) fvt=true + enable_self_signed_ca=true ;; --namespace-scope-mode) namespace_scope_mode=true ;; + --modelmesh-serving-image) + shift + modelmesh_serving_image="$1" + ;; + --enable-self-signed-ca ) + enable_self_signed_ca=true + ;; -*) die "Unknown option: '${1}'" ;; @@ -290,6 +302,41 @@ else fi info "Installing ModelMesh Serving CRDs and controller" +if [[ -n $modelmesh_serving_image ]]; then + info "Custom ModelMesh Serving Image: $modelmesh_serving_image" + if [[ ! -f manager/kustomization.yaml.ori ]]; then + cp manager/kustomization.yaml manager/kustomization.yaml.ori + fi + cd manager; kustomize edit set image modelmesh-controller=${modelmesh_serving_image}; cd ../ +fi + +if [[ $enable_self_signed_ca == "true" ]]; then + info "Enabled Self Signed CA: Update manifest" + if [[ ! -f certmanager/kustomization.yaml.ori ]]; then + cp certmanager/kustomization.yaml certmanager/kustomization.yaml.ori + fi + cd certmanager; kustomize edit remove resource certificate.yaml; cd ../ + + if [[ ! -f default/kustomization.yaml.ori ]]; then + cp default/kustomization.yaml default/kustomization.yaml.ori + fi + cd default; kustomize edit remove resource ../certmanager; cd ../ + + # comment out vars + configMapGeneratorStartLine=$(grep -n configMapGenerator ./default/kustomization.yaml |cut -d':' -f1) + configMapGeneratorBeforeLine=$((configMapGeneratorStartLine-1)) + sed "1,${configMapGeneratorBeforeLine}s/^/#/g" -i default/kustomization.yaml + + # remove webhookcainjection_patch.yaml + sed 's+- webhookcainjection_patch.yaml++g' -i default/kustomization.yaml + + # create dummy secret 'modelmesh-webhook-server-cert' + secretExist=$(kubectl get secret modelmesh-webhook-server-cert --ignore-not-found|wc -l) + if [[ $secretExist == 0 ]]; then + kubectl create secret generic modelmesh-webhook-server-cert + fi +fi + kustomize build default | kubectl apply -f - if [[ $dev_mode_logging == "true" ]]; then @@ -305,7 +352,26 @@ if [[ $namespace_scope_mode == "true" ]]; then rm crd/kustomization.yaml.bak fi -info "Waiting for ModelMesh Serving controller pod to be up ..." 
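+# Restore the kustomization files that were backed up above (custom image and/or
+# self-signed CA edits) so the checkout is left unmodified after installation; for
+# the self-signed CA path, certificates are then generated and the controller restarted.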
+if [[ -n $modelmesh_serving_image ]]; then + cp manager/kustomization.yaml.ori manager/kustomization.yaml + rm manager/kustomization.yaml.ori +fi + +if [[ $enable_self_signed_ca == "true" ]]; then + cp certmanager/kustomization.yaml.ori certmanager/kustomization.yaml + cp default/kustomization.yaml.ori default/kustomization.yaml + rm certmanager/kustomization.yaml.ori default/kustomization.yaml.ori + + info "Enabled Self Signed CA: Generate certificates and restart controller" + + # Delete dummy secret for webhook server + kubectl delete secret modelmesh-webhook-server-cert + + ../scripts/self-signed-ca.sh --namespace $namespace + +fi + +info "Waiting for ModelMesh Serving controller pod to be up..." wait_for_pods_ready "-l control-plane=modelmesh-controller" # Older versions of kustomize have different load restrictor flag formats. diff --git a/scripts/self-signed-ca.sh b/scripts/self-signed-ca.sh new file mode 100755 index 00000000..de104cd9 --- /dev/null +++ b/scripts/self-signed-ca.sh @@ -0,0 +1,169 @@ +#!/usr/bin/env bash +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.# + +# Install ModelMesh Serving CRDs, controller, and built-in runtimes into specified Kubernetes namespaces. +# Expect cluster-admin authority and Kube cluster access to be configured prior to running. 
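+#
+# Example invocation (as called from scripts/install.sh when --enable-self-signed-ca is set):
+#   ./scripts/self-signed-ca.sh --namespace modelmesh-serving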
+ +set -e + +usage() { + cat <> ${tmpdir}/csr.conf +[req] +req_extensions = v3_req +distinguished_name = req_distinguished_name +[req_distinguished_name] +[ v3_req ] +basicConstraints = CA:FALSE +keyUsage = nonRepudiation, digitalSignature, keyEncipherment +extendedKeyUsage = serverAuth +subjectAltName = @alt_names +[alt_names] +DNS.1 = ${service} +DNS.2 = ${service}.${namespace} +DNS.3 = ${service}.${namespace}.svc +DNS.4 = ${service}.${namespace}.svc.cluster +DNS.5 = ${service}.${namespace}.svc.cluster.local + +EOF +# Create CA and Server key/certificate +openssl genrsa -out ${tmpdir}/ca.key 2048 +openssl req -x509 -newkey rsa:2048 -key ${tmpdir}/ca.key -out ${tmpdir}/ca.crt -days 365 -nodes -subj "/CN=${service}.${namespace}.svc" + +openssl genrsa -out ${tmpdir}/server.key 2048 +openssl req -new -key ${tmpdir}/server.key -subj "/CN=${service}.${namespace}.svc" -out ${tmpdir}/server.csr -config ${tmpdir}/csr.conf + +# Self sign +openssl x509 -extensions v3_req -req -days 365 -in ${tmpdir}/server.csr -CA ${tmpdir}/ca.crt -CAkey ${tmpdir}/ca.key -CAcreateserial -out ${tmpdir}/server.crt -extfile ${tmpdir}/csr.conf +# create the secret with server cert/key +kubectl create secret generic ${secret} \ + --from-file=tls.key=${tmpdir}/server.key \ + --from-file=tls.crt=${tmpdir}/server.crt \ + --dry-run -o yaml | + kubectl -n ${namespace} apply -f - +# Webhook pod needs to be restarted so that the service reload the secret +# http://github.com/kueflow/kubeflow/issues/3227 +webhookPod=$(kubectl get pods -n ${namespace} |grep ${webhookDeploymentName} |awk '{print $1;}') +# ignore error if webhook pod does not exist +kubectl delete pod ${webhookPod} -n ${namespace} 2>/dev/null || true +echo "webhook ${webhookPod} is restarted to utilize the new secret" + +echo "CA Certificate:" +cat ${tmpdir}/ca.crt + +# -a means base64 encode +caBundle=$(cat ${tmpdir}/ca.crt | openssl enc -a -A) +echo "Encoded CA:" +echo -e "${caBundle} \n" + +# check if jq is installed +if [ ! -x "$(command -v jq)" ]; then + echo "jq not found" + exit 1 +fi +# # Patch CA Certificate to mutatingWebhook +# mutatingWebhookCount=$(kubectl get mutatingwebhookconfiguration ${webhookConfigName} -ojson | jq -r '.webhooks' | jq length) +# # build patchstring based on webhook counts +# mutatingPatchString='[' +# for i in $(seq 0 $(($mutatingWebhookCount-1))) +# do +# mutatingPatchString=$mutatingPatchString'{"op": "replace", "path": "/webhooks/'$i'/clientConfig/caBundle", "value":"{{CA_BUNDLE}}"}, ' +# done +# # strip ', ' +# mutatingPatchString=${mutatingPatchString%, }']' +# mutatingPatchString=$(echo ${mutatingPatchString} | sed "s|{{CA_BUNDLE}}|${caBundle}|g") + +# echo "patching ca bundle for mutating webhook configuration..." +# kubectl patch mutatingwebhookconfiguration ${webhookConfigName} \ +# --type='json' -p="${mutatingPatchString}" + +# Patch CA Certificate to validatingWebhook +validatingWebhookCount=$(kubectl get validatingwebhookconfiguration ${webhookConfigName} -ojson | jq -r '.webhooks' | jq length) +validatingPatchString='[' +for i in $(seq 0 $(($validatingWebhookCount-1))) +do + validatingPatchString=$validatingPatchString'{"op": "replace", "path": "/webhooks/'$i'/clientConfig/caBundle", "value":"{{CA_BUNDLE}}"}, ' +done +validatingPatchString=${validatingPatchString%, }']' +validatingPatchString=$(echo ${validatingPatchString} | sed "s|{{CA_BUNDLE}}|${caBundle}|g") + +echo "patching ca bundle for validating webhook configuration..." 
+kubectl patch validatingwebhookconfiguration ${webhookConfigName} \ + --type='json' -p="${validatingPatchString}"
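+
+# Optional sanity check (not part of the original flow): confirm the CA bundle was injected, e.g.
+#   kubectl get validatingwebhookconfiguration ${webhookConfigName} -o jsonpath='{.webhooks[0].clientConfig.caBundle}' | head -c 64; echo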