From dd0229fe52d6c90703eabeecfdee287dd3ac55fd Mon Sep 17 00:00:00 2001
From: Jooho Lee
Date: Thu, 27 Apr 2023 17:07:13 -0400
Subject: [PATCH] feat: Enable Horizontal Pod Autoscaling for ServingRuntime/ClusterServingRuntime (#342)

Enable Horizontal Pod Autoscaling for ServingRuntime/ClusterServingRuntime by adding annotation `serving.kserve.io/autoscalerClass: hpa`

- Add auto-scaling, HPA controller
- Add ServingRuntime Webhook
- Update deployment manifests
- Add script to generate self-signed certificate
- Add option to enable self-signed certificate to install script
- Add deploy-release-dev-mode-fvt target to Makefile
- Add FVT and unit tests
- Upgrade FVT minikube version from 1.25 to 1.27
- Enable FVT deployment on OpenShift (etcd --data-dir)
- Update Docs

Resolves #329

Signed-off-by: Jooho Lee
---
 .github/workflows/run-fvt.yml | 22 +-
 .gitignore | 1 +
 Dockerfile.develop | 1 +
 Makefile | 12 +-
 .../v1alpha1/servingruntime_webhook.go | 216 +++++++++++++++
 .../v1alpha1/servingruntime_webhook_test.go | 106 ++++++++
 .../serving/v1alpha1/zz_generated.deepcopy.go | 15 ++
 config/certmanager/certificate.yaml | 10 +-
 config/crd/patches/webhook_in_predictors.yaml | 1 -
 .../patches/webhook_in_servingruntimes.yaml | 1 -
 config/default/kustomization.yaml | 100 +++----
 config/default/manager_auth_proxy_patch.yaml | 1 -
 config/default/manager_webhook_patch.yaml | 11 +-
 config/default/webhookcainjection_patch.yaml | 13 +-
 config/dependencies/fvt.yaml | 2 +
 config/namespace-runtimes/kustomization.yaml | 2 +-
 config/prometheus/monitor.yaml | 1 -
 config/rbac/cluster-scope/kustomization.yaml | 2 +
 config/rbac/cluster-scope/role.yaml | 12 +
 config/rbac/common/kustomization.yaml | 1 +
 config/rbac/common/networkpolicy-webhook.yaml | 29 ++
 .../rbac/namespace-scope/kustomization.yaml | 2 +
 config/rbac/namespace-scope/role.yaml | 12 +
 config/webhook/kustomization.yaml | 20 ++
 config/webhook/kustomizeconfig.yaml | 31 +++
 config/webhook/manifests.yaml | 40 +++
 config/webhook/service.yaml | 25 ++
 .../autoscaler/autoscaler_reconciler.go | 129 +++++++++
 .../autoscaler/autoscaler_reconciler_test.go | 72 +++++
 controllers/hpa/hpa_reconciler.go | 193 ++++++++++++++
 controllers/hpa/hpa_reconciler_test.go | 157 +++++++++++
 controllers/servingruntime_controller.go | 72 ++++-
 docs/developer.md | 12 +
 docs/install/install-script.md | 24 +-
 docs/production-use/scaling.md | 32 +++
 docs/quickstart.md | 4 +-
 docs/release-process.md | 1 -
 fvt/README.md | 2 +-
 fvt/fvtclient.go | 82 ++++++
 fvt/globals.go | 1 +
 fvt/helpers.go | 1 +
 fvt/hpa/hpa_suite_test.go | 118 +++++++++
 fvt/hpa/hpa_test.go | 249 ++++++++++++++++++
 fvt/utils.go | 5 +
 go.mod | 2 +-
 main.go | 11 +
 pkg/constants/constants.go | 21 ++
 scripts/install.sh | 68 ++++-
 scripts/self-signed-ca.sh | 169 ++++++++++++
 49 files changed, 1994 insertions(+), 120 deletions(-)
 create mode 100644 apis/serving/v1alpha1/servingruntime_webhook.go
 create mode 100644 apis/serving/v1alpha1/servingruntime_webhook_test.go
 create mode 100644 config/rbac/common/networkpolicy-webhook.yaml
 create mode 100644 config/webhook/kustomization.yaml
 create mode 100644 config/webhook/kustomizeconfig.yaml
 create mode 100644 config/webhook/manifests.yaml
 create mode 100644 config/webhook/service.yaml
 create mode 100644 controllers/autoscaler/autoscaler_reconciler.go
 create mode 100644 controllers/autoscaler/autoscaler_reconciler_test.go
 create mode 100644 controllers/hpa/hpa_reconciler.go
 create mode 100644 controllers/hpa/hpa_reconciler_test.go
 create mode 100644 fvt/hpa/hpa_suite_test.go
 create mode 100644
fvt/hpa/hpa_test.go create mode 100644 pkg/constants/constants.go create mode 100755 scripts/self-signed-ca.sh diff --git a/.github/workflows/run-fvt.yml b/.github/workflows/run-fvt.yml index 0b1987d8..ddc13024 100644 --- a/.github/workflows/run-fvt.yml +++ b/.github/workflows/run-fvt.yml @@ -20,14 +20,16 @@ jobs: - uses: actions/checkout@v2 - uses: actions/setup-go@v2 with: - go-version: '1.18.7' - - name: Setup Minikube - run: | - wget --no-verbose https://github.com/kubernetes/minikube/releases/download/v1.25.1/minikube-linux-amd64 - sudo cp minikube-linux-amd64 /usr/local/bin/minikube - sudo chmod 755 /usr/local/bin/minikube - sudo apt-get install -y conntrack socat - minikube start --driver=none --kubernetes-version v1.22.10 + go-version: '1.18.7' + - name: Start Minikube + uses: medyagh/setup-minikube@v0.0.11 + id: minikube + with: + minikube-version: 1.27.1 + container-runtime: docker + kubernetes-version: v1.25.2 + cpus: max + memory: max - name: Check pods run: | sleep 30 @@ -55,6 +57,7 @@ jobs: echo -e '\n disabled: true' >> config/runtimes/torchserve-0.x.yaml - name: Build Controller image run: | + eval $(minikube -p minikube docker-env) make build.develop ./scripts/build_docker.sh --target runtime --tag ${{ env.IMAGE_TAG }} - name: Install ModelMesh Serving @@ -63,12 +66,14 @@ jobs: ./scripts/install.sh --namespace modelmesh-serving --fvt --dev-mode-logging - name: Free up disk space run: | + eval $(minikube -p minikube docker-env) echo "Pruning images" docker image prune -a -f docker system df df -h - name: Pre-pull runtime images run: | + eval $(minikube -p minikube docker-env) docker pull nvcr.io/nvidia/tritonserver:21.06.1-py3 docker pull seldonio/mlserver:0.5.2 docker pull openvino/model_server:2022.2 @@ -78,6 +83,7 @@ jobs: docker pull kserve/modelmesh - name: Check installation run: | + eval $(minikube -p minikube docker-env) docker images kubectl get pods kubectl get clusterservingruntimes diff --git a/.gitignore b/.gitignore index 33a31d26..a5e66818 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ public/ target/ +vendor/ # Binaries for programs and plugins *.exe diff --git a/Dockerfile.develop b/Dockerfile.develop index e47f285d..ecb7a3f8 100644 --- a/Dockerfile.develop +++ b/Dockerfile.develop @@ -40,6 +40,7 @@ RUN microdnf install \ tar \ vim \ git \ + jq \ python38 \ nodejs && \ pip3 install pre-commit && \ diff --git a/Makefile b/Makefile index eabe70d5..7e995622 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,8 @@ test: # Run fvt tests. This requires an etcd, kubernetes connection, and model serving installation. 
Ginkgo CLI is used to run them in parallel fvt: - ginkgo -v -procs=2 --progress --fail-fast fvt/predictor fvt/scaleToZero fvt/storage --timeout=50m + ginkgo -v -procs=2 --progress --fail-fast fvt/predictor fvt/scaleToZero fvt/storage fvt/hpa --timeout=50m + # Command to regenerate the grpc go files from the proto files fvt-protoc: @@ -87,6 +88,15 @@ deploy-release: deploy-release-dev-mode: ./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging +deploy-release-dev-mode-fvt: +ifdef MODELMESH_SERVING_IMAGE + $(eval extra_options += --modelmesh-serving-image ${MODELMESH_SERVING_IMAGE}) +endif +ifdef NAMESPACE_SCOPE_MODE + $(eval extra_options += --namespace-scope-mode) +endif + ./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging --fvt ${extra_options} + delete: oc-login ./scripts/delete.sh --namespace ${NAMESPACE} --local-config-path config diff --git a/apis/serving/v1alpha1/servingruntime_webhook.go b/apis/serving/v1alpha1/servingruntime_webhook.go new file mode 100644 index 00000000..3303f290 --- /dev/null +++ b/apis/serving/v1alpha1/servingruntime_webhook.go @@ -0,0 +1,216 @@ +//Copyright 2021 IBM Corporation +// +//Licensed under the Apache License, Version 2.0 (the "License"); +//you may not use this file except in compliance with the License. +//You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +//Unless required by applicable law or agreed to in writing, software +//distributed under the License is distributed on an "AS IS" BASIS, +//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//See the License for the specific language governing permissions and +//limitations under the License. +// +package v1alpha1 + +import ( + "context" + "fmt" + "math" + "net/http" + "strconv" + + kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/modelmesh-serving/controllers/autoscaler" + mmcontstant "github.com/kserve/modelmesh-serving/pkg/constants" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" +) + +//+kubebuilder:webhook:path=/validate-serving-modelmesh-io-v1alpha1-servingruntime,mutating=false,failurePolicy=fail,sideEffects=None,groups=serving.kserve.io,resources=servingruntimes;clusterservingruntimes,verbs=create;update,versions=v1alpha1,name=servingruntime.modelmesh-webhook-server.default,admissionReviewVersions=v1 +type ServingRuntimeWebhook struct { + Client client.Client + decoder *admission.Decoder +} + +func (s *ServingRuntimeWebhook) Handle(ctx context.Context, req admission.Request) admission.Response { + var srAnnotations map[string]string + srReplicas := uint16(math.MaxUint16) + multiModel := false + + if req.Kind.Kind == "ServingRuntime" { + servingRuntime := &kservev1alpha.ServingRuntime{} + err := s.decoder.Decode(req, servingRuntime) + if err != nil { + return admission.Errored(http.StatusBadRequest, err) + } + srAnnotations = servingRuntime.ObjectMeta.Annotations + + if (*servingRuntime).Spec.Replicas != nil { + srReplicas = uint16(*servingRuntime.Spec.Replicas) + } + + if (*servingRuntime).Spec.MultiModel != nil { + multiModel = *servingRuntime.Spec.MultiModel + } + + } else { + clusterServingRuntime := &kservev1alpha.ClusterServingRuntime{} + err := s.decoder.Decode(req, clusterServingRuntime) + if err != nil { + return admission.Errored(http.StatusBadRequest, err) + } + srAnnotations = 
clusterServingRuntime.ObjectMeta.Annotations + + if (*clusterServingRuntime).Spec.Replicas != nil { + srReplicas = uint16(*clusterServingRuntime.Spec.Replicas) + } + + if (*clusterServingRuntime).Spec.MultiModel != nil { + multiModel = *clusterServingRuntime.Spec.MultiModel + } + } + + if !multiModel { + return admission.Allowed("Not validating ServingRuntime because it is not ModelMesh compatible") + } + + if err := validateServingRuntimeAutoscaler(srAnnotations); err != nil { + return admission.Denied(err.Error()) + } + + if err := validateAutoscalerTargetUtilizationPercentage(srAnnotations); err != nil { + return admission.Denied(err.Error()) + } + + if err := validateAutoScalingReplicas(srAnnotations, srReplicas); err != nil { + return admission.Denied(err.Error()) + } + + return admission.Allowed("Passed all validation checks for ServingRuntime") +} + +// InjectDecoder injects the decoder. +func (s *ServingRuntimeWebhook) InjectDecoder(d *admission.Decoder) error { + s.decoder = d + return nil +} + +// Validation of servingruntime autoscaler class +func validateServingRuntimeAutoscaler(annotations map[string]string) error { + value, ok := annotations[constants.AutoscalerClass] + class := constants.AutoscalerClassType(value) + if ok { + for _, item := range constants.AutoscalerAllowedClassList { + if class == item { + switch class { + case constants.AutoscalerClassHPA: + if metric, ok := annotations[constants.AutoscalerMetrics]; ok { + return validateHPAMetrics(constants.AutoscalerMetricsType(metric)) + } else { + return nil + } + default: + return fmt.Errorf("unknown autoscaler class [%s]", class) + } + } + } + return fmt.Errorf("[%s] is not a supported autoscaler class type.\n", value) + } + + return nil +} + +// Validate of autoscaler targetUtilizationPercentage +func validateAutoscalerTargetUtilizationPercentage(annotations map[string]string) error { + if value, ok := annotations[constants.TargetUtilizationPercentage]; ok { + t, err := strconv.Atoi(value) + if err != nil { + return fmt.Errorf("The target utilization percentage should be a [1-100] integer.") + } else { + if t < 1 || t > 100 { + return fmt.Errorf("The target utilization percentage should be a [1-100] integer.") + } + } + } + + return nil +} + +// Validate scaling options +func validateAutoScalingReplicas(annotations map[string]string, srReplicas uint16) error { + autoscalerClassType := autoscaler.AutoscalerClassNone + if value, ok := annotations[constants.AutoscalerClass]; ok { + autoscalerClassType = value + } + + switch autoscalerClassType { + case string(constants.AutoscalerClassHPA): + if srReplicas != math.MaxUint16 { + return fmt.Errorf("Autoscaler is enabled and also replicas variable set. 
You can not set both.") + } + return validateScalingHPA(annotations) + default: + return nil + } +} + +func validateScalingHPA(annotations map[string]string) error { + metric := constants.AutoScalerMetricsCPU + if value, ok := annotations[constants.AutoscalerMetrics]; ok { + metric = constants.AutoscalerMetricsType(value) + } + + minReplicas := 1 + if value, ok := annotations[mmcontstant.MinScaleAnnotationKey]; ok { + if valueInt, err := strconv.Atoi(value); err != nil { + return fmt.Errorf("The min replicas should be a integer.") + } else if valueInt < 1 { + return fmt.Errorf("The min replicas should be more than 0") + } else { + minReplicas = valueInt + } + } + + maxReplicas := 1 + if value, ok := annotations[mmcontstant.MaxScaleAnnotationKey]; ok { + if valueInt, err := strconv.Atoi(value); err != nil { + return fmt.Errorf("The max replicas should be a integer.") + } else { + maxReplicas = valueInt + } + } + + if minReplicas > maxReplicas { + return fmt.Errorf("The max replicas should be same or bigger than min replicas.") + } + + err := validateHPAMetrics(metric) + if err != nil { + return err + } + + if value, ok := annotations[constants.TargetUtilizationPercentage]; ok { + t, err := strconv.Atoi(value) + if err != nil { + return fmt.Errorf("The target utilization percentage should be a [1-100] integer.") + } else if metric == constants.AutoScalerMetricsMemory && t < 1 { + return fmt.Errorf("The target memory should be greater than 1 MiB") + } + } + + return nil +} + +// Validate of autoscaler HPA metrics +func validateHPAMetrics(metric constants.AutoscalerMetricsType) error { + for _, item := range constants.AutoscalerAllowedMetricsList { + if item == metric { + return nil + } + } + return fmt.Errorf("[%s] is not a supported metric.\n", metric) + +} diff --git a/apis/serving/v1alpha1/servingruntime_webhook_test.go b/apis/serving/v1alpha1/servingruntime_webhook_test.go new file mode 100644 index 00000000..27e4b9bc --- /dev/null +++ b/apis/serving/v1alpha1/servingruntime_webhook_test.go @@ -0,0 +1,106 @@ +/* +Copyright 2021 IBM Corporation + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
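Taken together, the validators above admit a ModelMesh-enabled runtime only when the autoscaler class is `hpa`, the metric is on the allowed list, the target utilization is an integer in [1, 100], min-scale is at least 1 and no greater than max-scale, and `spec.replicas` is left unset. As a compact reference, a sketch (not part of the patch) of an annotation set that passes every check; the literal keys mirror the test fixture that follows:

```go
// Package example is an illustrative sketch only: the annotation set that
// opts a (Cluster)ServingRuntime in to HPA autoscaling and passes the three
// validators above. spec.replicas must be left unset alongside these.
package example

var hpaAnnotations = map[string]string{
	"serving.kserve.io/autoscalerClass":             "hpa", // selects the HPA autoscaler class
	"serving.kserve.io/metrics":                     "cpu", // "cpu" or "memory" (the metrics the tests exercise)
	"serving.kserve.io/targetUtilizationPercentage": "75",  // integer in [1, 100]
	"serving.kserve.io/min-scale":                   "2",   // must be >= 1
	"serving.kserve.io/max-scale":                   "4",   // must be >= min-scale
}
```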
+*/ +package v1alpha1 + +import ( + "math" + "testing" + + "github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + "github.com/kserve/kserve/pkg/constants" + mmcontstant "github.com/kserve/modelmesh-serving/pkg/constants" +) + +func makeTestRawServingRuntime() kservev1alpha.ServingRuntime { + servingRuntime := kservev1alpha.ServingRuntime{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foo", + Namespace: "default", + Annotations: map[string]string{ + "serving.kserve.io/autoscalerClass": "hpa", + "serving.kserve.io/metrics": "cpu", + "serving.kserve.io/targetUtilizationPercentage": "75", + "serving.kserve.io/min-scale": "2", + "serving.kserve.io/max-scale": "3", + }, + }, + } + + return servingRuntime +} + +func TestValidAutoscalerTypeAndHPAMetrics(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + g.Expect(validateServingRuntimeAutoscaler(sr.Annotations)).Should(gomega.Succeed()) +} +func TestInvalidAutoscalerClassType(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[constants.AutoscalerClass] = "test" + g.Expect(validateServingRuntimeAutoscaler(sr.Annotations)).ShouldNot(gomega.Succeed()) +} + +func TestInvalidAutoscalerTargetUtilizationPercentageLowValue(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[constants.TargetUtilizationPercentage] = "-1" + g.Expect(validateAutoscalerTargetUtilizationPercentage(sr.Annotations)).ShouldNot(gomega.Succeed()) +} + +func TestInvalidAutoscalerTargetUtilizationPercentageHighValue(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[constants.TargetUtilizationPercentage] = "101" + g.Expect(validateAutoscalerTargetUtilizationPercentage(sr.Annotations)).ShouldNot(gomega.Succeed()) +} + +func TestInvalidAutoscalerLowMinReplicas(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[mmcontstant.MinScaleAnnotationKey] = "0" + g.Expect(validateScalingHPA(sr.Annotations)).ShouldNot(gomega.Succeed()) +} + +func TestInvalidAutoscalerMaxReplicasMustBiggerThanMixReplicas(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[mmcontstant.MinScaleAnnotationKey] = "4" + sr.ObjectMeta.Annotations[mmcontstant.MaxScaleAnnotationKey] = "3" + g.Expect(validateAutoScalingReplicas(sr.Annotations, math.MaxUint16)).ShouldNot(gomega.Succeed()) +} +func TestDuplicatedReplicas(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + g.Expect(validateAutoScalingReplicas(sr.Annotations, 1)).ShouldNot(gomega.Succeed()) +} + +func TestValidAutoscalerMetricsType(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[constants.AutoscalerMetrics] = "memory" + g.Expect(validateHPAMetrics(constants.AutoscalerMetricsType("memory"))).Should(gomega.Succeed()) +} + +func TestInvalidAutoscalerMetricsType(t *testing.T) { + g := gomega.NewGomegaWithT(t) + sr := makeTestRawServingRuntime() + sr.ObjectMeta.Annotations[constants.AutoscalerMetrics] = "conccurrency" + g.Expect(validateHPAMetrics(constants.AutoscalerMetricsType("conccurrency"))).ShouldNot(gomega.Succeed()) +} diff --git a/apis/serving/v1alpha1/zz_generated.deepcopy.go b/apis/serving/v1alpha1/zz_generated.deepcopy.go index 
73421bf5..c2c0e74f 100644 --- a/apis/serving/v1alpha1/zz_generated.deepcopy.go +++ b/apis/serving/v1alpha1/zz_generated.deepcopy.go @@ -260,6 +260,21 @@ func (in *S3StorageSource) DeepCopy() *S3StorageSource { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServingRuntimeWebhook) DeepCopyInto(out *ServingRuntimeWebhook) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServingRuntimeWebhook. +func (in *ServingRuntimeWebhook) DeepCopy() *ServingRuntimeWebhook { + if in == nil { + return nil + } + out := new(ServingRuntimeWebhook) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Storage) DeepCopyInto(out *Storage) { *out = *in diff --git a/config/certmanager/certificate.yaml b/config/certmanager/certificate.yaml index 6bd4ae7b..5782dfe3 100644 --- a/config/certmanager/certificate.yaml +++ b/config/certmanager/certificate.yaml @@ -26,14 +26,14 @@ spec: apiVersion: cert-manager.io/v1 kind: Certificate metadata: - name: serving-cert # this name should match the one appeared in kustomizeconfig.yaml + name: modelmesh-webhook-server-cert # this name should match the one appeared in kustomizeconfig.yaml namespace: system spec: - # $(SERVICE_NAME) and $(SERVICE_NAMESPACE) will be substituted by kustomize + # SERVICE_NAME_PLACEHOLDER and SERVICE_NAMESPACE_PLACEHOLDER will be substituted by kustomize dnsNames: - - $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc - - $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc.cluster.local + - $(SERVICE_NAME_PLACEHOLDER).$(SERVICE_NAMESPACE_PLACEHOLDER).svc + - $(SERVICE_NAME_PLACEHOLDER).$(SERVICE_NAMESPACE_PLACEHOLDER).svc.cluster.local issuerRef: kind: Issuer name: selfsigned-issuer - secretName: webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize + secretName: modelmesh-webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize diff --git a/config/crd/patches/webhook_in_predictors.yaml b/config/crd/patches/webhook_in_predictors.yaml index c3daebf5..792421ae 100644 --- a/config/crd/patches/webhook_in_predictors.yaml +++ b/config/crd/patches/webhook_in_predictors.yaml @@ -27,6 +27,5 @@ spec: # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager) caBundle: Cg== service: - namespace: system name: webhook-service path: /convert diff --git a/config/crd/patches/webhook_in_servingruntimes.yaml b/config/crd/patches/webhook_in_servingruntimes.yaml index 704b6026..30724f90 100644 --- a/config/crd/patches/webhook_in_servingruntimes.yaml +++ b/config/crd/patches/webhook_in_servingruntimes.yaml @@ -27,6 +27,5 @@ spec: # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager) caBundle: Cg== service: - namespace: system name: webhook-service path: /convert diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index 8001c186..b5467fc6 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -11,72 +11,36 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# Adds namespace to all resources. 
-#namespace: model-serving -# Value of this field is prepended to the -# names of all resources, e.g. a deployment named -# "wordpress" becomes "alices-wordpress". -# Note that it should also match with the prefix (text before '-') of the namespace -# field above. -#namePrefix: model-serving- - -# Labels to add to all resources and selectors. -#commonLabels: -# someName: someValue - -# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in -# crd/kustomization.yaml -#- ../webhook -# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. -#- ../certmanager -# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. -#- ../prometheus - -#patchesStrategicMerge: -# Protect the /metrics endpoint by putting it behind auth. -# If you want your controller-manager to expose the /metrics -# endpoint w/o any authn/z, please comment the following line. -#- manager_auth_proxy_patch.yaml - -# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in -# crd/kustomization.yaml -#- manager_webhook_patch.yaml - -# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. -# Uncomment 'CERTMANAGER' sections in crd/kustomization.yaml to enable the CA injection in the admission webhooks. -# 'CERTMANAGER' needs to be enabled to use ca injection -#- webhookcainjection_patch.yaml - -# the following config is for teaching kustomize how to do var substitution -#vars: -# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. -#- name: CERTIFICATE_NAMESPACE # namespace of the certificate CR -# objref: -# kind: Certificate -# group: cert-manager.io -# version: v1alpha2 -# name: serving-cert # this name should match the one in certificate.yaml -# fieldref: -# fieldpath: metadata.namespace -#- name: CERTIFICATE_NAME -# objref: -# kind: Certificate -# group: cert-manager.io -# version: v1alpha2 -# name: serving-cert # this name should match the one in certificate.yaml -#- name: SERVICE_NAMESPACE # namespace of the service -# objref: -# kind: Service -# version: v1 -# name: webhook-service -# fieldref: -# fieldpath: metadata.namespace -#- name: SERVICE_NAME -# objref: -# kind: Service -# version: v1 -# name: webhook-service +vars: + - fieldref: + fieldPath: metadata.namespace + name: CERTIFICATE_NAMESPACE_PLACEHOLDER + objref: + group: cert-manager.io + kind: Certificate + name: modelmesh-webhook-server-cert + version: v1 + - fieldref: {} + name: CERTIFICATE_NAME_PLACEHOLDER + objref: + group: cert-manager.io + kind: Certificate + name: modelmesh-webhook-server-cert + version: v1 + - fieldref: + fieldPath: metadata.namespace + name: SERVICE_NAMESPACE_PLACEHOLDER + objref: + kind: Service + name: modelmesh-webhook-server-service + version: v1 + - fieldref: {} + name: SERVICE_NAME_PLACEHOLDER + objref: + kind: Service + name: modelmesh-webhook-server-service + version: v1 configMapGenerator: - files: @@ -93,3 +57,9 @@ kind: Kustomization resources: - ../crd - ../manager + - ../webhook + - ../certmanager + +patches: + - path: manager_webhook_patch.yaml + - path: webhookcainjection_patch.yaml diff --git a/config/default/manager_auth_proxy_patch.yaml b/config/default/manager_auth_proxy_patch.yaml index dbb1175c..5e40f99e 100644 --- a/config/default/manager_auth_proxy_patch.yaml +++ b/config/default/manager_auth_proxy_patch.yaml @@ -17,7 +17,6 @@ apiVersion: apps/v1 kind: Deployment metadata: name: 
controller-manager - namespace: system spec: template: spec: diff --git a/config/default/manager_webhook_patch.yaml b/config/default/manager_webhook_patch.yaml index b094527f..57279dbf 100644 --- a/config/default/manager_webhook_patch.yaml +++ b/config/default/manager_webhook_patch.yaml @@ -14,8 +14,7 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: controller-manager - namespace: system + name: modelmesh-controller spec: template: spec: @@ -23,14 +22,14 @@ spec: - name: manager ports: - containerPort: 9443 - name: webhook-server + name: webhook protocol: TCP volumeMounts: - mountPath: /tmp/k8s-webhook-server/serving-certs - name: cert + name: modelmesh-webhook-server-cert readOnly: true volumes: - - name: cert + - name: modelmesh-webhook-server-cert secret: defaultMode: 420 - secretName: webhook-server-cert + secretName: modelmesh-webhook-server-cert diff --git a/config/default/webhookcainjection_patch.yaml b/config/default/webhookcainjection_patch.yaml index 60e4bc91..33e8bcba 100644 --- a/config/default/webhookcainjection_patch.yaml +++ b/config/default/webhookcainjection_patch.yaml @@ -12,17 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # This patch add annotation to admission webhook config and -# the variables $(CERTIFICATE_NAMESPACE) and $(CERTIFICATE_NAME) will be substituted by kustomize. -apiVersion: admissionregistration.k8s.io/v1 -kind: MutatingWebhookConfiguration -metadata: - name: mutating-webhook-configuration - annotations: - cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) ---- +# the string CERTIFICATE_NAMESPACE_PLACEHOLDER and CERTIFICATE_NAME_PLACEHOLDER will be replaced by kustomize. apiVersion: admissionregistration.k8s.io/v1 kind: ValidatingWebhookConfiguration metadata: - name: validating-webhook-configuration + name: servingruntime.serving.kserve.io annotations: - cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) + cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE_PLACEHOLDER)/$(CERTIFICATE_NAME_PLACEHOLDER) diff --git a/config/dependencies/fvt.yaml b/config/dependencies/fvt.yaml index db6942d5..42859f50 100644 --- a/config/dependencies/fvt.yaml +++ b/config/dependencies/fvt.yaml @@ -43,6 +43,8 @@ spec: containers: - command: - etcd + - --data-dir # use data directory under /tmp for read/write access by non-root user on OpenShift + - /tmp/etcd.data - --listen-client-urls - http://0.0.0.0:2379 - --advertise-client-urls diff --git a/config/namespace-runtimes/kustomization.yaml b/config/namespace-runtimes/kustomization.yaml index 657e278f..e361106f 100644 --- a/config/namespace-runtimes/kustomization.yaml +++ b/config/namespace-runtimes/kustomization.yaml @@ -2,7 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization bases: - ../runtimes -patchesJson6902: +patches: - target: group: serving.kserve.io version: v1alpha1 diff --git a/config/prometheus/monitor.yaml b/config/prometheus/monitor.yaml index 53db59a4..a5dcccc7 100644 --- a/config/prometheus/monitor.yaml +++ b/config/prometheus/monitor.yaml @@ -18,7 +18,6 @@ metadata: labels: control-plane: controller-manager name: controller-manager-metrics-monitor - namespace: system spec: endpoints: - path: /metrics diff --git a/config/rbac/cluster-scope/kustomization.yaml b/config/rbac/cluster-scope/kustomization.yaml index 3cf2dadc..53af0645 100644 --- a/config/rbac/cluster-scope/kustomization.yaml +++ b/config/rbac/cluster-scope/kustomization.yaml @@ -15,3 +15,5 @@ 
resources: - ../common - role.yaml - role_binding.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization diff --git a/config/rbac/cluster-scope/role.yaml b/config/rbac/cluster-scope/role.yaml index e0110393..84cffb27 100644 --- a/config/rbac/cluster-scope/role.yaml +++ b/config/rbac/cluster-scope/role.yaml @@ -197,3 +197,15 @@ rules: - get - patch - update + - apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + - horizontalpodautoscalers/status + verbs: + - get + - list + - watch + - create + - delete + - update diff --git a/config/rbac/common/kustomization.yaml b/config/rbac/common/kustomization.yaml index 577e5b55..3168c7c7 100644 --- a/config/rbac/common/kustomization.yaml +++ b/config/rbac/common/kustomization.yaml @@ -24,6 +24,7 @@ resources: - modelmesh-service-account.yaml - networkpolicy-controller.yaml - networkpolicy-runtimes.yaml + - networkpolicy-webhook.yaml # Comment the following 4 lines if you want to disable # the auth proxy (https://github.com/brancz/kube-rbac-proxy) # which protects your /metrics endpoint. diff --git a/config/rbac/common/networkpolicy-webhook.yaml b/config/rbac/common/networkpolicy-webhook.yaml new file mode 100644 index 00000000..8c337ac2 --- /dev/null +++ b/config/rbac/common/networkpolicy-webhook.yaml @@ -0,0 +1,29 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: modelmesh-webhook +spec: + podSelector: + matchLabels: + app.kubernetes.io/managed-by: modelmesh-controller + control-plane: modelmesh-controller + ingress: + # exposed for webhook + - ports: + - port: 9443 + protocol: TCP + policyTypes: + - Ingress diff --git a/config/rbac/namespace-scope/kustomization.yaml b/config/rbac/namespace-scope/kustomization.yaml index 3cf2dadc..53af0645 100644 --- a/config/rbac/namespace-scope/kustomization.yaml +++ b/config/rbac/namespace-scope/kustomization.yaml @@ -15,3 +15,5 @@ resources: - ../common - role.yaml - role_binding.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization diff --git a/config/rbac/namespace-scope/role.yaml b/config/rbac/namespace-scope/role.yaml index 8624c152..238c7b51 100644 --- a/config/rbac/namespace-scope/role.yaml +++ b/config/rbac/namespace-scope/role.yaml @@ -165,3 +165,15 @@ rules: - get - patch - update + - apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + - horizontalpodautoscalers/status + verbs: + - get + - list + - watch + - create + - delete + - update diff --git a/config/webhook/kustomization.yaml b/config/webhook/kustomization.yaml new file mode 100644 index 00000000..79e0fef7 --- /dev/null +++ b/config/webhook/kustomization.yaml @@ -0,0 +1,20 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +resources: + - manifests.yaml + - service.yaml + +configurations: + - kustomizeconfig.yaml diff --git a/config/webhook/kustomizeconfig.yaml b/config/webhook/kustomizeconfig.yaml new file mode 100644 index 00000000..487da1e6 --- /dev/null +++ b/config/webhook/kustomizeconfig.yaml @@ -0,0 +1,31 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# the following config is for teaching kustomize where to look at when substituting vars. +# It requires kustomize v2.1.0 or newer to work properly. +nameReference: + - kind: Service + version: v1 + fieldSpecs: + - kind: ValidatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/name + +namespace: + - kind: ValidatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/namespace + create: true + +varReference: + - path: metadata/annotations diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml new file mode 100644 index 00000000..5cd4b89d --- /dev/null +++ b/config/webhook/manifests.yaml @@ -0,0 +1,40 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
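The ValidatingWebhookConfiguration below points the API server at the path declared by the kubebuilder marker on `ServingRuntimeWebhook`. The corresponding main.go change appears only in the diffstat; a registration along these lines (a sketch against the controller-runtime webhook server, not the literal hunk) is what ties the handler to that path:

```go
// Sketch only — the actual main.go hunk is not included in this excerpt.
package sketch

import (
	servingv1alpha1 "github.com/kserve/modelmesh-serving/apis/serving/v1alpha1"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/webhook"
)

// registerServingRuntimeWebhook wires the validating handler to the path used
// in the manifest below; the admission decoder is injected by controller-runtime.
func registerServingRuntimeWebhook(mgr ctrl.Manager) {
	mgr.GetWebhookServer().Register(
		"/validate-serving-modelmesh-io-v1alpha1-servingruntime",
		&webhook.Admission{Handler: &servingv1alpha1.ServingRuntimeWebhook{Client: mgr.GetClient()}},
	)
}
```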
+apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + name: servingruntime.serving.kserve.io +webhooks: + - admissionReviewVersions: + - v1 + clientConfig: + caBundle: Cg== + service: + name: modelmesh-webhook-server-service + path: /validate-serving-modelmesh-io-v1alpha1-servingruntime + port: 9443 + failurePolicy: Fail + name: servingruntime.modelmesh-webhook-server.default + rules: + - apiGroups: + - serving.kserve.io + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - clusterservingruntimes + - servingruntimes + sideEffects: None diff --git a/config/webhook/service.yaml b/config/webhook/service.yaml new file mode 100644 index 00000000..b1f4d3db --- /dev/null +++ b/config/webhook/service.yaml @@ -0,0 +1,25 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +apiVersion: v1 +kind: Service +metadata: + name: modelmesh-webhook-server-service + namespace: system +spec: + ports: + - port: 9443 + protocol: TCP + targetPort: webhook + selector: + control-plane: modelmesh-controller diff --git a/controllers/autoscaler/autoscaler_reconciler.go b/controllers/autoscaler/autoscaler_reconciler.go new file mode 100644 index 00000000..95909b71 --- /dev/null +++ b/controllers/autoscaler/autoscaler_reconciler.go @@ -0,0 +1,129 @@ +//Copyright 2021 IBM Corporation +// +//Licensed under the Apache License, Version 2.0 (the "License"); +//you may not use this file except in compliance with the License. +//You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +//Unless required by applicable law or agreed to in writing, software +//distributed under the License is distributed on an "AS IS" BASIS, +//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//See the License for the specific language governing permissions and +//limitations under the License. 
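The next file introduces the autoscaler selection layer: it reads the `serving.kserve.io/autoscalerClass` annotation and, for both the `hpa` and `none` classes, builds an HPA reconciler (for `none` only so a leftover HPA can be deleted). A sketch of the call pattern, matching the signatures below and the servingruntime_controller.go hunk later in this patch:

```go
package sketch

import (
	kserveapi "github.com/kserve/kserve/pkg/apis/serving/v1alpha1"
	"github.com/kserve/modelmesh-serving/controllers/autoscaler"
	"k8s.io/apimachinery/pkg/runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// reconcileAutoscaling sketches how a caller drives the new package: the
// ServingRuntime controller passes scaleToZero=true for scale-to-zero and for
// the "none" class, which deletes any existing HPA; otherwise the HPA is
// created or updated to match the runtime's annotations.
func reconcileAutoscaling(cl client.Client, scheme *runtime.Scheme, rt *kserveapi.ServingRuntime,
	mmDeploymentName, mmNamespace string, scaleToZero bool) error {
	asr, err := autoscaler.NewAutoscalerReconciler(cl, scheme, rt, mmDeploymentName, mmNamespace)
	if err != nil {
		return err
	}
	_, err = asr.Reconcile(scaleToZero)
	return err
}
```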
+ +package autoscaler + +import ( + "fmt" + + "github.com/pkg/errors" + + kserveapi "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/modelmesh-serving/controllers/hpa" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +const ( + AutoscalerClassNone = "none" +) + +type Autoscaler struct { + AutoscalerClass constants.AutoscalerClassType + HPA *hpa.HPAReconciler +} + +// AutoscalerReconciler is the struct of Raw K8S Object +type AutoscalerReconciler struct { + client client.Client + scheme *runtime.Scheme + Autoscaler *Autoscaler +} + +func NewAutoscalerReconciler(client client.Client, + scheme *runtime.Scheme, + servingRuntime interface{}, mmDeploymentName string, mmNamespace string) (*AutoscalerReconciler, error) { + + as, err := createAutoscaler(client, scheme, servingRuntime, mmDeploymentName, mmNamespace) + if err != nil { + return nil, err + } + return &AutoscalerReconciler{ + client: client, + scheme: scheme, + Autoscaler: as, + }, err +} + +func getAutoscalerClass(metadata metav1.ObjectMeta) constants.AutoscalerClassType { + annotations := metadata.Annotations + if value, ok := annotations[constants.AutoscalerClass]; ok { + return constants.AutoscalerClassType(value) + } else { + return AutoscalerClassNone + } +} + +func createAutoscaler(client client.Client, + scheme *runtime.Scheme, servingRuntime interface{}, mmDeploymentName string, mmNamespace string) (*Autoscaler, error) { + var runtimeMeta metav1.ObjectMeta + isSR := false + + sr, ok := servingRuntime.(*kserveapi.ServingRuntime) + if ok { + runtimeMeta = sr.ObjectMeta + isSR = true + } + csr, ok := servingRuntime.(*kserveapi.ClusterServingRuntime) + if ok { + runtimeMeta = csr.ObjectMeta + } + + as := &Autoscaler{} + ac := getAutoscalerClass(runtimeMeta) + as.AutoscalerClass = ac + + switch ac { + case constants.AutoscalerClassHPA: + as.HPA = hpa.NewHPAReconciler(client, scheme, runtimeMeta, mmDeploymentName, mmNamespace) + if isSR { + if err := controllerutil.SetControllerReference(sr, as.HPA.HPA, scheme); err != nil { + return nil, fmt.Errorf("fails to set HPA owner reference for ServingRuntime: %w", err) + } + } else { + if err := controllerutil.SetControllerReference(csr, as.HPA.HPA, scheme); err != nil { + return nil, fmt.Errorf("fails to set HPA owner reference for ClusterServingRuntime: %w", err) + } + } + case AutoscalerClassNone: + // Set HPA reconciler even though AutoscalerClass is None to delete existing hpa + as.HPA = hpa.NewHPAReconciler(client, scheme, runtimeMeta, mmDeploymentName, mmNamespace) + return as, nil + default: + return nil, errors.New("unknown autoscaler class type.") + } + return as, nil +} + +// Reconcile ... 
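// For the hpa and none classes this delegates to the HPA reconciler, which
// creates or updates the HPA (or deletes it when scaleToZero is true); the
// cached HPA object is then cleared on scale-to-zero.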
+func (r *AutoscalerReconciler) Reconcile(scaleToZero bool) (*Autoscaler, error) { + //reconcile Autoscaler + //In the case of a new autoscaler plugin, it checks AutoscalerClassType + if r.Autoscaler.AutoscalerClass == constants.AutoscalerClassHPA || r.Autoscaler.AutoscalerClass == AutoscalerClassNone { + _, err := r.Autoscaler.HPA.Reconcile(scaleToZero) + if err != nil { + return nil, err + } + } + + if scaleToZero { + r.Autoscaler.HPA.HPA = nil + } + + return r.Autoscaler, nil +} diff --git a/controllers/autoscaler/autoscaler_reconciler_test.go b/controllers/autoscaler/autoscaler_reconciler_test.go new file mode 100644 index 00000000..8de8c1ba --- /dev/null +++ b/controllers/autoscaler/autoscaler_reconciler_test.go @@ -0,0 +1,72 @@ +//Copyright 2021 IBM Corporation +// +//Licensed under the Apache License, Version 2.0 (the "License"); +//you may not use this file except in compliance with the License. +//You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +//Unless required by applicable law or agreed to in writing, software +//distributed under the License is distributed on an "AS IS" BASIS, +//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//See the License for the specific language governing permissions and +//limitations under the License. + +package autoscaler + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + + "github.com/kserve/kserve/pkg/constants" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestGetAutoscalerClass(t *testing.T) { + servingRuntimeName := "my-model" + namespace := "test" + + testCases := []struct { + name string + servingRuntimeMetaData *metav1.ObjectMeta + expectedAutoScalerType constants.AutoscalerClassType + }{ + { + name: "Return default AutoScaler, if the autoscalerClass annotation is not set", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{}, + }, + expectedAutoScalerType: AutoscalerClassNone, + }, + { + name: "Return none AutoScaler, if the autoscalerClass annotation set none", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{"serving.kserve.io/autoscalerClass": "none"}, + }, + expectedAutoScalerType: AutoscalerClassNone, + }, + { + name: "Return hpa AutoScaler, if the autoscalerClass annotation set hpa", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{"serving.kserve.io/autoscalerClass": "hpa"}, + }, + expectedAutoScalerType: constants.AutoscalerClassHPA, + }, + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + result := getAutoscalerClass(*tt.servingRuntimeMetaData) + if diff := cmp.Diff(tt.expectedAutoScalerType, result); diff != "" { + t.Errorf("Test %q unexpected result (-want +got): %v", t.Name(), diff) + } + }) + } +} diff --git a/controllers/hpa/hpa_reconciler.go b/controllers/hpa/hpa_reconciler.go new file mode 100644 index 00000000..1158dd8e --- /dev/null +++ b/controllers/hpa/hpa_reconciler.go @@ -0,0 +1,193 @@ +//Copyright 2021 IBM Corporation +// +//Licensed under the Apache License, Version 2.0 (the "License"); +//you may not use this file except in compliance with the License. 
+//You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +//Unless required by applicable law or agreed to in writing, software +//distributed under the License is distributed on an "AS IS" BASIS, +//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//See the License for the specific language governing permissions and +//limitations under the License. + +package hpa + +import ( + "context" + "strconv" + + "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/kserve/pkg/utils" + mmcontstant "github.com/kserve/modelmesh-serving/pkg/constants" + v2beta2 "k8s.io/api/autoscaling/v2beta2" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/equality" + apierr "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + logf "sigs.k8s.io/controller-runtime/pkg/log" +) + +var log = logf.Log.WithName("HPAReconciler") + +// HPAReconciler is the struct of Raw K8S Object +type HPAReconciler struct { + client client.Client + scheme *runtime.Scheme + HPA *v2beta2.HorizontalPodAutoscaler +} + +func NewHPAReconciler(client client.Client, + scheme *runtime.Scheme, runtimeMeta metav1.ObjectMeta, mmDeploymentName string, mmNamespace string) *HPAReconciler { + return &HPAReconciler{ + client: client, + scheme: scheme, + HPA: createHPA(runtimeMeta, mmDeploymentName, mmNamespace), + } +} + +func getHPAMetrics(metadata metav1.ObjectMeta) []v2beta2.MetricSpec { + var metrics []v2beta2.MetricSpec + var utilization int32 = constants.DefaultCPUUtilization + + annotations := metadata.Annotations + resourceName := corev1.ResourceCPU + + if value, ok := annotations[constants.TargetUtilizationPercentage]; ok { + utilizationInt, _ := strconv.Atoi(value) + utilization = int32(utilizationInt) + } + + if value, ok := annotations[constants.AutoscalerMetrics]; ok { + resourceName = corev1.ResourceName(value) + } + + metricTarget := v2beta2.MetricTarget{ + Type: "Utilization", + AverageUtilization: &utilization, + } + + ms := v2beta2.MetricSpec{ + Type: v2beta2.ResourceMetricSourceType, + Resource: &v2beta2.ResourceMetricSource{ + Name: resourceName, + Target: metricTarget, + }, + } + + metrics = append(metrics, ms) + return metrics +} + +func createHPA(runtimeMeta metav1.ObjectMeta, mmDeploymentName string, mmNamespace string) *v2beta2.HorizontalPodAutoscaler { + minReplicas := int32(constants.DefaultMinReplicas) + maxReplicas := int32(constants.DefaultMinReplicas) + annotations := runtimeMeta.Annotations + + if value, ok := annotations[mmcontstant.MinScaleAnnotationKey]; ok { + minReplicasInt, _ := strconv.Atoi(value) + minReplicas = int32(minReplicasInt) + + } + if value, ok := annotations[mmcontstant.MaxScaleAnnotationKey]; ok { + maxReplicasInt, _ := strconv.Atoi(value) + maxReplicas = int32(maxReplicasInt) + } + + if maxReplicas < minReplicas { + maxReplicas = minReplicas + } + + metrics := getHPAMetrics(runtimeMeta) + + hpaObjectMeta := metav1.ObjectMeta{ + Name: mmDeploymentName, + Namespace: mmNamespace, + Labels: utils.Union(runtimeMeta.Labels, map[string]string{ + constants.InferenceServicePodLabelKey: runtimeMeta.Name, + constants.KServiceComponentLabel: string(v1beta1.PredictorComponent), + }), + Annotations: runtimeMeta.Annotations, + } + + hpa := &v2beta2.HorizontalPodAutoscaler{ + ObjectMeta: hpaObjectMeta, + Spec: 
v2beta2.HorizontalPodAutoscalerSpec{ + ScaleTargetRef: v2beta2.CrossVersionObjectReference{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: hpaObjectMeta.Name, + }, + MinReplicas: &minReplicas, + MaxReplicas: maxReplicas, + + Metrics: metrics, + Behavior: &v2beta2.HorizontalPodAutoscalerBehavior{}, + }, + } + return hpa +} + +// checkHPAExist checks if the hpa exists? +func (r *HPAReconciler) checkHPAExist(client client.Client) (constants.CheckResultType, *v2beta2.HorizontalPodAutoscaler, error) { + existingHPA := &v2beta2.HorizontalPodAutoscaler{} + err := client.Get(context.TODO(), types.NamespacedName{ + Namespace: r.HPA.ObjectMeta.Namespace, + Name: r.HPA.ObjectMeta.Name, + }, existingHPA) + if err != nil { + if apierr.IsNotFound(err) { + return constants.CheckResultCreate, nil, nil + } + return constants.CheckResultUnknown, nil, err + } + + //existed, check equivalent + if semanticHPAEquals(r.HPA, existingHPA) { + return constants.CheckResultExisted, existingHPA, nil + } + return constants.CheckResultUpdate, existingHPA, nil +} + +func semanticHPAEquals(desired, existing *v2beta2.HorizontalPodAutoscaler) bool { + return equality.Semantic.DeepEqual(desired.Spec.Metrics, existing.Spec.Metrics) && + equality.Semantic.DeepEqual(desired.Spec.MaxReplicas, existing.Spec.MaxReplicas) && + equality.Semantic.DeepEqual(*desired.Spec.MinReplicas, *existing.Spec.MinReplicas) +} + +// Reconcile ... +func (r *HPAReconciler) Reconcile(scaleToZero bool) (*v2beta2.HorizontalPodAutoscaler, error) { + //reconcile + checkResult, existingHPA, err := r.checkHPAExist(r.client) + log.Info("service reconcile", "checkResult", checkResult, "scaleToZero", scaleToZero, "err", err) + if err != nil { + return nil, err + } + + if checkResult == constants.CheckResultCreate && !scaleToZero { + if err = r.client.Create(context.TODO(), r.HPA); err != nil { + return nil, err + } + return r.HPA, nil + + } else if checkResult == constants.CheckResultUpdate { //CheckResultUpdate + if err = r.client.Update(context.TODO(), r.HPA); err != nil { + return nil, err + } + return r.HPA, nil + + } else if checkResult == constants.CheckResultExisted && scaleToZero { + // when scaleToZero is true, delete HPA if it exist + if err = r.client.Delete(context.TODO(), existingHPA, &client.DeleteOptions{}); err != nil { + return nil, err + } + return nil, nil + } else { + return existingHPA, nil + } +} diff --git a/controllers/hpa/hpa_reconciler_test.go b/controllers/hpa/hpa_reconciler_test.go new file mode 100644 index 00000000..bdb857e9 --- /dev/null +++ b/controllers/hpa/hpa_reconciler_test.go @@ -0,0 +1,157 @@ +//Copyright 2021 IBM Corporation +// +//Licensed under the Apache License, Version 2.0 (the "License"); +//you may not use this file except in compliance with the License. +//You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +//Unless required by applicable law or agreed to in writing, software +//distributed under the License is distributed on an "AS IS" BASIS, +//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//See the License for the specific language governing permissions and +//limitations under the License. 
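Before the unit tests, a compact sketch (not part of the patch) of the mapping `createHPA` performs: min/max-scale annotations become MinReplicas/MaxReplicas and the HPA targets the ModelMesh Deployment by name. It uses the exported annotation-key constants rather than assuming their string values, and the expected output follows from the defaults verified in the tests below:

```go
package hpa

import (
	"fmt"

	mmconstant "github.com/kserve/modelmesh-serving/pkg/constants"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Example_createHPA illustrates the annotations-to-HPA translation.
func Example_createHPA() {
	meta := metav1.ObjectMeta{
		Name:      "my-runtime",
		Namespace: "modelmesh-serving",
		Annotations: map[string]string{
			mmconstant.MinScaleAnnotationKey: "2",
			mmconstant.MaxScaleAnnotationKey: "4",
		},
	}
	hpa := createHPA(meta, "modelmesh-serving-my-runtime", "modelmesh-serving")
	fmt.Println(*hpa.Spec.MinReplicas, hpa.Spec.MaxReplicas, hpa.Spec.ScaleTargetRef.Name)
	// Output: 2 4 modelmesh-serving-my-runtime
}
```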
+ +package hpa + +import ( + "fmt" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/kserve/kserve/pkg/constants" + mmcontstant "github.com/kserve/modelmesh-serving/pkg/constants" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestGetHPAMetrics(t *testing.T) { + servingRuntimeName := "my-model" + namespace := "test" + + testCases := []struct { + name string + servingRuntimeMetaData *metav1.ObjectMeta + expectedTargetUtilizationPercentage int32 + expectedAutoscalerMetrics corev1.ResourceName + }{ + { + name: "Check default HPAMetrics", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{}, + }, + expectedTargetUtilizationPercentage: int32(80), + expectedAutoscalerMetrics: corev1.ResourceName("cpu"), + }, + { + name: "Check HPAMetrics if annotations has " + constants.AutoscalerMetrics, + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{constants.AutoscalerMetrics: "memory"}, + }, + expectedTargetUtilizationPercentage: int32(80), + expectedAutoscalerMetrics: corev1.ResourceName("memory"), + }, + { + name: "Check HPAMetrics if annotations has " + constants.TargetUtilizationPercentage, + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{constants.TargetUtilizationPercentage: "50"}, + }, + expectedTargetUtilizationPercentage: int32(50), + expectedAutoscalerMetrics: corev1.ResourceName("cpu"), + }, + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + result := getHPAMetrics(*tt.servingRuntimeMetaData) + if diff := cmp.Diff(tt.expectedTargetUtilizationPercentage, *result[0].Resource.Target.AverageUtilization); diff != "" { + t.Errorf("Test %q unexpected result (-want +got): %v", t.Name(), diff) + } + if diff := cmp.Diff(tt.expectedAutoscalerMetrics, result[0].Resource.Name); diff != "" { + t.Errorf("Test %q unexpected result (-want +got): %v", t.Name(), diff) + } + }) + } +} + +func TestCreateHPA(t *testing.T) { + servingRuntimeName := "my-model" + namespace := "test" + deploymentName := fmt.Sprintf("%s-%s", servingRuntimeName, namespace) + + testCases := []struct { + name string + servingRuntimeMetaData *metav1.ObjectMeta + mmDeploymentName *string + mmNamespace *string + expectedMinReplicas int32 + expectedMaxReplicas int32 + }{ + { + name: "Check default HPA replicas", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{}, + }, + mmDeploymentName: &deploymentName, + mmNamespace: &namespace, + expectedMinReplicas: int32(1), + expectedMaxReplicas: int32(1), + }, + { + name: "Check HPA replicas if annotations has " + mmcontstant.MaxScaleAnnotationKey, + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{mmcontstant.MaxScaleAnnotationKey: "2"}, + }, + mmDeploymentName: &deploymentName, + mmNamespace: &namespace, + expectedMinReplicas: int32(1), + expectedMaxReplicas: int32(2), + }, + { + name: "Check HPA replicas if annotations has " + mmcontstant.MinScaleAnnotationKey + ". 
max replicas should be the same as min replicas", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{mmcontstant.MinScaleAnnotationKey: "2"}, + }, + mmDeploymentName: &deploymentName, + mmNamespace: &namespace, + expectedMinReplicas: int32(2), + expectedMaxReplicas: int32(2), + }, + { + name: "Check HPA replicas if annotations set min/max replicas both", + servingRuntimeMetaData: &metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: namespace, + Annotations: map[string]string{mmcontstant.MinScaleAnnotationKey: "2", mmcontstant.MaxScaleAnnotationKey: "3"}, + }, + mmDeploymentName: &deploymentName, + mmNamespace: &namespace, + expectedMinReplicas: int32(2), + expectedMaxReplicas: int32(3), + }, + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + hpa := createHPA(*tt.servingRuntimeMetaData, *tt.mmDeploymentName, *tt.mmNamespace) + if diff := cmp.Diff(tt.expectedMinReplicas, *hpa.Spec.MinReplicas); diff != "" { + t.Errorf("Test %q unexpected result (-want +got): %v", t.Name(), diff) + } + if diff := cmp.Diff(tt.expectedMaxReplicas, hpa.Spec.MaxReplicas); diff != "" { + t.Errorf("Test %q unexpected result (-want +got): %v", t.Name(), diff) + } + }) + } +} diff --git a/controllers/servingruntime_controller.go b/controllers/servingruntime_controller.go index 7fdefac8..adbfd46a 100644 --- a/controllers/servingruntime_controller.go +++ b/controllers/servingruntime_controller.go @@ -23,31 +23,30 @@ import ( "sync" "time" + "github.com/go-logr/logr" + mf "github.com/manifestival/manifestival" + + kserveapi "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + api "github.com/kserve/modelmesh-serving/apis/serving/v1alpha1" + "github.com/kserve/modelmesh-serving/controllers/autoscaler" + "github.com/kserve/modelmesh-serving/controllers/modelmesh" "github.com/kserve/modelmesh-serving/pkg/config" - "github.com/kserve/modelmesh-serving/pkg/mmesh" "github.com/kserve/modelmesh-serving/pkg/predictor_source" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/event" - "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" - "github.com/go-logr/logr" - mf "github.com/manifestival/manifestival" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/source" - - kserveapi "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" - api "github.com/kserve/modelmesh-serving/apis/serving/v1alpha1" - "github.com/kserve/modelmesh-serving/controllers/modelmesh" ) const ( @@ -184,6 +183,7 @@ func (r *ServingRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Reque // Reconcile this serving runtime rt := &kserveapi.ServingRuntime{} + crt := &kserveapi.ClusterServingRuntime{} var owner mf.Owner var spec *kserveapi.ServingRuntimeSpec @@ -197,7 +197,6 @@ func (r *ServingRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Reque return r.removeRuntimeFromInfoMap(req) } // try to find the runtime in cluster ServingRuntimes - crt := &kserveapi.ClusterServingRuntime{} if err = r.Client.Get(ctx, types.NamespacedName{Name: req.Name}, crt); err == nil { spec = &crt.Spec owner = crt @@ 
-270,11 +269,58 @@ func (r *ServingRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{}, nil } + // At the moment, ModelMesh deployment name is the combined of ServingRuntime and deploymentObject name. + // TO-DO: refactor the mmDeploymentName to use mmDeployment object name. + mmDeploymentName := fmt.Sprintf("%s-%s", mmDeployment.ServiceName, mmDeployment.Name) + + var as *autoscaler.AutoscalerReconciler + if crt.GetName() != "" { + as, err = autoscaler.NewAutoscalerReconciler(r.Client, r.Client.Scheme(), crt, mmDeploymentName, mmDeployment.Namespace) + } else { + as, err = autoscaler.NewAutoscalerReconciler(r.Client, r.Client.Scheme(), rt, mmDeploymentName, mmDeployment.Namespace) + } + + if err != nil { + log.Error(err, "fails to create an autoscaler controller: %w", "skip to create HPA") + } + replicas, requeueDuration, err := r.determineReplicasAndRequeueDuration(ctx, log, cfg, spec, req.NamespacedName) if err != nil { return RequeueResult, fmt.Errorf("could not determine replicas: %w", err) } - mmDeployment.Replicas = replicas + + //ScaleToZero or None autoscaler case + if replicas == uint16(0) || as.Autoscaler.AutoscalerClass == autoscaler.AutoscalerClassNone { + mmDeployment.Replicas = replicas + if _, err = as.Reconcile(true); err != nil { + return ctrl.Result{}, fmt.Errorf("HPA reconcile error: %w", err) + } + } else { + //Autoscaler case + if as.Autoscaler != nil { + + // To-Do Skip changing replicas when the replicas of the runtime deployment is bigger than 0 + // Workaround - if deployment replica is 0, set HPA minReplicas. Else, it sets the same replicas of the deployment + existingDeployment := &appsv1.Deployment{} + if err = r.Client.Get(ctx, types.NamespacedName{ + Name: mmDeploymentName, + Namespace: req.Namespace, + }, existingDeployment); err != nil { + return ctrl.Result{}, fmt.Errorf("Could not get the deployment for the servingruntime : %w", err) + } + if *existingDeployment.Spec.Replicas == int32(0) { + mmDeployment.Replicas = uint16(*(as.Autoscaler.HPA.HPA).Spec.MinReplicas) + } else { + mmDeployment.Replicas = uint16(*(existingDeployment.Spec.Replicas)) + } + } + + //Create or Update HPA + if _, err = as.Reconcile(false); err != nil { + return ctrl.Result{}, fmt.Errorf("HPA reconcile error: %w", err) + } + } + if err = mmDeployment.Apply(ctx); err != nil { if errors.IsConflict(err) { // this can occur during normal operations if the deployment was updated diff --git a/docs/developer.md b/docs/developer.md index 524c97f1..8f4623b2 100644 --- a/docs/developer.md +++ b/docs/developer.md @@ -6,6 +6,8 @@ This document outlines some of the development practices with ModelMesh Serving. Local Kubernetes clusters can easily be set up using tools like [kind](https://kind.sigs.k8s.io/) and [minikube](https://minikube.sigs.k8s.io/docs/). +_(Note)_ Regarding webhook certificates, there are 2 options: `cert-manager` and `self-signed-certificate`. Please refer to [install help](install/install-script.md#installation-1). + For example, using `kind`: ```shell @@ -58,6 +60,16 @@ you will need to restart the controller pod. This can be done through the follow kubectl rollout restart deploy modelmesh-controller ``` +## Deploying a custom controller image + +If you have a custom controller image in your repository, set `MODELMESH_SERVING_IMAGE` to deploy it. 
The following example deploys the custom controller image `quay.io/$org/modelmesh-controller:custom` in the `modelmesh-serving` namespace with `fvt` dependencies: + +```shell +NAMESPACE=modelmesh-serving \ +MODELMESH_SERVING_IMAGE=quay.io/$org/modelmesh-controller:custom \ +make deploy-release-dev-mode-fvt +``` + ## Building the developer image A dockerized development environment is provided to help set up dependencies for testing, linting, and code generating. diff --git a/docs/install/install-script.md b/docs/install/install-script.md index fc33249d..1b525c3b 100644 --- a/docs/install/install-script.md +++ b/docs/install/install-script.md @@ -58,7 +58,7 @@ The `--quickstart` option can be specified to install and configure supporting d ```shell kubectl create namespace modelmesh-serving -./scripts/install.sh --namespace modelmesh-serving --quickstart +./scripts/install.sh --namespace modelmesh-serving --quickstart --enable-self-signed-ca ``` See the installation help below for detail: @@ -73,9 +73,11 @@ Flags: -d, --delete Delete any existing instances of ModelMesh Serving in Kube namespace before running install, including CRDs, RBACs, controller, older CRD with serving.kserve.io api group name, etc. -u, --user-namespaces Kubernetes namespaces to enable for ModelMesh Serving --quickstart Install and configure required supporting datastores in the same namespace (etcd and MinIO) - for experimentation/development - --fvt Install and configure required supporting datastores in the same namespace (etcd and MinIO) - for development with fvt enabled + --fvt Install and configure required supporting datastores in the same namespace (etcd and MinIO) and set `enable-self-signed-ca` - for development with fvt enabled -dev, --dev-mode-logging Enable dev mode logging (stacktraces on warning and no sampling) --namespace-scope-mode Run ModelMesh Serving in namespace scope mode + --modelmesh-serving-image Set a custom ModelMesh serving image + --enable-self-signed-ca Enable self-signed-ca, if the cluster doesn't have `cert-manager` installed Installs ModelMesh Serving CRDs, controller, and built-in runtimes into specified Kubernetes namespaces. @@ -92,6 +94,24 @@ The installation will create a secret named `storage-config` if it does not alre The `--namespace-scope-mode` will deploy `ServingRuntime`s confined to the same namespace, instead of the default cluster-scoped runtimes `ClusterServingRuntime`s. These serving runtimes are accessible to any user/namespace in the cluster. +You can optionally provide a custom ModelMesh Serving image with `--modelmesh-serving-image`. If not specified, it will pull the latest image. + +The ModelMesh controller uses a webhook that requires a certificate. We suggest using [cert-manager](https://github.com/cert-manager/cert-manager) to provision the certificates for the webhook server. Other solutions should also work as long as they put the certificates in the desired location. You can follow [the cert-manager documentation](https://cert-manager.io/docs/installation/) to install it. If you don't want to install `cert-manager`, use the `--enable-self-signed-ca` flag. It will execute a script to create a self-signed CA and patch it to the webhook config. + +- [cert-manager latest version](https://github.com/cert-manager/cert-manager/releases/latest) + + ```shell + CERT_MANAGER_VERSION="v1.11.0" # Use the latest version + + echo "Installing cert manager ..." 
+  kubectl create namespace cert-manager
+  sleep 2
+  kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/${CERT_MANAGER_VERSION}/cert-manager.yaml
+
+  echo "Waiting for cert manager to start ..."
+  kubectl wait --for=condition=ready pod -l 'app in (cert-manager,webhook)' --timeout=180s -n cert-manager
+  ```
+
 ## Setup additional namespaces
 
 To enable additional namespaces for ModelMesh after the initial installation, you need to add a label named `modelmesh-enabled`, and optionally setup the storage secret `storage-config` and built-in runtimes, in the user namespaces.
diff --git a/docs/production-use/scaling.md b/docs/production-use/scaling.md
index 1b2c8c9a..46c55ce7 100644
--- a/docs/production-use/scaling.md
+++ b/docs/production-use/scaling.md
@@ -14,3 +14,35 @@ Increasing the number of runtime replicas has two important effects:
 If a given `ServingRuntime` has no `InferenceService`s that it supports, the `Deployment` for that runtime can safely be scaled to 0 replicas to save on resources. By enabling `ScaleToZero` in the configuration, ModelMesh Serving will perform this scaling automatically. If an `InferenceService` is later added that requires the runtime, it will be scaled back up.
 
 To prevent unnecessary churn, the `ScaleToZero` behavior has a grace period that delays scaling down after the last `InferenceService` required by the runtime is deleted. If a new `InferenceService` is created in that window there will be no change to the scale.
+
+### Autoscaler
+
+In addition to the `ScaleToZero` feature, runtime pods can be autoscaled with a Horizontal Pod Autoscaler (HPA). This feature is disabled by default, but it can be enabled at any time by annotating a ServingRuntime or ClusterServingRuntime.
+To enable the Autoscaler feature, add the following annotation:
+
+```yaml
+apiVersion: serving.kserve.io/v1alpha1
+kind: ServingRuntime
+metadata:
+  annotations:
+    serving.kserve.io/autoscalerClass: hpa
+```
+
+Additional annotations:
+
+```yaml
+metadata:
+  annotations:
+    serving.kserve.io/autoscalerClass: hpa
+    serving.kserve.io/targetUtilizationPercentage: "75"
+    serving.kserve.io/metrics: "cpu"
+    serving.kserve.io/min-scale: "2"
+    serving.kserve.io/max-scale: "3"
+```
+
+You can disable the Autoscaler feature at any time, even while runtime pods created from that ServingRuntime are running.
+
+**NOTE**
+
+- If `serving.kserve.io/autoscalerClass: hpa` is not set, the other annotations will be ignored.
+- If `ScaleToZero` is enabled and there are no `InferenceService`s, the HPA will be deleted and the ServingRuntime deployment will be scaled down to 0.
diff --git a/docs/quickstart.md b/docs/quickstart.md
index 7eb52436..e0561cf8 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -8,6 +8,8 @@ To quickly get started using ModelMesh Serving, here is a brief guide.
 - [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl) and [kustomize](https://kubectl.docs.kubernetes.io/installation/kustomize/) (v3.2.0+)
 - At least 4 vCPU and 8 GB memory. For more details, please see [here](install/README.md#deployed-components).
 
+_(Note)_ Regarding webhook certificates, there are 2 options: `cert-manager` and `self-signed-certificate`. Please refer to [install help](install/install-script.md#installation-1).
+
 ## 1. 
Install ModelMesh Serving ### Get the latest release @@ -22,7 +24,7 @@ cd modelmesh-serving ```shell kubectl create namespace modelmesh-serving -./scripts/install.sh --namespace-scope-mode --namespace modelmesh-serving --quickstart +./scripts/install.sh --namespace-scope-mode --namespace modelmesh-serving --quickstart --enable-self-signed-ca ``` This will install ModelMesh Serving in the `modelmesh-serving` namespace, along with an etcd and MinIO instances. diff --git a/docs/release-process.md b/docs/release-process.md index 97af46c0..4822f260 100644 --- a/docs/release-process.md +++ b/docs/release-process.md @@ -141,7 +141,6 @@ with KServe. 1. Generate the release manifests on the `release-*` branch: ```Shell - kustomize build config/default > modelmesh.yaml kustomize build config/runtimes --load-restrictor LoadRestrictionsNone > modelmesh-runtimes.yaml cp config/dependencies/quickstart.yaml modelmesh-quickstart-dependencies.yaml ``` diff --git a/fvt/README.md b/fvt/README.md index ef557243..d6a9fe48 100644 --- a/fvt/README.md +++ b/fvt/README.md @@ -4,7 +4,7 @@ Functional Verification Test (FVT) suite for ModelMesh Serving using [Ginkgo](ht ## How the tests are structured -- The entry points for FVT suite are located in `predictor/predictor_suite_test.go` and `scaleToZero/scaleToZero_suite_test.go`. +- The entry points for FVT suite are located in `predictor/predictor_suite_test.go`, `scaleToZero/scaleToZero_suite_test.go` and `hpa/hpa_suite_test.go`. - Framework, utility, and helper functions for all suites are in the `fvt` package in this directory. - Manifests used to create predictors, inference services, and runtimes are in the `testdata` folder. diff --git a/fvt/fvtclient.go b/fvt/fvtclient.go index 97d50154..329072b1 100644 --- a/fvt/fvtclient.go +++ b/fvt/fvtclient.go @@ -39,6 +39,7 @@ import ( "google.golang.org/grpc/credentials" appsv1 "k8s.io/api/apps/v1" + hpav2beta2 "k8s.io/api/autoscaling/v2beta2" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -247,6 +248,11 @@ var ( Version: "v1", Resource: "pods", // this must be the plural form } + gvrHPA = schema.GroupVersionResource{ + Group: "autoscaling", + Version: "v2beta2", + Resource: "horizontalpodautoscalers", // this must be the plural form + } ) func (fvt *FVTClient) CreatePredictorExpectSuccess(resource *unstructured.Unstructured) *unstructured.Unstructured { @@ -284,6 +290,57 @@ func (fvt *FVTClient) ApplyPredictorExpectSuccess(predictor *unstructured.Unstru return obj } +func (fvt *FVTClient) ApplyServingRuntimeExpectSuccess(servingRuntime *unstructured.Unstructured) *unstructured.Unstructured { + // use server-side-apply with Patch + servingRuntime.SetManagedFields(nil) + patch, err := yaml.Marshal(servingRuntime) + Expect(err).ToNot(HaveOccurred()) + + obj, err := fvt.Resource(gvrRuntime).Namespace(fvt.namespace).Patch(context.TODO(), servingRuntime.GetName(), types.ApplyPatchType, patch, applyPatchOptions) + Expect(err).ToNot(HaveOccurred()) + Expect(obj).ToNot(BeNil()) + Expect(obj.GetKind()).To(Equal(ServingRuntimeKind)) + return obj +} + +func (fvt *FVTClient) ApplyClusterServingRuntimeExpectSuccess(clusterServingRuntime *unstructured.Unstructured) *unstructured.Unstructured { + // use server-side-apply with Patch + clusterServingRuntime.SetManagedFields(nil) + patch, err := yaml.Marshal(clusterServingRuntime) + Expect(err).ToNot(HaveOccurred()) + + obj, err := fvt.Resource(gvrCRuntime).Patch(context.TODO(), 
clusterServingRuntime.GetName(), types.ApplyPatchType, patch, applyPatchOptions) + Expect(err).ToNot(HaveOccurred()) + Expect(obj).ToNot(BeNil()) + Expect(obj.GetKind()).To(Equal(ClusterServingRuntimeKind)) + return obj +} + +func (fvt *FVTClient) SetServingRuntimeAnnotation(expectedRuntimeName string, annotations map[string]interface{}) { + fvt.log.Info("Set annotations for a runtime: "+expectedRuntimeName, "annotations", annotations) + + var srObject *unstructured.Unstructured + if NameSpaceScopeMode { + srObject = FVTClientInstance.GetServingRuntime(expectedRuntimeName) + } else { + srObject = FVTClientInstance.GetClusterServingRuntime(expectedRuntimeName) + } + + SetMap(srObject, annotations, "metadata", "annotations") + + if NameSpaceScopeMode { + FVTClientInstance.ApplyServingRuntimeExpectSuccess(srObject) + } else { + FVTClientInstance.ApplyClusterServingRuntimeExpectSuccess(srObject) + } +} + +func (fvt *FVTClient) GetClusterServingRuntime(name string) *unstructured.Unstructured { + obj, err := fvt.Resource(gvrCRuntime).Get(context.TODO(), name, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + return obj +} + func (fvt *FVTClient) GetServingRuntime(name string) *unstructured.Unstructured { obj, err := fvt.Resource(gvrRuntime).Namespace(fvt.namespace).Get(context.TODO(), name, metav1.GetOptions{}) Expect(err).ToNot(HaveOccurred()) @@ -382,6 +439,13 @@ func (fvt *FVTClient) PrintPredictors() { } } +func (fvt *FVTClient) PrintHPAs() { + err := fvt.RunKubectl("get", "hpa") + if err != nil { + fvt.log.Error(err, "Error running get hpa command") + } +} + func (fvt *FVTClient) PrintIsvcs() { err := fvt.RunKubectl("get", "inferenceservices") if err != nil { @@ -753,6 +817,24 @@ func (fvt *FVTClient) StartWatchingDeploys() watch.Interface { return deployWatcher } +func (fvt *FVTClient) ListHPAs() hpav2beta2.HorizontalPodAutoscalerList { + var err error + + listOptions := metav1.ListOptions{LabelSelector: "app.kubernetes.io/managed-by=modelmesh-controller", TimeoutSeconds: &DefaultTimeout} + u, err := fvt.Resource(gvrHPA).Namespace(fvt.namespace).List(context.TODO(), listOptions) + Expect(err).ToNot(HaveOccurred()) + + var hpaList hpav2beta2.HorizontalPodAutoscalerList + for _, uh := range u.Items { + var h hpav2beta2.HorizontalPodAutoscaler + err = runtime.DefaultUnstructuredConverter.FromUnstructured(uh.Object, &h) + Expect(err).ToNot(HaveOccurred()) + hpaList.Items = append(hpaList.Items, h) + } + + return hpaList +} + func (fvt *FVTClient) ListDeploys() appsv1.DeploymentList { var err error diff --git a/fvt/globals.go b/fvt/globals.go index e181993e..b4779b34 100644 --- a/fvt/globals.go +++ b/fvt/globals.go @@ -81,6 +81,7 @@ var MutualTLSConfig = map[string]interface{}{ const ( ServingRuntimeKind = "ServingRuntime" + ClusterServingRuntimeKind = "ClusterServingRuntime" PredictorKind = "Predictor" IsvcKind = "InferenceService" ConfigMapKind = "ConfigMap" diff --git a/fvt/helpers.go b/fvt/helpers.go index 84b4e6de..d978d610 100644 --- a/fvt/helpers.go +++ b/fvt/helpers.go @@ -437,6 +437,7 @@ func WaitForRuntimeDeploymentsToBeStable(timeToStabilize time.Duration, watcher } Expect(allReady).To(BeTrue(), fmt.Sprintf("Timed out before deployments were ready: %v", deploymentReady)) + } func logPredictorStatus(obj *unstructured.Unstructured) []interface{} { diff --git a/fvt/hpa/hpa_suite_test.go b/fvt/hpa/hpa_suite_test.go new file mode 100644 index 00000000..6f5aebd2 --- /dev/null +++ b/fvt/hpa/hpa_suite_test.go @@ -0,0 +1,118 @@ +// Copyright 2022 IBM Corporation +// +// Licensed 
under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package hpa + +import ( + "os" + "testing" + "time" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + . "github.com/kserve/modelmesh-serving/fvt" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestHPASuite(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "HPA autoscaler suite") +} + +var _ = SynchronizedBeforeSuite(func() []byte { + // runs *only* on process #1 + return nil +}, func(_ []byte) { + // runs on *all* processes + Log = zap.New(zap.UseDevMode(true), zap.WriteTo(GinkgoWriter)) + Log.Info("Initializing test suite") + + namespace := os.Getenv("NAMESPACE") + if namespace == "" { + namespace = DefaultTestNamespace + } + serviceName := os.Getenv("SERVICENAME") + if serviceName == "" { + serviceName = DefaultTestServiceName + } + controllerNamespace := os.Getenv("CONTROLLERNAMESPACE") + if controllerNamespace == "" { + controllerNamespace = DefaultControllerNamespace + } + NameSpaceScopeMode = os.Getenv("NAMESPACESCOPEMODE") == "true" + Log.Info("Using environment variables", "NAMESPACE", namespace, "SERVICENAME", serviceName, + "CONTROLLERNAMESPACE", controllerNamespace, "NAMESPACESCOPEMODE", NameSpaceScopeMode) + + var err error + FVTClientInstance, err = GetFVTClient(Log, namespace, serviceName, controllerNamespace) + Expect(err).ToNot(HaveOccurred()) + Expect(FVTClientInstance).ToNot(BeNil()) + Log.Info("FVTClientInstance created", "client", FVTClientInstance) + + // confirm 3 cluster serving runtimes or serving runtimes + var list *unstructured.UnstructuredList + if NameSpaceScopeMode { + list, err = FVTClientInstance.ListServingRuntimes(metav1.ListOptions{}) + } else { + list, err = FVTClientInstance.ListClusterServingRuntimes(metav1.ListOptions{}) + } + Expect(err).ToNot(HaveOccurred()) + Expect(list.Items).To(HaveLen(4)) + + config := map[string]interface{}{ + "scaleToZero": map[string]interface{}{ + "enabled": true, + "gracePeriodSeconds": 5, + }, + "podsPerRuntime": 1, + } + FVTClientInstance.ApplyUserConfigMap(config) + + // cleanup any predictors and inference services if they exist + FVTClientInstance.DeleteAllPredictors() + FVTClientInstance.DeleteAllIsvcs() + + Log.Info("Setup completed") +}) + +var _ = SynchronizedAfterSuite(func() { + // runs on *all* processes + // ensure we cleanup any port-forward + FVTClientInstance.DisconnectFromModelServing() +}, func() { + // runs *only* on process #1 + // cleanup any predictors and inference services if they exist + FVTClientInstance.DeleteAllPredictors() + FVTClientInstance.DeleteAllIsvcs() +}) + +// register handlers for a failed test case to print info to the console +var startTime string +var _ = JustBeforeEach(func() { + startTime = time.Now().Format("2006-01-02T15:04:05Z") +}) +var _ = JustAfterEach(func() { + if CurrentSpecReport().Failed() { + FVTClientInstance.PrintPredictors() + FVTClientInstance.PrintIsvcs() + FVTClientInstance.PrintHPAs() 
+ FVTClientInstance.PrintPods() + FVTClientInstance.PrintDescribeNodes() + FVTClientInstance.PrintEvents() + FVTClientInstance.TailPodLogs(startTime) + } +}) diff --git a/fvt/hpa/hpa_test.go b/fvt/hpa/hpa_test.go new file mode 100644 index 00000000..2fe632e0 --- /dev/null +++ b/fvt/hpa/hpa_test.go @@ -0,0 +1,249 @@ +// Copyright 2021 IBM Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package hpa + +import ( + "strings" + "time" + + "github.com/kserve/kserve/pkg/constants" + mmcontstant "github.com/kserve/modelmesh-serving/pkg/constants" + hpav2beta2 "k8s.io/api/autoscaling/v2beta2" + + . "github.com/kserve/modelmesh-serving/fvt" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" +) + +var _ = Describe("Scaling of runtime deployments with HPA Autoscaler", Ordered, func() { + // constants + testPredictorObject := NewPredictorForFVT("mlserver-sklearn-predictor.yaml") + // runtime expected to serve the test predictor + expectedRuntimeName := "mlserver-0.x" + + // checkDeploymentState returns the replicas value for the expected runtime + // and expects others to be scaled to zero + checkDeploymentState := func() int32 { + deployments := FVTClientInstance.ListDeploys() + var replicas int32 + for _, d := range deployments.Items { + Log.Info("Checking deployment scale", "name", d.ObjectMeta.Name) + // the service prefix may change + if strings.HasSuffix(d.ObjectMeta.Name, expectedRuntimeName) { + // since we list existing deploys Replicas should never be nil + replicas = *d.Spec.Replicas + } else { + Expect(*d.Spec.Replicas).To(BeEquivalentTo(int32(0))) + } + } + return replicas + } + expectScaledToTargetReplicas := func(targetReplicas int32) { + replicas := checkDeploymentState() + Expect(replicas).To(BeEquivalentTo(targetReplicas)) + } + + expectScaledToZero := func() { + replicas := checkDeploymentState() + Expect(replicas).To(BeEquivalentTo(int32(0))) + } + + checkHPAState := func() *hpav2beta2.HorizontalPodAutoscaler { + hpaList := FVTClientInstance.ListHPAs() + + var hpa *hpav2beta2.HorizontalPodAutoscaler + if len(hpaList.Items) == 0 { + hpa = nil + } else { + for _, d := range hpaList.Items { + Log.Info("Checking if HPA exist", "name", d.ObjectMeta.Name) + // the service prefix may change + if strings.HasSuffix(d.ObjectMeta.Name, expectedRuntimeName) { + hpa = &d + } + } + } + return hpa + } + + expectHPAExist := func(exist bool) { + hpa := checkHPAState() + if exist { + Expect(hpa).NotTo(BeNil()) + } else { + Expect(hpa).To(BeNil()) + } + } + + expectHPAMinReplicas := func(minReplicas int32) { + hpa := checkHPAState + Expect(*hpa().Spec.MinReplicas).To(Equal(minReplicas)) + } + + expectHPAMaxReplicas := func(maxReplicas int32) { + hpa := checkHPAState + Expect(hpa().Spec.MaxReplicas).To(Equal(maxReplicas)) + } + + expectHPATargetUtilizationPercentage := func(targetUtilizationPercentage int32) { + hpa := checkHPAState + Expect(*hpa().Spec.Metrics[0].Resource.Target.AverageUtilization).To(Equal(targetUtilizationPercentage)) + } + 
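+	// expectHPAResourceName asserts which resource metric (cpu or memory) the HPA
+	// created for the expected runtime is configured to scale on.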
+ expectHPAResourceName := func(resourceName corev1.ResourceName) { + hpa := checkHPAState + Expect(hpa().Spec.Metrics[0].Resource.Name).To(Equal(resourceName)) + } + + deployTestPredictorAndCheckDefaultHPA := func() { + CreatePredictorAndWaitAndExpectLoaded(testPredictorObject) + expectScaledToTargetReplicas(int32(constants.DefaultMinReplicas)) + + // check HPA object + expectHPAExist(true) + expectHPAMinReplicas(1) + expectHPAMaxReplicas(1) + expectHPATargetUtilizationPercentage(80) + expectHPAResourceName(corev1.ResourceCPU) + } + BeforeAll(func() { + srAnnotations := make(map[string]interface{}) + srAnnotations[constants.AutoscalerClass] = string(constants.AutoscalerClassHPA) + + FVTClientInstance.SetServingRuntimeAnnotation(expectedRuntimeName, srAnnotations) + }) + + BeforeEach(func() { + FVTClientInstance.DeleteAllPredictors() + // ensure a stable deploy state + WaitForStableActiveDeployState(10 * time.Second) + }) + + AfterAll(func() { + FVTClientInstance.DeleteAllPredictors() + + annotations := make(map[string]interface{}) + FVTClientInstance.SetServingRuntimeAnnotation(expectedRuntimeName, annotations) + }) + + Context("when there are no predictors", func() { + It("Scale all runtimes down", func() { + // check that all runtimes are scaled to zero + By("Check ScaleToZero and No HPA") + expectScaledToZero() + expectHPAExist(false) + }) + It("Scale all runtimes down after a created test predictor is deleted", func() { + By("Creating a test predictor for one Runtime") + deployTestPredictorAndCheckDefaultHPA() + + By("Delete all predictors") + FVTClientInstance.DeleteAllPredictors() + // ensure a stable deploy state + WaitForStableActiveDeployState(10 * time.Second) + + By("Check ScaleToZero and No HPA") + expectScaledToZero() + expectHPAExist(false) + }) + }) + Context("when there are predictors", func() { + It("Creating a predictor should create an HPA and scale up the runtime to minReplicas of HPA", func() { + By("Creating a test predictor for one Runtime") + deployTestPredictorAndCheckDefaultHPA() + }) + It("Scaleup/Scaledown and Change targetUtilizationPercentage by an annotation in ServingRuntime", func() { + By("Creating a test predictor for one Runtime") + deployTestPredictorAndCheckDefaultHPA() + + // ScaleUp Test + By("ScaleUp to min(2)/max(4): " + mmcontstant.MinScaleAnnotationKey) + By("Increase TargetUtilizationPercentage to 90: " + constants.TargetUtilizationPercentage) + By("Change Metrics to memory: " + constants.TargetUtilizationPercentage) + srAnnotationsScaleUp := make(map[string]interface{}) + srAnnotationsScaleUp[constants.AutoscalerClass] = string(constants.AutoscalerClassHPA) + srAnnotationsScaleUp[mmcontstant.MinScaleAnnotationKey] = "2" + srAnnotationsScaleUp[mmcontstant.MaxScaleAnnotationKey] = "4" + srAnnotationsScaleUp[constants.TargetUtilizationPercentage] = "90" + srAnnotationsScaleUp[constants.AutoscalerMetrics] = "memory" + + // set modified annotations + FVTClientInstance.SetServingRuntimeAnnotation(expectedRuntimeName, srAnnotationsScaleUp) + + // sleep to give time for changes to propagate to the deployment + time.Sleep(10 * time.Second) + WaitForStableActiveDeployState(time.Second * 30) + + // check that all runtimes except the one are scaled up to minimum replicas of HPA + expectScaledToTargetReplicas(2) + + // check HPA + expectHPAExist(true) + expectHPAMinReplicas(2) + expectHPAMaxReplicas(4) + expectHPATargetUtilizationPercentage(90) + expectHPAResourceName(corev1.ResourceMemory) + + // ScaleDown Test + By("ScaleDown to min(1)/max(1): " + 
mmcontstant.MinScaleAnnotationKey) + By("Decrease TargetUtilizationPercentage to 80: " + constants.TargetUtilizationPercentage) + By("Change Metrics to cpu: " + constants.TargetUtilizationPercentage) + srAnnotationsScaleDown := make(map[string]interface{}) + srAnnotationsScaleDown[constants.AutoscalerClass] = string(constants.AutoscalerClassHPA) + srAnnotationsScaleDown[mmcontstant.MinScaleAnnotationKey] = "1" + srAnnotationsScaleDown[mmcontstant.MaxScaleAnnotationKey] = "1" + srAnnotationsScaleDown[constants.TargetUtilizationPercentage] = "80" + srAnnotationsScaleDown[constants.AutoscalerMetrics] = "cpu" + + // set modified annotations + FVTClientInstance.SetServingRuntimeAnnotation(expectedRuntimeName, srAnnotationsScaleDown) + + // sleep to give time for changes to propagate to the deployment + time.Sleep(10 * time.Second) + WaitForStableActiveDeployState(time.Second * 30) + + // check that all runtimes except the one are scaled up to minimum replicas of HPA + expectScaledToTargetReplicas(1) + + // check HPA object + expectHPAExist(true) + expectHPAMinReplicas(1) + expectHPAMaxReplicas(1) + expectHPATargetUtilizationPercentage(80) + expectHPAResourceName(corev1.ResourceCPU) + }) + }) + // This test must be the last because it will remove hpa annotation from servingruntime/clusterservingruntime + Context("When the model does not need autoscaler anymore", func() { + It("Disable autoscaler", func() { + deployTestPredictorAndCheckDefaultHPA() + + // set modified annotations + By("Deleting this annotation: " + constants.AutoscalerClass) + srAnnotationsNone := make(map[string]interface{}) + FVTClientInstance.SetServingRuntimeAnnotation(expectedRuntimeName, srAnnotationsNone) + + // sleep to give time for changes to propagate to the deployment + time.Sleep(10 * time.Second) + WaitForStableActiveDeployState(time.Second * 30) + + // check that all runtimes except the one are scaled up to servingRuntime default replicas + expectScaledToTargetReplicas(1) + + // check if HPA deleted + expectHPAExist(false) + }) + }) +}) diff --git a/fvt/utils.go b/fvt/utils.go index efaedceb..82257e1f 100644 --- a/fvt/utils.go +++ b/fvt/utils.go @@ -122,6 +122,11 @@ func GetMap(obj *unstructured.Unstructured, fieldPath ...string) map[string]inte return value } +func SetMap(obj *unstructured.Unstructured, value map[string]interface{}, fieldPath ...string) { + err := unstructured.SetNestedMap(obj.Object, value, fieldPath...) + Expect(err).ToNot(HaveOccurred()) +} + func SetString(obj *unstructured.Unstructured, value string, fieldPath ...string) { err := unstructured.SetNestedField(obj.Object, value, fieldPath...) 
Expect(err).ToNot(HaveOccurred()) diff --git a/go.mod b/go.mod index 4ace4b1b..7e90c62a 100644 --- a/go.mod +++ b/go.mod @@ -14,6 +14,7 @@ require ( github.com/onsi/ginkgo/v2 v2.1.3 github.com/onsi/gomega v1.18.1 github.com/operator-framework/operator-lib v0.10.0 + github.com/pkg/errors v0.9.1 github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.55.0 github.com/spf13/viper v1.10.1 github.com/stretchr/testify v1.8.0 @@ -81,7 +82,6 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pelletier/go-toml v1.9.4 // indirect - github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_golang v1.14.0 // indirect github.com/prometheus/client_model v0.3.0 // indirect diff --git a/main.go b/main.go index 84eafcea..fc3c071d 100644 --- a/main.go +++ b/main.go @@ -54,6 +54,8 @@ import ( "github.com/kserve/modelmesh-serving/controllers" "github.com/kserve/modelmesh-serving/controllers/modelmesh" "github.com/kserve/modelmesh-serving/pkg/mmesh" + + "sigs.k8s.io/controller-runtime/pkg/webhook" // +kubebuilder:scaffold:imports ) @@ -258,6 +260,15 @@ func main() { os.Exit(1) } + // Setup servingruntime validating webhook + hookServer := mgr.GetWebhookServer() + servingRuntimeWebhook := &webhook.Admission{ + Handler: &servingv1alpha1.ServingRuntimeWebhook{ + Client: mgr.GetClient(), + }, + } + hookServer.Register("/validate-serving-modelmesh-io-v1alpha1-servingruntime", servingRuntimeWebhook) + _, err = mmesh.InitGrpcResolver(ControllerNamespace, mgr) if err != nil { setupLog.Error(err, "Failed to Initialize Grpc Resolver, exit") diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go new file mode 100644 index 00000000..6002a784 --- /dev/null +++ b/pkg/constants/constants.go @@ -0,0 +1,21 @@ +// Copyright 2021 IBM Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
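+
+// Package constants defines the ModelMesh-specific min-scale / max-scale annotation
+// keys (serving.kserve.io/min-scale, serving.kserve.io/max-scale) that the HPA
+// reconciler reads from ServingRuntime/ClusterServingRuntime metadata.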
+package constants + +import "github.com/kserve/kserve/pkg/constants" + +var ( + MinScaleAnnotationKey = constants.KServeAPIGroupName + "/min-scale" + MaxScaleAnnotationKey = constants.KServeAPIGroupName + "/max-scale" +) diff --git a/scripts/install.sh b/scripts/install.sh index f4662677..f5c65ef0 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -26,6 +26,8 @@ quickstart=false fvt=false user_ns_array= namespace_scope_mode=false # change to true to run in namespace scope +modelmesh_serving_image= +enable_self_signed_ca=false function showHelp() { echo "usage: $0 [flags]" @@ -39,6 +41,8 @@ function showHelp() { echo " --fvt Install and configure required supporting datastores in the same namespace (etcd and MinIO) - for development with fvt enabled" echo " -dev, --dev-mode-logging Enable dev mode logging (stacktraces on warning and no sampling)" echo " --namespace-scope-mode Run ModelMesh Serving in namespace scope mode" + echo " --modelmesh-serving-image Set a custom modelmesh serving image" + echo " --enable-self-signed-ca Enable self-signed-ca, if you don't have cert-manager in the cluster" echo echo "Installs ModelMesh Serving CRDs, controller, and built-in runtimes into specified" echo "Kubernetes namespaces." @@ -166,10 +170,18 @@ while (($# > 0)); do ;; --fvt) fvt=true + enable_self_signed_ca=true ;; --namespace-scope-mode) namespace_scope_mode=true ;; + --modelmesh-serving-image) + shift + modelmesh_serving_image="$1" + ;; + --enable-self-signed-ca ) + enable_self_signed_ca=true + ;; -*) die "Unknown option: '${1}'" ;; @@ -290,6 +302,41 @@ else fi info "Installing ModelMesh Serving CRDs and controller" +if [[ -n $modelmesh_serving_image ]]; then + info "Custom ModelMesh Serving Image: $modelmesh_serving_image" + if [[ ! -f manager/kustomization.yaml.ori ]]; then + cp manager/kustomization.yaml manager/kustomization.yaml.ori + fi + cd manager; kustomize edit set image modelmesh-controller=${modelmesh_serving_image}; cd ../ +fi + +if [[ $enable_self_signed_ca == "true" ]]; then + info "Enabled Self Signed CA: Update manifest" + if [[ ! -f certmanager/kustomization.yaml.ori ]]; then + cp certmanager/kustomization.yaml certmanager/kustomization.yaml.ori + fi + cd certmanager; kustomize edit remove resource certificate.yaml; cd ../ + + if [[ ! -f default/kustomization.yaml.ori ]]; then + cp default/kustomization.yaml default/kustomization.yaml.ori + fi + cd default; kustomize edit remove resource ../certmanager; cd ../ + + # comment out vars + configMapGeneratorStartLine=$(grep -n configMapGenerator ./default/kustomization.yaml |cut -d':' -f1) + configMapGeneratorBeforeLine=$((configMapGeneratorStartLine-1)) + sed "1,${configMapGeneratorBeforeLine}s/^/#/g" -i default/kustomization.yaml + + # remove webhookcainjection_patch.yaml + sed 's+- webhookcainjection_patch.yaml++g' -i default/kustomization.yaml + + # create dummy secret 'modelmesh-webhook-server-cert' + secretExist=$(kubectl get secret modelmesh-webhook-server-cert --ignore-not-found|wc -l) + if [[ $secretExist == 0 ]]; then + kubectl create secret generic modelmesh-webhook-server-cert + fi +fi + kustomize build default | kubectl apply -f - if [[ $dev_mode_logging == "true" ]]; then @@ -305,7 +352,26 @@ if [[ $namespace_scope_mode == "true" ]]; then rm crd/kustomization.yaml.bak fi -info "Waiting for ModelMesh Serving controller pod to be up ..." 
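+# Restore the kustomization files that were backed up above (custom image and/or
+# self-signed CA edits) so the checkout is left unmodified after installation; for
+# the self-signed CA path, certificates are then generated and the controller restarted.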
+if [[ -n $modelmesh_serving_image ]]; then + cp manager/kustomization.yaml.ori manager/kustomization.yaml + rm manager/kustomization.yaml.ori +fi + +if [[ $enable_self_signed_ca == "true" ]]; then + cp certmanager/kustomization.yaml.ori certmanager/kustomization.yaml + cp default/kustomization.yaml.ori default/kustomization.yaml + rm certmanager/kustomization.yaml.ori default/kustomization.yaml.ori + + info "Enabled Self Signed CA: Generate certificates and restart controller" + + # Delete dummy secret for webhook server + kubectl delete secret modelmesh-webhook-server-cert + + ../scripts/self-signed-ca.sh --namespace $namespace + +fi + +info "Waiting for ModelMesh Serving controller pod to be up..." wait_for_pods_ready "-l control-plane=modelmesh-controller" # Older versions of kustomize have different load restrictor flag formats. diff --git a/scripts/self-signed-ca.sh b/scripts/self-signed-ca.sh new file mode 100755 index 00000000..de104cd9 --- /dev/null +++ b/scripts/self-signed-ca.sh @@ -0,0 +1,169 @@ +#!/usr/bin/env bash +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.# + +# Install ModelMesh Serving CRDs, controller, and built-in runtimes into specified Kubernetes namespaces. +# Expect cluster-admin authority and Kube cluster access to be configured prior to running. 
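+#
+# Example invocation (as called from scripts/install.sh when --enable-self-signed-ca is set):
+#   ./scripts/self-signed-ca.sh --namespace modelmesh-serving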
+ +set -e + +usage() { + cat <> ${tmpdir}/csr.conf +[req] +req_extensions = v3_req +distinguished_name = req_distinguished_name +[req_distinguished_name] +[ v3_req ] +basicConstraints = CA:FALSE +keyUsage = nonRepudiation, digitalSignature, keyEncipherment +extendedKeyUsage = serverAuth +subjectAltName = @alt_names +[alt_names] +DNS.1 = ${service} +DNS.2 = ${service}.${namespace} +DNS.3 = ${service}.${namespace}.svc +DNS.4 = ${service}.${namespace}.svc.cluster +DNS.5 = ${service}.${namespace}.svc.cluster.local + +EOF +# Create CA and Server key/certificate +openssl genrsa -out ${tmpdir}/ca.key 2048 +openssl req -x509 -newkey rsa:2048 -key ${tmpdir}/ca.key -out ${tmpdir}/ca.crt -days 365 -nodes -subj "/CN=${service}.${namespace}.svc" + +openssl genrsa -out ${tmpdir}/server.key 2048 +openssl req -new -key ${tmpdir}/server.key -subj "/CN=${service}.${namespace}.svc" -out ${tmpdir}/server.csr -config ${tmpdir}/csr.conf + +# Self sign +openssl x509 -extensions v3_req -req -days 365 -in ${tmpdir}/server.csr -CA ${tmpdir}/ca.crt -CAkey ${tmpdir}/ca.key -CAcreateserial -out ${tmpdir}/server.crt -extfile ${tmpdir}/csr.conf +# create the secret with server cert/key +kubectl create secret generic ${secret} \ + --from-file=tls.key=${tmpdir}/server.key \ + --from-file=tls.crt=${tmpdir}/server.crt \ + --dry-run -o yaml | + kubectl -n ${namespace} apply -f - +# Webhook pod needs to be restarted so that the service reload the secret +# http://github.com/kueflow/kubeflow/issues/3227 +webhookPod=$(kubectl get pods -n ${namespace} |grep ${webhookDeploymentName} |awk '{print $1;}') +# ignore error if webhook pod does not exist +kubectl delete pod ${webhookPod} -n ${namespace} 2>/dev/null || true +echo "webhook ${webhookPod} is restarted to utilize the new secret" + +echo "CA Certificate:" +cat ${tmpdir}/ca.crt + +# -a means base64 encode +caBundle=$(cat ${tmpdir}/ca.crt | openssl enc -a -A) +echo "Encoded CA:" +echo -e "${caBundle} \n" + +# check if jq is installed +if [ ! -x "$(command -v jq)" ]; then + echo "jq not found" + exit 1 +fi +# # Patch CA Certificate to mutatingWebhook +# mutatingWebhookCount=$(kubectl get mutatingwebhookconfiguration ${webhookConfigName} -ojson | jq -r '.webhooks' | jq length) +# # build patchstring based on webhook counts +# mutatingPatchString='[' +# for i in $(seq 0 $(($mutatingWebhookCount-1))) +# do +# mutatingPatchString=$mutatingPatchString'{"op": "replace", "path": "/webhooks/'$i'/clientConfig/caBundle", "value":"{{CA_BUNDLE}}"}, ' +# done +# # strip ', ' +# mutatingPatchString=${mutatingPatchString%, }']' +# mutatingPatchString=$(echo ${mutatingPatchString} | sed "s|{{CA_BUNDLE}}|${caBundle}|g") + +# echo "patching ca bundle for mutating webhook configuration..." +# kubectl patch mutatingwebhookconfiguration ${webhookConfigName} \ +# --type='json' -p="${mutatingPatchString}" + +# Patch CA Certificate to validatingWebhook +validatingWebhookCount=$(kubectl get validatingwebhookconfiguration ${webhookConfigName} -ojson | jq -r '.webhooks' | jq length) +validatingPatchString='[' +for i in $(seq 0 $(($validatingWebhookCount-1))) +do + validatingPatchString=$validatingPatchString'{"op": "replace", "path": "/webhooks/'$i'/clientConfig/caBundle", "value":"{{CA_BUNDLE}}"}, ' +done +validatingPatchString=${validatingPatchString%, }']' +validatingPatchString=$(echo ${validatingPatchString} | sed "s|{{CA_BUNDLE}}|${caBundle}|g") + +echo "patching ca bundle for validating webhook configuration..." 
+kubectl patch validatingwebhookconfiguration ${webhookConfigName} \ + --type='json' -p="${validatingPatchString}"
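+
+# Optional sanity check (not part of the original flow): confirm the CA bundle was injected, e.g.
+#   kubectl get validatingwebhookconfiguration ${webhookConfigName} -o jsonpath='{.webhooks[0].clientConfig.caBundle}' | head -c 64; echo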