diff --git a/.github/workflows/run-fvt.yml b/.github/workflows/run-fvt.yml index 69c2634d6..ddc13024c 100644 --- a/.github/workflows/run-fvt.yml +++ b/.github/workflows/run-fvt.yml @@ -66,6 +66,7 @@ jobs: ./scripts/install.sh --namespace modelmesh-serving --fvt --dev-mode-logging - name: Free up disk space run: | + eval $(minikube -p minikube docker-env) echo "Pruning images" docker image prune -a -f docker system df @@ -82,6 +83,7 @@ jobs: docker pull kserve/modelmesh - name: Check installation run: | + eval $(minikube -p minikube docker-env) docker images kubectl get pods kubectl get clusterservingruntimes @@ -91,4 +93,4 @@ jobs: export PATH=/root/go/bin/:$PATH export NAMESPACE=modelmesh-serving export NAMESPACESCOPEMODE=false - make fvt \ No newline at end of file + make fvt diff --git a/.gitignore b/.gitignore index ebda98b64..a5e668181 100644 --- a/.gitignore +++ b/.gitignore @@ -33,4 +33,4 @@ bin # Modelmesh development related artifacts devbuild .develop_image_name -.dev/ \ No newline at end of file +.dev/ diff --git a/apis/serving/v1alpha1/servingruntime_webhook.go b/apis/serving/v1alpha1/servingruntime_webhook.go index a1890a1e4..3303f2901 100644 --- a/apis/serving/v1alpha1/servingruntime_webhook.go +++ b/apis/serving/v1alpha1/servingruntime_webhook.go @@ -17,12 +17,14 @@ package v1alpha1 import ( "context" "fmt" + "math" "net/http" "strconv" kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" "github.com/kserve/kserve/pkg/constants" "github.com/kserve/modelmesh-serving/controllers/autoscaler" + mmcontstant "github.com/kserve/modelmesh-serving/pkg/constants" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" ) @@ -35,7 +37,8 @@ type ServingRuntimeWebhook struct { func (s *ServingRuntimeWebhook) Handle(ctx context.Context, req admission.Request) admission.Response { var srAnnotations map[string]string - srReplicas := uint16(65535) + srReplicas := uint16(math.MaxUint16) + multiModel := false if 
req.Kind.Kind == "ServingRuntime" { servingRuntime := &kservev1alpha.ServingRuntime{} @@ -44,9 +47,15 @@ func (s *ServingRuntimeWebhook) Handle(ctx context.Context, req admission.Reques return admission.Errored(http.StatusBadRequest, err) } srAnnotations = servingRuntime.ObjectMeta.Annotations + if (*servingRuntime).Spec.Replicas != nil { srReplicas = uint16(*servingRuntime.Spec.Replicas) } + + if (*servingRuntime).Spec.MultiModel != nil { + multiModel = *servingRuntime.Spec.MultiModel + } + } else { clusterServingRuntime := &kservev1alpha.ClusterServingRuntime{} err := s.decoder.Decode(req, clusterServingRuntime) @@ -54,9 +63,18 @@ func (s *ServingRuntimeWebhook) Handle(ctx context.Context, req admission.Reques return admission.Errored(http.StatusBadRequest, err) } srAnnotations = clusterServingRuntime.ObjectMeta.Annotations + if (*clusterServingRuntime).Spec.Replicas != nil { srReplicas = uint16(*clusterServingRuntime.Spec.Replicas) } + + if (*clusterServingRuntime).Spec.MultiModel != nil { + multiModel = *clusterServingRuntime.Spec.MultiModel + } + } + + if !multiModel { + return admission.Allowed("Not validating ServingRuntime because it is not ModelMesh compatible") } if err := validateServingRuntimeAutoscaler(srAnnotations); err != nil { @@ -130,7 +148,7 @@ func validateAutoScalingReplicas(annotations map[string]string, srReplicas uint1 switch autoscalerClassType { case string(constants.AutoscalerClassHPA): - if srReplicas != 65535 { + if srReplicas != math.MaxUint16 { return fmt.Errorf("Autoscaler is enabled and also replicas variable set. 
You can not set both.") } return validateScalingHPA(annotations) @@ -146,7 +164,7 @@ func validateScalingHPA(annotations map[string]string) error { } minReplicas := 1 - if value, ok := annotations[constants.MinScaleAnnotationKey]; ok { + if value, ok := annotations[mmcontstant.MinScaleAnnotationKey]; ok { if valueInt, err := strconv.Atoi(value); err != nil { return fmt.Errorf("The min replicas should be a integer.") } else if valueInt < 1 { @@ -157,7 +175,7 @@ func validateScalingHPA(annotations map[string]string) error { } maxReplicas := 1 - if value, ok := annotations[constants.MaxScaleAnnotationKey]; ok { + if value, ok := annotations[mmcontstant.MaxScaleAnnotationKey]; ok { if valueInt, err := strconv.Atoi(value); err != nil { return fmt.Errorf("The max replicas should be a integer.") } else { diff --git a/apis/serving/v1alpha1/servingruntime_webhook_test.go b/apis/serving/v1alpha1/servingruntime_webhook_test.go index 09f3eeb14..27e4b9bc4 100644 --- a/apis/serving/v1alpha1/servingruntime_webhook_test.go +++ b/apis/serving/v1alpha1/servingruntime_webhook_test.go @@ -16,8 +16,7 @@ limitations under the License. 
package v1alpha1 import ( - // "fmt" - + "math" "testing" "github.com/onsi/gomega" @@ -25,6 +24,7 @@ import ( kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" "github.com/kserve/kserve/pkg/constants" + mmcontstant "github.com/kserve/modelmesh-serving/pkg/constants" ) func makeTestRawServingRuntime() kservev1alpha.ServingRuntime { @@ -36,8 +36,8 @@ func makeTestRawServingRuntime() kservev1alpha.ServingRuntime { "serving.kserve.io/autoscalerClass": "hpa", "serving.kserve.io/metrics": "cpu", "serving.kserve.io/targetUtilizationPercentage": "75", - "autoscaling.knative.dev/min-scale": "2", - "autoscaling.knative.dev/max-scale": "3", + "serving.kserve.io/min-scale": "2", + "serving.kserve.io/max-scale": "3", }, }, } @@ -74,16 +74,16 @@ func TestInvalidAutoscalerTargetUtilizationPercentageHighValue(t *testing.T) { func TestInvalidAutoscalerLowMinReplicas(t *testing.T) { g := gomega.NewGomegaWithT(t) sr := makeTestRawServingRuntime() - sr.ObjectMeta.Annotations[constants.MinScaleAnnotationKey] = "0" + sr.ObjectMeta.Annotations[mmcontstant.MinScaleAnnotationKey] = "0" g.Expect(validateScalingHPA(sr.Annotations)).ShouldNot(gomega.Succeed()) } func TestInvalidAutoscalerMaxReplicasMustBiggerThanMixReplicas(t *testing.T) { g := gomega.NewGomegaWithT(t) sr := makeTestRawServingRuntime() - sr.ObjectMeta.Annotations[constants.MinScaleAnnotationKey] = "4" - sr.ObjectMeta.Annotations[constants.MaxScaleAnnotationKey] = "3" - g.Expect(validateAutoScalingReplicas(sr.Annotations, 65535)).ShouldNot(gomega.Succeed()) + sr.ObjectMeta.Annotations[mmcontstant.MinScaleAnnotationKey] = "4" + sr.ObjectMeta.Annotations[mmcontstant.MaxScaleAnnotationKey] = "3" + g.Expect(validateAutoScalingReplicas(sr.Annotations, math.MaxUint16)).ShouldNot(gomega.Succeed()) } func TestDuplicatedReplicas(t *testing.T) { g := gomega.NewGomegaWithT(t) diff --git a/apis/serving/v1alpha1/zz_generated.deepcopy.go b/apis/serving/v1alpha1/zz_generated.deepcopy.go index 73421bf5b..c2c0e74ff 100644 --- 
a/apis/serving/v1alpha1/zz_generated.deepcopy.go +++ b/apis/serving/v1alpha1/zz_generated.deepcopy.go @@ -260,6 +260,21 @@ func (in *S3StorageSource) DeepCopy() *S3StorageSource { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServingRuntimeWebhook) DeepCopyInto(out *ServingRuntimeWebhook) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServingRuntimeWebhook. +func (in *ServingRuntimeWebhook) DeepCopy() *ServingRuntimeWebhook { + if in == nil { + return nil + } + out := new(ServingRuntimeWebhook) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Storage) DeepCopyInto(out *Storage) { *out = *in diff --git a/config/certmanager/certificate.yaml b/config/certmanager/certificate.yaml index 100bd5f5a..5782dfe35 100644 --- a/config/certmanager/certificate.yaml +++ b/config/certmanager/certificate.yaml @@ -26,13 +26,13 @@ spec: apiVersion: cert-manager.io/v1 kind: Certificate metadata: - name: serving-cert # this name should match the one appeared in kustomizeconfig.yaml + name: modelmesh-webhook-server-cert # this name should match the one appeared in kustomizeconfig.yaml namespace: system spec: # SERVICE_NAME_PLACEHOLDER and SERVICE_NAMESPACE_PLACEHOLDER will be substituted by kustomize dnsNames: - - SERVICE_NAME_PLACEHOLDER.SERVICE_NAMESPACE_PLACEHOLDER.svc - - SERVICE_NAME_PLACEHOLDER.SERVICE_NAMESPACE_PLACEHOLDER.svc.cluster.local + - $(SERVICE_NAME_PLACEHOLDER).$(SERVICE_NAMESPACE_PLACEHOLDER).svc + - $(SERVICE_NAME_PLACEHOLDER).$(SERVICE_NAMESPACE_PLACEHOLDER).svc.cluster.local issuerRef: kind: Issuer name: selfsigned-issuer diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index c22359ada..86d1622d0 100644 --- a/config/default/kustomization.yaml +++ 
b/config/default/kustomization.yaml @@ -11,111 +11,36 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# Adds namespace to all resources. -#namespace: model-serving -# Value of this field is prepended to the -# names of all resources, e.g. a deployment named -# "wordpress" becomes "alices-wordpress". -# Note that it should also match with the prefix (text before '-') of the namespace -# field above. -#namePrefix: model-serving- - -# Labels to add to all resources and selectors. -#commonLabels: -# someName: someValue - -# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in -# crd/kustomization.yaml -#- ../webhook -# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. -#- ../certmanager -# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. -#- ../prometheus - -#patchesStrategicMerge: -# Protect the /metrics endpoint by putting it behind auth. -# If you want your controller-manager to expose the /metrics -# endpoint w/o any authn/z, please comment the following line. -#- manager_auth_proxy_patch.yaml - -# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in -# crd/kustomization.yaml -#- manager_webhook_patch.yaml - -# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. -# Uncomment 'CERTMANAGER' sections in crd/kustomization.yaml to enable the CA injection in the admission webhooks. 
-# 'CERTMANAGER' needs to be enabled to use ca injection -#- webhookcainjection_patch.yaml - -# the following config is for teaching kustomize how to do var substitution -replacements: - - source: +vars: + - fieldref: fieldPath: metadata.namespace + name: CERTIFICATE_NAMESPACE_PLACEHOLDER + objref: + group: cert-manager.io kind: Certificate name: serving-cert - - source: + version: v1 + - fieldref: {} + name: CERTIFICATE_NAME_PLACEHOLDER + objref: + group: cert-manager.io kind: Certificate name: serving-cert - - source: + version: v1 + - fieldref: fieldPath: metadata.namespace + name: SERVICE_NAMESPACE_PLACEHOLDER + objref: kind: Service name: modelmesh-webhook-server-service - targets: - - fieldPaths: - - |- - spec.# $(SERVICE_NAME) and $(SERVICE_NAMESPACE) will be substituted by kustomize - dnsNames.0 - options: - delimiter: . - index: 1 - select: - group: cert-manager.io - kind: Certificate - name: serving-cert - namespace: system - version: v1 - - fieldPaths: - - |- - spec.# $(SERVICE_NAME) and $(SERVICE_NAMESPACE) will be substituted by kustomize - dnsNames.1 - options: - delimiter: . - index: 1 - select: - group: cert-manager.io - kind: Certificate - name: serving-cert - namespace: system - version: v1 - - source: + version: v1 + - fieldref: {} + name: SERVICE_NAME_PLACEHOLDER + objref: kind: Service name: modelmesh-webhook-server-service - targets: - - fieldPaths: - - |- - spec.# $(SERVICE_NAME) and SERVICE_NAMESPACE_PLACEHOLDER will be substituted by kustomize - dnsNames.0 - options: - delimiter: . - select: - group: cert-manager.io - kind: Certificate - name: serving-cert - namespace: system - version: v1 - - fieldPaths: - - |- - spec.# $(SERVICE_NAME) and SERVICE_NAMESPACE_PLACEHOLDER will be substituted by kustomize - dnsNames.1 - options: - delimiter: . 
- select: - group: cert-manager.io - kind: Certificate - name: serving-cert - namespace: system - version: v1 + version: v1 configMapGenerator: - files: @@ -135,7 +60,6 @@ resources: - ../webhook - ../certmanager -patchesStrategicMerge: - - manager_webhook_patch.yaml - - webhookcainjection_patch.yaml -namespace: modelmesh-serving +patches: + - path: manager_webhook_patch.yaml + - path: webhookcainjection_patch.yaml diff --git a/config/default/webhookcainjection_patch.yaml b/config/default/webhookcainjection_patch.yaml index e05ef4e8d..33e8bcba4 100644 --- a/config/default/webhookcainjection_patch.yaml +++ b/config/default/webhookcainjection_patch.yaml @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # This patch add annotation to admission webhook config and -# the variables $(CERTIFICATE_NAMESPACE) and $(CERTIFICATE_NAME) will be substituted by kustomize. +# the string CERTIFICATE_NAMESPACE_PLACEHOLDER and CERTIFICATE_NAME_PLACEHOLDER will be replaced by kustomize. 
apiVersion: admissionregistration.k8s.io/v1 kind: ValidatingWebhookConfiguration metadata: name: servingruntime.serving.kserve.io annotations: - cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) + cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE_PLACEHOLDER)/$(CERTIFICATE_NAME_PLACEHOLDER) diff --git a/config/rbac/cluster-scope/kustomization.yaml b/config/rbac/cluster-scope/kustomization.yaml index 29e47e5e7..53af06455 100644 --- a/config/rbac/cluster-scope/kustomization.yaml +++ b/config/rbac/cluster-scope/kustomization.yaml @@ -17,4 +17,3 @@ resources: - role_binding.yaml apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization -namespace: modelmesh-serving diff --git a/config/rbac/namespace-scope/kustomization.yaml b/config/rbac/namespace-scope/kustomization.yaml index 29e47e5e7..53af06455 100644 --- a/config/rbac/namespace-scope/kustomization.yaml +++ b/config/rbac/namespace-scope/kustomization.yaml @@ -17,4 +17,3 @@ resources: - role_binding.yaml apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization -namespace: modelmesh-serving diff --git a/config/webhook/kustomization.yaml b/config/webhook/kustomization.yaml index 9b1813655..79e0fef7d 100644 --- a/config/webhook/kustomization.yaml +++ b/config/webhook/kustomization.yaml @@ -1,10 +1,20 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. --- resources: - manifests.yaml - service.yaml -# Adds namespace to all resources. 
-namespace: modelmesh-serving - configurations: - kustomizeconfig.yaml diff --git a/config/webhook/kustomizeconfig.yaml b/config/webhook/kustomizeconfig.yaml index 919027219..487da1e65 100644 --- a/config/webhook/kustomizeconfig.yaml +++ b/config/webhook/kustomizeconfig.yaml @@ -1,3 +1,16 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # the following config is for teaching kustomize where to look at when substituting vars. # It requires kustomize v2.1.0 or newer to work properly. nameReference: diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml index 65b6c1d89..784c6bb62 100644 --- a/config/webhook/manifests.yaml +++ b/config/webhook/manifests.yaml @@ -1,3 +1,16 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
apiVersion: admissionregistration.k8s.io/v1 kind: ValidatingWebhookConfiguration metadata: diff --git a/config/webhook/service.yaml b/config/webhook/service.yaml index 9197435fb..b1f4d3db6 100644 --- a/config/webhook/service.yaml +++ b/config/webhook/service.yaml @@ -1,3 +1,16 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. apiVersion: v1 kind: Service metadata: diff --git a/controllers/hpa/hpa_reconciler.go b/controllers/hpa/hpa_reconciler.go index 2c429ce84..daef07bc6 100644 --- a/controllers/hpa/hpa_reconciler.go +++ b/controllers/hpa/hpa_reconciler.go @@ -21,6 +21,7 @@ import ( "github.com/kserve/kserve/pkg/apis/serving/v1beta1" "github.com/kserve/kserve/pkg/constants" "github.com/kserve/kserve/pkg/utils" + mmcontstant "github.com/kserve/modelmesh-serving/pkg/constants" v2beta2 "k8s.io/api/autoscaling/v2beta2" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" @@ -83,18 +84,17 @@ func getHPAMetrics(metadata metav1.ObjectMeta) []v2beta2.MetricSpec { return metrics } -func createHPA(runtimeMeta metav1.ObjectMeta, mmDeploymentName string, mmNamespace string) *v2beta2.HorizontalPodAutoscaler { - var minReplicas int32 - minReplicas = int32(constants.DefaultMinReplicas) +func createHPA(runtimeMeta metav1.ObjectMeta, mmDeploymentName string, mmNamespace string) *v2beta2.HorizontalPodAutoscaler { + minReplicas := int32(constants.DefaultMinReplicas) maxReplicas := int32(constants.DefaultMinReplicas) annotations := 
runtimeMeta.Annotations - if value, ok := annotations[constants.MinScaleAnnotationKey]; ok { + if value, ok := annotations[mmcontstant.MinScaleAnnotationKey]; ok { minReplicasInt, _ := strconv.Atoi(value) minReplicas = int32(minReplicasInt) } - if value, ok := annotations[constants.MaxScaleAnnotationKey]; ok { + if value, ok := annotations[mmcontstant.MaxScaleAnnotationKey]; ok { maxReplicasInt, _ := strconv.Atoi(value) maxReplicas = int32(maxReplicasInt) } diff --git a/controllers/hpa/hpa_reconciler_test.go b/controllers/hpa/hpa_reconciler_test.go index c4075df98..bdb857e92 100644 --- a/controllers/hpa/hpa_reconciler_test.go +++ b/controllers/hpa/hpa_reconciler_test.go @@ -20,6 +20,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/kserve/kserve/pkg/constants" + mmcontstant "github.com/kserve/modelmesh-serving/pkg/constants" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -105,11 +106,11 @@ func TestCreateHPA(t *testing.T) { expectedMaxReplicas: int32(1), }, { - name: "Check HPA replicas if annotations has " + constants.MaxScaleAnnotationKey, + name: "Check HPA replicas if annotations has " + mmcontstant.MaxScaleAnnotationKey, servingRuntimeMetaData: &metav1.ObjectMeta{ Name: servingRuntimeName, Namespace: namespace, - Annotations: map[string]string{constants.MaxScaleAnnotationKey: "2"}, + Annotations: map[string]string{mmcontstant.MaxScaleAnnotationKey: "2"}, }, mmDeploymentName: &deploymentName, mmNamespace: &namespace, @@ -117,11 +118,11 @@ func TestCreateHPA(t *testing.T) { expectedMaxReplicas: int32(2), }, { - name: "Check HPA replicas if annotations has " + constants.MinScaleAnnotationKey + ". max replicas should be the same as min replicas", + name: "Check HPA replicas if annotations has " + mmcontstant.MinScaleAnnotationKey + ". 
max replicas should be the same as min replicas", servingRuntimeMetaData: &metav1.ObjectMeta{ Name: servingRuntimeName, Namespace: namespace, - Annotations: map[string]string{constants.MinScaleAnnotationKey: "2"}, + Annotations: map[string]string{mmcontstant.MinScaleAnnotationKey: "2"}, }, mmDeploymentName: &deploymentName, mmNamespace: &namespace, @@ -133,7 +134,7 @@ func TestCreateHPA(t *testing.T) { servingRuntimeMetaData: &metav1.ObjectMeta{ Name: servingRuntimeName, Namespace: namespace, - Annotations: map[string]string{constants.MinScaleAnnotationKey: "2", constants.MaxScaleAnnotationKey: "3"}, + Annotations: map[string]string{mmcontstant.MinScaleAnnotationKey: "2", mmcontstant.MaxScaleAnnotationKey: "3"}, }, mmDeploymentName: &deploymentName, mmNamespace: &namespace, diff --git a/docs/developer.md b/docs/developer.md index 7a3abb5b1..8f4623b2a 100644 --- a/docs/developer.md +++ b/docs/developer.md @@ -6,11 +6,7 @@ This document outlines some of the development practices with ModelMesh Serving. Local Kubernetes clusters can easily be set up using tools like [kind](https://kind.sigs.k8s.io/) and [minikube](https://minikube.sigs.k8s.io/docs/). -Modelmesh controller is using webhook that requires certificate. We suggest using [cert manager](https://github.com/cert-manager/cert-manager) for provisioning the certificates for the webhook server. Other solutions should also work as long as they put the certificates in the desired location. You can follow [the cert manager documentation](https://cert-manager.io/docs/installation/) to install it. - -If you don't want to install cert manager, you can set the `--enable-self-signed-ca`. It will execute a script to create a self-signed CA and patch it to the webhook config. - -_(Note)_ The `--fvt` option automatically sets `--enable-self-signed-ca`, so you do not need to set it explicitly. +_(Note)_ Regarding webhook certificates, there are 2 options: `cert-manager` and `self-signed-certificate`. 
Please refer to [install help](install/install-script.md#installation-1). For example, using `kind`: @@ -64,11 +60,9 @@ you will need to restart the controller pod. This can be done through the follow kubectl rollout restart deploy modelmesh-controller ``` -## Deploy a custom controller image +## Deploying a custom controller image -If you have a custom controller image in your repository, you simply set `MODELMESH_SERVING_IMAGE` to deploy it. The following Makefile command will deploy the controller image with fvt dependencies. - -For example: +If you have a custom controller image in your repository, set `MODELMESH_SERVING_IMAGE` to deploy it. The following example deploys the custom controller image `quay.io/$org/modelmesh-controller:custom` in the `modelmesh-serving` namespace with `fvt` dependencies: ```shell NAMESPACE=modelmesh-serving \ @@ -76,8 +70,6 @@ MODELMESH_SERVING_IMAGE=quay.io/$org/modelmesh-controller:custom \ make deploy-release-dev-mode-fvt ``` -This command will deploy your custom controller image `quay.io/$org/modelmesh-controller:custom` under `modelmesh-serving` namespace. - ## Building the developer image A dockerized development environment is provided to help set up dependencies for testing, linting, and code generating. 
diff --git a/docs/install/install-script.md b/docs/install/install-script.md index c7da043ee..1b525c3b3 100644 --- a/docs/install/install-script.md +++ b/docs/install/install-script.md @@ -58,7 +58,7 @@ The `--quickstart` option can be specified to install and configure supporting d ```shell kubectl create namespace modelmesh-serving -./scripts/install.sh --namespace modelmesh-serving --quickstart --enable-self-signed-ca +./scripts/install.sh --namespace modelmesh-serving --quickstart --enable-self-signed-ca ``` See the installation help below for detail: @@ -73,11 +73,11 @@ Flags: -d, --delete Delete any existing instances of ModelMesh Serving in Kube namespace before running install, including CRDs, RBACs, controller, older CRD with serving.kserve.io api group name, etc. -u, --user-namespaces Kubernetes namespaces to enable for ModelMesh Serving --quickstart Install and configure required supporting datastores in the same namespace (etcd and MinIO) - for experimentation/development - --fvt Install and configure required supporting datastores in the same namespace (etcd and MinIO) and also sets enable-self-signed-ca - for development with fvt enabled + --fvt Install and configure required supporting datastores in the same namespace (etcd and MinIO) and set `enable-self-signed-ca` - for development with fvt enabled -dev, --dev-mode-logging Enable dev mode logging (stacktraces on warning and no sampling) --namespace-scope-mode Run ModelMesh Serving in namespace scope mode - --modelmesh-serving-image Set a custom modelmesh serving image - --enable-self-signed-ca Enable self-signed-ca, if you don't have cert-manager in the cluster + --modelmesh-serving-image Set a custom ModelMesh serving image + --enable-self-signed-ca Enable self-signed-ca, if the cluster doesn't have `cert-manager` installed Installs ModelMesh Serving CRDs, controller, and built-in runtimes into specified Kubernetes namespaces. 
@@ -94,9 +94,23 @@ The installation will create a secret named `storage-config` if it does not alre The `--namespace-scope-mode` will deploy `ServingRuntime`s confined to the same namespace, instead of the default cluster-scoped runtimes `ClusterServingRuntime`s. These serving runtimes are accessible to any user/namespace in the cluster. -You can optionally provide a custom modelmesh serving image url with `--modelmesh-serving-image`. If not specified, it will pull the latest modelmesh image. +You can optionally provide a custom ModelMesh Serving image with `--modelmesh-serving-image`. If not specified, it will pull the latest image. -Modelmesh controller is using webhook that requires certificates. We suggest using [cert manager](https://github.com/cert-manager/cert-manager) for provisioning the certificates for the webhook server. Other solutions should also work as long as they put the certificates in the desired location. You can follow [the cert manager documentation](https://cert-manager.io/docs/installation/) to install it. If you don't want to install cert manager, you can set the `--enable-self-signed-ca`. It will execute a script to create a self-signed CA and patch it to the webhook config. +The ModelMesh controller uses a webhook that requires a certificate. We suggest using [cert-manager](https://github.com/cert-manager/cert-manager) to provision the certificates for the webhook server. Other solutions should also work as long as they put the certificates in the desired location. You can follow [the cert-manager documentation](https://cert-manager.io/docs/installation/) to install it. If you don't want to install `cert-manager`, use the `--enable-self-signed-ca` flag. It will execute a script to create a self-signed CA and patch it to the webhook config. 
+ +- [cert-manager latest version](https://github.com/cert-manager/cert-manager/releases/latest) + + ```shell + CERT_MANAGER_VERSION="v1.11.0" # Use the latest version + + echo "Installing cert manager ..." + kubectl create namespace cert-manager + sleep 2 + kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/${CERT_MANAGER_VERSION}/cert-manager.yaml + + echo "Waiting for cert manager started ..." + kubectl wait --for=condition=ready pod -l 'app in (cert-manager,webhook)' --timeout=180s -n cert-manager + ``` ## Setup additional namespaces diff --git a/docs/production-use/scaling.md b/docs/production-use/scaling.md index 59f17d911..46c55ce7b 100644 --- a/docs/production-use/scaling.md +++ b/docs/production-use/scaling.md @@ -17,8 +17,8 @@ To prevent unnecessary churn, the `ScaleToZero` behavior has a grace period that ### Autoscaler -In addition to the Scale to Zero feature, runtime pods can be autoscaled through HPA. This feature is disabled by default, but it can be enabled at any time by annotating each ServingRuntime/ClusterServingRuntime. -To enable Autoscaler feature, add the following annotation. +In addition to the `ScaleToZero` feature, runtime pods can be autoscaled through HPA. This feature is disabled by default, but it can be enabled at any time by annotating each ServingRuntime/ClusterServingRuntime. +To enable the Autoscaler feature, add the following annotation. ```shell apiVersion: serving.kserve.io/v1alpha1 @@ -36,13 +36,13 @@ metadata: serving.kserve.io/autoscalerClass: hpa serving.kserve.io/targetUtilizationPercentage: "75" serving.kserve.io/metrics: "cpu" - autoscaling.knative.dev/min-scale: "2" - autoscaling.knative.dev/max-scale: "3" + serving.kserve.io/min-scale: "2" + serving.kserve.io/max-scale: "3" ``` You can disable the Autoscaler feature even if a runtime pod created based on that ServingRuntime is running.
**NOTE** -- If `serving.kserve.io/autoscalerClass: hpa` is not set, the other annotations would be ignored. -- If `ScaleToZero` is enabled and there are no `InferenceService`, HPA will be deleted and the ServingRuntime deployment will be scaled down to 0. +- If `serving.kserve.io/autoscalerClass: hpa` is not set, the other annotations will be ignored. +- If `ScaleToZero` is enabled and there are no `InferenceService`s, HPA will be deleted and the ServingRuntime deployment will be scaled down to 0. diff --git a/docs/quickstart.md b/docs/quickstart.md index 39ad9614c..e0561cf86 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -8,27 +8,7 @@ To quickly get started using ModelMesh Serving, here is a brief guide. - [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl) and [kustomize](https://kubectl.docs.kubernetes.io/installation/kustomize/) (v3.2.0+) - At least 4 vCPU and 8 GB memory. For more details, please see [here](install/README.md#deployed-components). -## Recommendation - -- [cert-manager](https://github.com/cert-manager/cert-manager) - - ```shell - CERT_MANAGER_VERSION="v1.11.0" - - echo "Installing cert manager ..." - kubectl create namespace cert-manager - sleep 2 - kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.11.0/cert-manager.yaml - - echo "Waiting for cert manager started ..." - kubectl wait --for=condition=ready pod -l 'app in (cert-manager,webhook)' --timeout=180s -n cert-manager - ``` - - - Modelmesh controller is using webhook that requires certificate. We suggest using [cert manager](https://github.com/cert-manager/cert-manager) for provisioning the certificates for the webhook server. Other solutions should also work as long as they put the certificates in the desired location. You can follow [the cert manager documentation](https://cert-manager.io/docs/installation/) to install it. - -If you don't want to install cert manager, you can set the `--enable-self-signed-ca`. 
It will execute a script to create a self-signed CA and patch it to the webhook config. - -_(Note)_ The `--fvt` option automatically sets `--enable-self-signed-ca`, so you do not need to set it explicitly. +_(Note)_ Regarding webhook certificates, there are 2 options: `cert-manager` and `self-signed-certificate`. Please refer to [install help](install/install-script.md#installation-1). ## 1. Install ModelMesh Serving diff --git a/fvt/hpa/hpa_test.go b/fvt/hpa/hpa_test.go index 294384eab..0465e06d5 100644 --- a/fvt/hpa/hpa_test.go +++ b/fvt/hpa/hpa_test.go @@ -18,6 +18,7 @@ import ( "time" "github.com/kserve/kserve/pkg/constants" + mmcontstant "github.com/kserve/modelmesh-serving/pkg/constants" hpav2beta2 "k8s.io/api/autoscaling/v2beta2" . "github.com/kserve/modelmesh-serving/fvt" @@ -168,20 +169,22 @@ var _ = Describe("Scaling of runtime deployments with HPA Autoscaler", Ordered, deployTestPredictorAndCheckDefaultHPA() // ScaleUp Test - By("ScaleUp to min(2)/max(4): " + constants.MinScaleAnnotationKey) + By("ScaleUp to min(2)/max(4): " + mmcontstant.MinScaleAnnotationKey) By("Increase TargetUtilizationPercentage to 90: " + constants.TargetUtilizationPercentage) By("Change Metrics to memory: " + constants.TargetUtilizationPercentage) srAnnotationsScaleUp := make(map[string]interface{}) srAnnotationsScaleUp[constants.AutoscalerClass] = string(constants.AutoscalerClassHPA) - srAnnotationsScaleUp[constants.MinScaleAnnotationKey] = "2" - srAnnotationsScaleUp[constants.MaxScaleAnnotationKey] = "4" + srAnnotationsScaleUp[mmcontstant.MinScaleAnnotationKey] = "2" + srAnnotationsScaleUp[mmcontstant.MaxScaleAnnotationKey] = "4" srAnnotationsScaleUp[constants.TargetUtilizationPercentage] = "90" srAnnotationsScaleUp[constants.AutoscalerMetrics] = "memory" // set modified annotations FVTClientInstance.SetServingRuntimeAnnotation(expectedRuntimeName, srAnnotationsScaleUp) + // sleep to give time for changes to propagate to the deployment time.Sleep(10 * time.Second) + 
WaitForStableActiveDeployState(time.Second * 30) // check that all runtimes except the one are scaled up to minimum replicas of HPA expectScaledToTargetReplicas(2) @@ -194,20 +197,23 @@ var _ = Describe("Scaling of runtime deployments with HPA Autoscaler", Ordered, expectHPAResourceName(corev1.ResourceMemory) // ScaleDown Test - By("ScaleDown to min(1)/max(1): " + constants.MinScaleAnnotationKey) + By("ScaleDown to min(1)/max(1): " + mmcontstant.MinScaleAnnotationKey) By("Decrease TargetUtilizationPercentage to 80: " + constants.TargetUtilizationPercentage) By("Change Metrics to cpu: " + constants.TargetUtilizationPercentage) srAnnotationsScaleDown := make(map[string]interface{}) srAnnotationsScaleDown[constants.AutoscalerClass] = string(constants.AutoscalerClassHPA) - srAnnotationsScaleDown[constants.MinScaleAnnotationKey] = "1" - srAnnotationsScaleDown[constants.MaxScaleAnnotationKey] = "1" + srAnnotationsScaleDown[mmcontstant.MinScaleAnnotationKey] = "1" + srAnnotationsScaleDown[mmcontstant.MaxScaleAnnotationKey] = "1" srAnnotationsScaleDown[constants.TargetUtilizationPercentage] = "80" srAnnotationsScaleDown[constants.AutoscalerMetrics] = "cpu" // set modified annotations FVTClientInstance.SetServingRuntimeAnnotation(expectedRuntimeName, srAnnotationsScaleDown) + // sleep to give time for changes to propagate to the deployment time.Sleep(10 * time.Second) + WaitForStableActiveDeployState(time.Second * 30) + // check that all runtimes except the one are scaled up to minimum replicas of HPA expectScaledToTargetReplicas(1) @@ -229,7 +235,10 @@ var _ = Describe("Scaling of runtime deployments with HPA Autoscaler", Ordered, srAnnotationsNone := make(map[string]interface{}) FVTClientInstance.SetServingRuntimeAnnotation(expectedRuntimeName, srAnnotationsNone) + // sleep to give time for changes to propagate to the deployment time.Sleep(10 * time.Second) + WaitForStableActiveDeployState(time.Second * 30) + // check that all runtimes except the one are scaled up to 
servingRuntime default replicas expectScaledToTargetReplicas(1) diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go new file mode 100644 index 000000000..6002a7842 --- /dev/null +++ b/pkg/constants/constants.go @@ -0,0 +1,21 @@ +// Copyright 2021 IBM Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package constants + +import "github.com/kserve/kserve/pkg/constants" + +var ( + MinScaleAnnotationKey = constants.KServeAPIGroupName + "/min-scale" + MaxScaleAnnotationKey = constants.KServeAPIGroupName + "/max-scale" +) diff --git a/scripts/install.sh b/scripts/install.sh index 8fae36dae..f5c65ef04 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -302,12 +302,12 @@ else fi info "Installing ModelMesh Serving CRDs and controller" -if [[ ! -z $modelmesh_serving_image ]]; then +if [[ -n $modelmesh_serving_image ]]; then info "Custom ModelMesh Serving Image: $modelmesh_serving_image" if [[ ! -f manager/kustomization.yaml.ori ]]; then cp manager/kustomization.yaml manager/kustomization.yaml.ori fi - cd manager; kustomize edit set image modelmesh-controller=${modelmesh_serving_image} ; cd ../ + cd manager; kustomize edit set image modelmesh-controller=${modelmesh_serving_image}; cd ../ fi if [[ $enable_self_signed_ca == "true" ]]; then @@ -352,8 +352,7 @@ if [[ $namespace_scope_mode == "true" ]]; then rm crd/kustomization.yaml.bak fi - -if [[ ! 
-z $modelmesh_serving_image ]]; then +if [[ -n $modelmesh_serving_image ]]; then cp manager/kustomization.yaml.ori manager/kustomization.yaml rm manager/kustomization.yaml.ori fi @@ -362,11 +361,19 @@ if [[ $enable_self_signed_ca == "true" ]]; then cp certmanager/kustomization.yaml.ori certmanager/kustomization.yaml cp default/kustomization.yaml.ori default/kustomization.yaml rm certmanager/kustomization.yaml.ori default/kustomization.yaml.ori -else - info "Waiting for ModelMesh Serving controller pod to be up..." - wait_for_pods_ready "-l control-plane=modelmesh-controller" + + info "Enabled Self Signed CA: Generate certificates and restart controller" + + # Delete dummy secret for webhook server + kubectl delete secret modelmesh-webhook-server-cert + + ../scripts/self-signed-ca.sh --namespace $namespace + fi +info "Waiting for ModelMesh Serving controller pod to be up..." +wait_for_pods_ready "-l control-plane=modelmesh-controller" + # Older versions of kustomize have different load restrictor flag formats. # Can be removed once Kubeflow installation stops requiring v3.2. kustomize_version=$(kustomize version --short | grep -o -E "[0-9]\.[0-9]\.[0-9]") @@ -377,18 +384,6 @@ elif [[ -n "$kustomize_version" && "$kustomize_version" < "4.0.1" ]]; then kustomize_load_restrictor_arg="--load_restrictor LoadRestrictionsNone" fi -if [[ $enable_self_signed_ca == "true" ]]; then - info "Enabled Self Signed CA: Generate certificates and restart controller" - - # Delete dummy secret for webhook server - kubectl delete secret modelmesh-webhook-server-cert - - ../scripts/self-signed-ca.sh --namespace $namespace - - info "Restarting ModelMesh Serving controller pod..." 
- wait_for_pods_ready "-l control-plane=modelmesh-controller" -fi - info "Installing ModelMesh Serving built-in runtimes" if [[ $namespace_scope_mode == "true" ]]; then kustomize build namespace-runtimes ${kustomize_load_restrictor_arg} | kubectl apply -f - diff --git a/scripts/self-signed-ca.sh b/scripts/self-signed-ca.sh index 6adad816b..de104cd9f 100755 --- a/scripts/self-signed-ca.sh +++ b/scripts/self-signed-ca.sh @@ -1,4 +1,20 @@ -#!/bin/bash +#!/usr/bin/env bash +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Create a self-signed CA and patch it into the ModelMesh Serving webhook configuration in the specified namespace. +# Expect cluster-admin authority and Kube cluster access to be configured prior to running. set -e