Skip to content

Commit

Permalink
add HPA feature/unit/fvt/docs/script
Browse files Browse the repository at this point in the history
Signed-off-by: jooho <jlee@redhat.com>
  • Loading branch information
Jooho committed Mar 9, 2023
1 parent f794ffc commit 5eff85a
Show file tree
Hide file tree
Showing 35 changed files with 1,843 additions and 61 deletions.
11 changes: 10 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ test:

# Run fvt tests. This requires an etcd, kubernetes connection, and model serving installation. Ginkgo CLI is used to run them in parallel
fvt:
ginkgo -v -p -progress --fail-fast fvt/predictor fvt/scaleToZero --timeout=40m
ginkgo -v -p -progress --fail-fast fvt/predictor fvt/scaleToZero fvt/hpa --timeout=50m

# Command to regenerate the grpc go files from the proto files
fvt-protoc:
Expand Down Expand Up @@ -87,6 +87,15 @@ deploy-release:
deploy-release-dev-mode:
./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging

# Install with dev-mode logging for FVT runs: calls scripts/install.sh with --dev-mode-logging and --fvt.
# Optional flags are accumulated in extra_options:
#   MODELMESH_SERVING_IMAGE defined -> adds --modelmesh-serving-image ${MODELMESH_SERVING_IMAGE}
#   NAMESPACE_SCOPE_MODE defined    -> adds --namespace-scope-mode
deploy-release-dev-mode-fvt:
ifdef MODELMESH_SERVING_IMAGE
$(eval extra_options += --modelmesh-serving-image ${MODELMESH_SERVING_IMAGE})
endif
ifdef NAMESPACE_SCOPE_MODE
$(eval extra_options += --namespace-scope-mode)
endif
./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging --fvt ${extra_options}

delete: oc-login
./scripts/delete.sh --namespace ${NAMESPACE} --local-config-path config

Expand Down
189 changes: 189 additions & 0 deletions apis/serving/v1alpha1/servingruntime_webhook.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
/*
Copyright 2021 IBM Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1

import (
"context"
"fmt"
"net/http"
"strconv"

kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1"
"github.com/kserve/kserve/pkg/constants"
"github.com/kserve/modelmesh-serving/controllers/autoscaler"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
)

//+kubebuilder:webhook:path=/validate-serving-modelmesh-io-v1alpha1-servingruntime,mutating=false,failurePolicy=fail,sideEffects=None,groups=serving.kserve.io,resources=servingruntimes;clusterservingruntimes,verbs=create;update,versions=v1alpha1,name=servingruntime.modelmesh-webhook-server.default,admissionReviewVersions=v1

// ServingRuntimeWebhook is the validating admission handler for ServingRuntime
// and ClusterServingRuntime objects. Its Handle method decodes the incoming
// object and validates the autoscaler-related annotations on it.
type ServingRuntimeWebhook struct {
	// Client is a Kubernetes client; it is not used by the validation code in
	// this file (presumably populated by the code that registers the webhook).
	Client client.Client
	// decoder turns the raw admission request into a typed object. It is set
	// through InjectDecoder and used by Handle.
	decoder *admission.Decoder
}

// Handle validates an admission request for a serving runtime.
//
// A request whose kind is "ServingRuntime" is decoded as a ServingRuntime; any
// other kind is decoded as a ClusterServingRuntime. A decode failure yields an
// errored response with HTTP 400. The object's annotations are then passed,
// in order, through the autoscaler class, target utilization and replica
// validators; the first failure denies the request with that error's text.
// When every validator passes the request is allowed. ctx is not used.
func (s *ServingRuntimeWebhook) Handle(ctx context.Context, req admission.Request) admission.Response {
	var annotations map[string]string

	switch req.Kind.Kind {
	case "ServingRuntime":
		namespaced := &kservev1alpha.ServingRuntime{}
		if err := s.decoder.Decode(req, namespaced); err != nil {
			return admission.Errored(http.StatusBadRequest, err)
		}
		annotations = namespaced.ObjectMeta.Annotations
	default:
		// Everything that is not a ServingRuntime is treated as the
		// cluster-scoped variant.
		clusterScoped := &kservev1alpha.ClusterServingRuntime{}
		if err := s.decoder.Decode(req, clusterScoped); err != nil {
			return admission.Errored(http.StatusBadRequest, err)
		}
		annotations = clusterScoped.ObjectMeta.Annotations
	}

	// Validators run in this fixed order; the first error short-circuits.
	checks := []func(map[string]string) error{
		validateServingRuntimeAutoscaler,
		validateAutoscalerTargetUtilizationPercentage,
		validateAutoScalingReplicas,
	}
	for _, check := range checks {
		if err := check(annotations); err != nil {
			return admission.Denied(err.Error())
		}
	}

	return admission.Allowed("Passed all validation checks for ServingRuntime")
}

// InjectDecoder stores the given admission decoder on the webhook so that
// Handle can decode incoming requests. It always returns nil.
// NOTE(review): presumably invoked by controller-runtime's decoder injection
// when the webhook is registered — confirm against the registration code.
func (s *ServingRuntimeWebhook) InjectDecoder(d *admission.Decoder) error {
	s.decoder = d
	return nil
}

// validateServingRuntimeAutoscaler validates the autoscaler class annotation.
//
// No annotation means nothing to validate. A class that is not in
// constants.AutoscalerAllowedClassList is rejected as unsupported. Of the
// allowed classes only HPA is handled: when a metrics annotation is present it
// must name a supported HPA metric. Any other allowed class is rejected as
// unknown.
func validateServingRuntimeAutoscaler(annotations map[string]string) error {
	raw, present := annotations[constants.AutoscalerClass]
	if !present {
		return nil
	}

	requested := constants.AutoscalerClassType(raw)
	allowed := false
	for _, candidate := range constants.AutoscalerAllowedClassList {
		if candidate == requested {
			allowed = true
			break
		}
	}
	if !allowed {
		return fmt.Errorf("[%s] is not a supported autoscaler class type.\n", raw)
	}

	if requested != constants.AutoscalerClassHPA {
		return fmt.Errorf("unknown autoscaler class [%s]", requested)
	}

	metric, hasMetric := annotations[constants.AutoscalerMetrics]
	if !hasMetric {
		return nil
	}
	return validateHPAMetrics(constants.AutoscalerMetricsType(metric))
}

// validateAutoscalerTargetUtilizationPercentage checks that the target
// utilization annotation, when present, parses as an integer in [1, 100].
// An absent annotation is accepted.
func validateAutoscalerTargetUtilizationPercentage(annotations map[string]string) error {
	raw, present := annotations[constants.TargetUtilizationPercentage]
	if !present {
		return nil
	}

	// A non-numeric value and an out-of-range value report the same message.
	percent, err := strconv.Atoi(raw)
	if err != nil || percent < 1 || percent > 100 {
		return fmt.Errorf("The target utilization percentage should be a [1-100] integer.")
	}
	return nil
}

// Validate scaling options
func validateAutoScalingReplicas(annotations map[string]string) error {
autoscalerClassType := autoscaler.AutoscalerClassNone
if value, ok := annotations[constants.AutoscalerClass]; ok {
autoscalerClassType = value
}

switch autoscalerClassType {
case string(constants.AutoscalerClassHPA):
return validateScalingHPA(annotations)
default:
return nil
}
}

// validateScalingHPA validates the HPA scaling annotations.
//
// Rules enforced, in order:
//   - min replicas (default 1 when absent) must be an integer >= 0;
//   - max replicas (default 1 when absent) must be an integer;
//   - min replicas must not exceed max replicas (equal values are accepted);
//   - the metric (default constants.AutoScalerMetricsCPU when absent) must be
//     a supported HPA metric;
//   - the target annotation, when present, must be an integer, and for the
//     memory metric it must be at least 1.
//
// Returns nil when all rules hold, otherwise an error describing the first
// violated rule.
func validateScalingHPA(annotations map[string]string) error {
	metric := constants.AutoScalerMetricsCPU
	if value, ok := annotations[constants.AutoscalerMetrics]; ok {
		metric = constants.AutoscalerMetricsType(value)
	}

	minReplicas := 1
	if value, ok := annotations[constants.MinScaleAnnotationKey]; ok {
		parsed, err := strconv.Atoi(value)
		if err != nil {
			return fmt.Errorf("The min replicas should be an integer.")
		}
		if parsed < 0 {
			return fmt.Errorf("The min replicas should be more than -1")
		}
		minReplicas = parsed
	}

	maxReplicas := 1
	if value, ok := annotations[constants.MaxScaleAnnotationKey]; ok {
		parsed, err := strconv.Atoi(value)
		if err != nil {
			return fmt.Errorf("The max replicas should be an integer.")
		}
		// NOTE(review): max has no lower bound of its own, so min=0 with max=0
		// passes this function — confirm whether that combination is intended.
		maxReplicas = parsed
	}

	// Equal min and max is valid; only min > max is rejected, and the message
	// says so.
	if minReplicas > maxReplicas {
		return fmt.Errorf("The max replicas should not be less than the min replicas.")
	}

	if err := validateHPAMetrics(metric); err != nil {
		return err
	}

	if value, ok := annotations[constants.TargetUtilizationPercentage]; ok {
		target, err := strconv.Atoi(value)
		if err != nil {
			return fmt.Errorf("The target utilization percentage should be a [1-100] integer.")
		}
		// For the memory metric a target of exactly 1 is accepted, so the
		// message states "at least 1".
		if metric == constants.AutoScalerMetricsMemory && target < 1 {
			return fmt.Errorf("The target memory should be at least 1 MiB")
		}
	}

	return nil
}

// validateHPAMetrics reports an error unless metric is one of the entries in
// constants.AutoscalerAllowedMetricsList.
func validateHPAMetrics(metric constants.AutoscalerMetricsType) error {
	supported := false
	for i := range constants.AutoscalerAllowedMetricsList {
		if constants.AutoscalerAllowedMetricsList[i] == metric {
			supported = true
			break
		}
	}

	if !supported {
		return fmt.Errorf("[%s] is not a supported metric.\n", metric)
	}
	return nil
}
100 changes: 100 additions & 0 deletions apis/serving/v1alpha1/servingruntime_webhook_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
Copyright 2021 IBM Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1

import (
// "fmt"
"testing"

"github.com/onsi/gomega"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1"
"github.com/kserve/kserve/pkg/constants"
)

// makeTestRawServingRuntime returns a ServingRuntime named "foo" in the
// "default" namespace carrying a complete set of HPA annotations (class hpa,
// cpu metric, target 75, min scale 2, max scale 3). Each call builds a fresh
// annotation map, so tests may mutate it freely.
func makeTestRawServingRuntime() kservev1alpha.ServingRuntime {
	hpaAnnotations := map[string]string{
		"serving.kserve.io/autoscalerClass":             "hpa",
		"serving.kserve.io/metrics":                     "cpu",
		"serving.kserve.io/targetUtilizationPercentage": "75",
		"autoscaling.knative.dev/min-scale":             "2",
		"autoscaling.knative.dev/max-scale":             "3",
	}

	meta := metav1.ObjectMeta{
		Name:        "foo",
		Namespace:   "default",
		Annotations: hpaAnnotations,
	}

	return kservev1alpha.ServingRuntime{ObjectMeta: meta}
}

func TestValidAutoscalerTypeAndHPAMetrics(t *testing.T) {
g := gomega.NewGomegaWithT(t)
sr := makeTestRawServingRuntime()
g.Expect(validateServingRuntimeAutoscaler(sr.Annotations)).Should(gomega.Succeed())
}
func TestInvalidAutoscalerClassType(t *testing.T) {
g := gomega.NewGomegaWithT(t)
sr := makeTestRawServingRuntime()
sr.ObjectMeta.Annotations[constants.AutoscalerClass] = "test"
g.Expect(validateServingRuntimeAutoscaler(sr.Annotations)).ShouldNot(gomega.Succeed())
}

func TestInvalidAutoscalerTargetUtilizationPercentageLowValue(t *testing.T) {
g := gomega.NewGomegaWithT(t)
sr := makeTestRawServingRuntime()
sr.ObjectMeta.Annotations[constants.TargetUtilizationPercentage] = "-1"
g.Expect(validateAutoscalerTargetUtilizationPercentage(sr.Annotations)).ShouldNot(gomega.Succeed())
}

func TestInvalidAutoscalerTargetUtilizationPercentageHighValue(t *testing.T) {
g := gomega.NewGomegaWithT(t)
sr := makeTestRawServingRuntime()
sr.ObjectMeta.Annotations[constants.TargetUtilizationPercentage] = "101"
g.Expect(validateAutoscalerTargetUtilizationPercentage(sr.Annotations)).ShouldNot(gomega.Succeed())
}

func TestInvalidAutoscalerLowMinReplicas(t *testing.T) {
g := gomega.NewGomegaWithT(t)
sr := makeTestRawServingRuntime()
sr.ObjectMeta.Annotations[constants.MinScaleAnnotationKey] = "-1"
g.Expect(validateScalingHPA(sr.Annotations)).ShouldNot(gomega.Succeed())
}

func TestInvalidAutoscalerMaxReplicasMustBiggerThanMixReplicas(t *testing.T) {
g := gomega.NewGomegaWithT(t)
sr := makeTestRawServingRuntime()
sr.ObjectMeta.Annotations[constants.MinScaleAnnotationKey] = "4"
sr.ObjectMeta.Annotations[constants.MaxScaleAnnotationKey] = "3"
g.Expect(validateAutoScalingReplicas(sr.Annotations)).ShouldNot(gomega.Succeed())
}

// TestValidAutoscalerMetricsType checks that "memory" is accepted as an HPA
// metric. The metric is read back from the fixture's annotations so that the
// value written to the fixture is the value actually validated.
func TestValidAutoscalerMetricsType(t *testing.T) {
	g := gomega.NewGomegaWithT(t)
	sr := makeTestRawServingRuntime()
	sr.ObjectMeta.Annotations[constants.AutoscalerMetrics] = "memory"
	metric := constants.AutoscalerMetricsType(sr.ObjectMeta.Annotations[constants.AutoscalerMetrics])
	g.Expect(validateHPAMetrics(metric)).Should(gomega.Succeed())
}

// TestInvalidAutoscalerMetricsType checks that an unrecognised metric name is
// rejected. The metric is read back from the fixture's annotations so that the
// value written to the fixture is the value actually validated.
func TestInvalidAutoscalerMetricsType(t *testing.T) {
	g := gomega.NewGomegaWithT(t)
	sr := makeTestRawServingRuntime()
	sr.ObjectMeta.Annotations[constants.AutoscalerMetrics] = "conccurrency"
	metric := constants.AutoscalerMetricsType(sr.ObjectMeta.Annotations[constants.AutoscalerMetrics])
	g.Expect(validateHPAMetrics(metric)).ShouldNot(gomega.Succeed())
}
2 changes: 1 addition & 1 deletion config/certmanager/certificate.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,4 @@ spec:
issuerRef:
kind: Issuer
name: selfsigned-issuer
secretName: webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize
secretName: modelmesh-webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize
63 changes: 35 additions & 28 deletions config/default/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,34 +49,35 @@
#- webhookcainjection_patch.yaml

# the following config is for teaching kustomize how to do var substitution
#vars:
# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix.
#- name: CERTIFICATE_NAMESPACE # namespace of the certificate CR
# objref:
# kind: Certificate
# group: cert-manager.io
# version: v1alpha2
# name: serving-cert # this name should match the one in certificate.yaml
# fieldref:
# fieldpath: metadata.namespace
#- name: CERTIFICATE_NAME
# objref:
# kind: Certificate
# group: cert-manager.io
# version: v1alpha2
# name: serving-cert # this name should match the one in certificate.yaml
#- name: SERVICE_NAMESPACE # namespace of the service
# objref:
# kind: Service
# version: v1
# name: webhook-service
# fieldref:
# fieldpath: metadata.namespace
#- name: SERVICE_NAME
# objref:
# kind: Service
# version: v1
# name: webhook-service
vars:
- fieldref:
fieldPath: metadata.namespace
name: CERTIFICATE_NAMESPACE
objref:
group: cert-manager.io
kind: Certificate
name: serving-cert
version: v1
- fieldref: {}
name: CERTIFICATE_NAME
objref:
group: cert-manager.io
kind: Certificate
name: serving-cert
version: v1
- fieldref:
fieldPath: metadata.namespace
name: SERVICE_NAMESPACE
objref:
kind: Service
name: modelmesh-webhook-server-service
version: v1
- fieldref: {}
name: SERVICE_NAME
objref:
kind: Service
name: modelmesh-webhook-server-service
version: v1

configMapGenerator:
- files:
Expand All @@ -93,3 +94,9 @@ kind: Kustomization
resources:
- ../crd
- ../manager
- ../webhook
- ../certmanager

patchesStrategicMerge:
- manager_webhook_patch.yaml
- webhookcainjection_patch.yaml
Loading

0 comments on commit 5eff85a

Please sign in to comment.