Skip to content

Commit

Permalink
add HPA feature/unit/fvt/docs/script
Browse files Browse the repository at this point in the history
Signed-off-by: jooho <jlee@redhat.com>
  • Loading branch information
Jooho committed Mar 8, 2023
1 parent f794ffc commit 80f72ce
Show file tree
Hide file tree
Showing 37 changed files with 1,852 additions and 74 deletions.
14 changes: 13 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ test:

# Run fvt tests. This requires an etcd, kubernetes connection, and model serving installation. Ginkgo CLI is used to run them in parallel
fvt:
ginkgo -v -p -progress --fail-fast fvt/predictor fvt/scaleToZero --timeout=40m
ginkgo -v -p -progress --fail-fast fvt/predictor fvt/scaleToZero fvt/hpa --timeout=50m

# Command to regenerate the grpc go files from the proto files
fvt-protoc:
Expand Down Expand Up @@ -87,6 +87,18 @@ deploy-release:
# Runs scripts/install.sh for ${NAMESPACE} with --install-config-path config
# and --dev-mode-logging.
deploy-release-dev-mode:
./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging

# Same install.sh invocation as deploy-release-dev-mode, plus the --fvt flag.
# Optional make variables append further install.sh flags via extra_options:
#   MODELMESH_SERVING_IMAGE  -> --modelmesh-serving-image <value>
#   ENABLE_SELF_SIGNED_CA    -> --enable-self-signed-ca
#   NAMESPACE_SCOPE_MODE     -> --namespace-scope-mode
# Each flag is added only when its variable is defined (ifdef).
deploy-release-dev-mode-fvt:
ifdef MODELMESH_SERVING_IMAGE
$(eval extra_options += --modelmesh-serving-image ${MODELMESH_SERVING_IMAGE})
endif
ifdef ENABLE_SELF_SIGNED_CA
$(eval extra_options += --enable-self-signed-ca)
endif
ifdef NAMESPACE_SCOPE_MODE
$(eval extra_options += --namespace-scope-mode)
endif
./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging --fvt ${extra_options}

# Runs scripts/delete.sh for ${NAMESPACE} with --local-config-path config.
# Depends on the oc-login target, which therefore runs first.
delete: oc-login
./scripts/delete.sh --namespace ${NAMESPACE} --local-config-path config

Expand Down
193 changes: 193 additions & 0 deletions apis/serving/v1alpha1/servingruntime_webhook.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
/*
Copyright 2021 IBM Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1

import (
"context"
"fmt"
"net/http"
"strconv"

kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1"
"github.com/kserve/kserve/pkg/constants"
"github.com/kserve/modelmesh-serving/controllers/autoscaler"
"sigs.k8s.io/controller-runtime/pkg/client"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
)

// servingruntimeWebhookLog is the package-level logger for the ServingRuntime
// webhook, registered under the name "servingruntime-webhook".
var servingruntimeWebhookLog = logf.Log.WithName("servingruntime-webhook")

// ServingRuntimeWebhook is the admission handler whose Handle method validates
// the autoscaling annotations of ServingRuntime and ClusterServingRuntime
// objects. The marker below declares it as a validating (mutating=false)
// webhook for create and update operations on both resources.
//
//+kubebuilder:webhook:path=/validate-serving-modelmesh-io-v1alpha1-servingruntime,mutating=false,failurePolicy=fail,sideEffects=None,groups=serving.kserve.io,resources=servingruntimes;clusterservingruntimes,verbs=create;update,versions=v1alpha1,name=servingruntime.modelmesh-webhook-server.default,admissionReviewVersions=v1
type ServingRuntimeWebhook struct {
// Client is a controller-runtime client. It is not referenced by the
// methods visible in this file.
Client client.Client
// decoder decodes admission requests into typed objects. It is assigned by
// InjectDecoder and used by Handle.
decoder *admission.Decoder
}

// Handle validates the autoscaling annotations carried by the object in an
// admission request.
//
// The request object is decoded as a ServingRuntime when the request kind is
// "ServingRuntime" and as a ClusterServingRuntime for every other kind. A
// decode failure produces an errored response with HTTP status 400. The
// object's annotations are then passed through the autoscaler-class,
// target-utilization and replica validators in that order; the first failure
// denies the request with that validator's error message. If all validators
// pass, the request is allowed.
//
// ctx is accepted to satisfy the handler signature and is not used.
func (s *ServingRuntimeWebhook) Handle(ctx context.Context, req admission.Request) admission.Response {
	var annotations map[string]string

	switch req.Kind.Kind {
	case "ServingRuntime":
		runtime := &kservev1alpha.ServingRuntime{}
		if err := s.decoder.Decode(req, runtime); err != nil {
			return admission.Errored(http.StatusBadRequest, err)
		}
		annotations = runtime.ObjectMeta.Annotations
	default:
		// Anything that is not a ServingRuntime is treated as cluster-scoped.
		clusterRuntime := &kservev1alpha.ClusterServingRuntime{}
		if err := s.decoder.Decode(req, clusterRuntime); err != nil {
			return admission.Errored(http.StatusBadRequest, err)
		}
		annotations = clusterRuntime.ObjectMeta.Annotations
	}

	// Validators run in a fixed order; the first error decides the response.
	validators := []func(map[string]string) error{
		validateServingRuntimeAutoscaler,
		validateAutoscalerTargetUtilizationPercentage,
		validateAutoScalingReplicas,
	}
	for _, validate := range validators {
		if err := validate(annotations); err != nil {
			return admission.Denied(err.Error())
		}
	}

	return admission.Allowed("Passed all validation checks for ServingRuntime")
}

// InjectDecoder stores the given admission decoder on the webhook so that
// Handle can use it to decode incoming request objects. It always returns nil.
// NOTE(review): presumably invoked by controller-runtime's decoder injection
// when the webhook is registered — confirm against the vendored version.
func (s *ServingRuntimeWebhook) InjectDecoder(d *admission.Decoder) error {
s.decoder = d
return nil
}

// validateServingRuntimeAutoscaler checks the autoscaler-class annotation and,
// for the HPA class, the metrics annotation.
//
// It returns nil when the class annotation is absent. When present, the class
// must appear in constants.AutoscalerAllowedClassList, otherwise a "not a
// supported autoscaler class type" error is returned. An allowed class other
// than HPA yields an "unknown autoscaler class" error. For HPA, the metrics
// annotation (if set) is checked with validateHPAMetrics; with no metrics
// annotation the result is nil.
func validateServingRuntimeAutoscaler(annotations map[string]string) error {
	value, found := annotations[constants.AutoscalerClass]
	if !found {
		return nil
	}

	class := constants.AutoscalerClassType(value)
	allowed := false
	for _, candidate := range constants.AutoscalerAllowedClassList {
		if candidate == class {
			allowed = true
			break
		}
	}
	if !allowed {
		return fmt.Errorf("[%s] is not a supported autoscaler class type.\n", value)
	}

	// HPA is the only allowed class this validator knows how to handle.
	if class != constants.AutoscalerClassHPA {
		return fmt.Errorf("unknown autoscaler class [%s]", class)
	}

	metric, hasMetric := annotations[constants.AutoscalerMetrics]
	if !hasMetric {
		return nil
	}
	return validateHPAMetrics(constants.AutoscalerMetricsType(metric))
}

// validateAutoscalerTargetUtilizationPercentage checks the target-utilization
// annotation. It returns nil when the annotation is absent; otherwise the
// value must parse as an integer in the inclusive range 1-100, and any other
// value yields an error.
func validateAutoscalerTargetUtilizationPercentage(annotations map[string]string) error {
	raw, present := annotations[constants.TargetUtilizationPercentage]
	if !present {
		return nil
	}

	// Non-numeric and out-of-range values share the same error message.
	percent, err := strconv.Atoi(raw)
	if err != nil || percent < 1 || percent > 100 {
		return fmt.Errorf("The target utilization percentage should be a [1-100] integer.")
	}
	return nil
}

// validateAutoScalingReplicas validates the scaling annotations according to
// the autoscaler class. The class defaults to autoscaler.AutoscalerClassNone
// and is overridden by the autoscaler-class annotation when present. Only the
// HPA class is validated (via validateScalingHPA); every other class returns
// nil.
func validateAutoScalingReplicas(annotations map[string]string) error {
	class := autoscaler.AutoscalerClassNone
	if configured, found := annotations[constants.AutoscalerClass]; found {
		class = configured
	}

	if class == string(constants.AutoscalerClassHPA) {
		return validateScalingHPA(annotations)
	}
	return nil
}

// validateScalingHPA validates the HPA-related annotations. Checks run in this
// order and the first failure is returned:
//
//  1. min-scale, if set, must be an integer that is not negative.
//  2. max-scale, if set, must be an integer.
//  3. min must not exceed max (each defaults to 1 when its annotation is absent).
//  4. the metric (CPU unless the metrics annotation is set) must pass
//     validateHPAMetrics.
//  5. the target-utilization annotation, if set, must be an integer, and for
//     the memory metric it must be at least 1.
func validateScalingHPA(annotations map[string]string) error {
	lower, upper := 1, 1

	if raw, found := annotations[constants.MinScaleAnnotationKey]; found {
		parsed, err := strconv.Atoi(raw)
		if err != nil {
			return fmt.Errorf("The min replicas should be a integer.")
		}
		if parsed < 0 {
			return fmt.Errorf("The min replicas should be more than -1")
		}
		lower = parsed
	}

	if raw, found := annotations[constants.MaxScaleAnnotationKey]; found {
		parsed, err := strconv.Atoi(raw)
		if err != nil {
			return fmt.Errorf("The max replicas should be a integer.")
		}
		upper = parsed
	}

	if upper < lower {
		return fmt.Errorf("The max replicas should be bigger than min replicas.")
	}

	// The metric is only needed from here on; it defaults to CPU.
	metric := constants.AutoScalerMetricsCPU
	if raw, found := annotations[constants.AutoscalerMetrics]; found {
		metric = constants.AutoscalerMetricsType(raw)
	}
	if err := validateHPAMetrics(metric); err != nil {
		return err
	}

	raw, found := annotations[constants.TargetUtilizationPercentage]
	if !found {
		return nil
	}
	target, err := strconv.Atoi(raw)
	switch {
	case err != nil:
		return fmt.Errorf("The target utilization percentage should be a [1-100] integer.")
	case metric == constants.AutoScalerMetricsMemory && target < 1:
		return fmt.Errorf("The target memory should be greater than 1 MiB")
	}
	return nil
}

// validateHPAMetrics reports whether metric is one of the entries in
// constants.AutoscalerAllowedMetricsList. It returns nil for a listed metric
// and a "not a supported metric" error otherwise.
func validateHPAMetrics(metric constants.AutoscalerMetricsType) error {
	supported := false
	for i := range constants.AutoscalerAllowedMetricsList {
		if constants.AutoscalerAllowedMetricsList[i] == metric {
			supported = true
			break
		}
	}
	if !supported {
		return fmt.Errorf("[%s] is not a supported metric.\n", metric)
	}
	return nil
}
101 changes: 101 additions & 0 deletions apis/serving/v1alpha1/servingruntime_webhook_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
Copyright 2021 IBM Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1

import (
// "fmt"
"testing"

"github.com/onsi/gomega"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1"
"github.com/kserve/kserve/pkg/constants"
)

// makeTestRawServingRuntime builds the ServingRuntime fixture shared by the
// tests in this file: an object named "foo" in namespace "default" carrying
// HPA autoscaler annotations (cpu metric, target 75, min-scale 2, max-scale 3).
// Each call returns a fresh annotations map, so tests may mutate it freely.
func makeTestRawServingRuntime() kservev1alpha.ServingRuntime {
	annotations := map[string]string{
		"serving.kserve.io/autoscalerClass":             "hpa",
		"serving.kserve.io/metrics":                     "cpu",
		"serving.kserve.io/targetUtilizationPercentage": "75",
		"autoscaling.knative.dev/min-scale":             "2",
		"autoscaling.knative.dev/max-scale":             "3",
	}

	return kservev1alpha.ServingRuntime{
		ObjectMeta: metav1.ObjectMeta{
			Name:        "foo",
			Namespace:   "default",
			Annotations: annotations,
		},
	}
}

func TestValidAutoscalerTypeAndHPAMetrics(t *testing.T) {
g := gomega.NewGomegaWithT(t)
sr := makeTestRawServingRuntime()
g.Expect(validateServingRuntimeAutoscaler(sr.Annotations)).Should(gomega.Succeed())
}
func TestInvalidAutoscalerClassType(t *testing.T) {
g := gomega.NewGomegaWithT(t)
sr := makeTestRawServingRuntime()
sr.ObjectMeta.Annotations[constants.AutoscalerClass] = "test"
g.Expect(validateServingRuntimeAutoscaler(sr.Annotations)).ShouldNot(gomega.Succeed())
}

func TestInvalidAutoscalerTargetUtilizationPercentageLowValue(t *testing.T) {
g := gomega.NewGomegaWithT(t)
sr := makeTestRawServingRuntime()
sr.ObjectMeta.Annotations[constants.TargetUtilizationPercentage] = "-1"
g.Expect(validateAutoscalerTargetUtilizationPercentage(sr.Annotations)).ShouldNot(gomega.Succeed())
}

func TestInvalidAutoscalerTargetUtilizationPercentageHighValue(t *testing.T) {
g := gomega.NewGomegaWithT(t)
sr := makeTestRawServingRuntime()
sr.ObjectMeta.Annotations[constants.TargetUtilizationPercentage] = "101"
g.Expect(validateAutoscalerTargetUtilizationPercentage(sr.Annotations)).ShouldNot(gomega.Succeed())
}

func TestInvalidAutoscalerLowMinReplicas(t *testing.T) {
g := gomega.NewGomegaWithT(t)
sr := makeTestRawServingRuntime()
sr.ObjectMeta.Annotations[constants.MinScaleAnnotationKey] = "-1"
g.Expect(validateScalingHPA(sr.Annotations)).ShouldNot(gomega.Succeed())
}

func TestInvalidAutoscalerMaxReplicasMustBiggerThanMixReplicas(t *testing.T) {
g := gomega.NewGomegaWithT(t)
sr := makeTestRawServingRuntime()
sr.ObjectMeta.Annotations[constants.MinScaleAnnotationKey] = "4"
sr.ObjectMeta.Annotations[constants.MaxScaleAnnotationKey] = "3"
g.Expect(validateAutoScalingReplicas(sr.Annotations)).ShouldNot(gomega.Succeed())
}

// TestValidAutoscalerMetricsType verifies that the "memory" metric, read back
// from the fixture's metrics annotation, is accepted by validateHPAMetrics.
func TestValidAutoscalerMetricsType(t *testing.T) {
	g := gomega.NewGomegaWithT(t)
	sr := makeTestRawServingRuntime()
	sr.ObjectMeta.Annotations[constants.AutoscalerMetrics] = "memory"

	// Validate the value stored in the annotation rather than a separate
	// literal, so the fixture setup above is what actually gets exercised.
	metric := constants.AutoscalerMetricsType(sr.ObjectMeta.Annotations[constants.AutoscalerMetrics])
	g.Expect(validateHPAMetrics(metric)).Should(gomega.Succeed())
}

// TestInvalidAutoscalerMetricsType verifies that the metric name
// "conccurrency", read back from the fixture's metrics annotation, is rejected
// by validateHPAMetrics.
func TestInvalidAutoscalerMetricsType(t *testing.T) {
	g := gomega.NewGomegaWithT(t)
	sr := makeTestRawServingRuntime()
	sr.ObjectMeta.Annotations[constants.AutoscalerMetrics] = "conccurrency"

	// Validate the value stored in the annotation rather than a separate
	// literal, so the fixture setup above is what actually gets exercised.
	metric := constants.AutoscalerMetricsType(sr.ObjectMeta.Annotations[constants.AutoscalerMetrics])
	g.Expect(validateHPAMetrics(metric)).ShouldNot(gomega.Succeed())
}

2 changes: 1 addition & 1 deletion config/certmanager/certificate.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,4 @@ spec:
issuerRef:
kind: Issuer
name: selfsigned-issuer
secretName: webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize
secretName: modelmesh-webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize
Loading

0 comments on commit 80f72ce

Please sign in to comment.