Skip to content

Commit

Permalink
add HPA feature/unit/fvt/docs/script
Browse files Browse the repository at this point in the history
Update run-fvt.yaml to support 1.25 kubernetes(hpa v2)

Signed-off-by: jooho <jlee@redhat.com>
  • Loading branch information
Jooho committed Mar 10, 2023
1 parent f794ffc commit acca297
Show file tree
Hide file tree
Showing 40 changed files with 1,914 additions and 77 deletions.
22 changes: 13 additions & 9 deletions .github/workflows/run-fvt.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,16 @@ jobs:
- uses: actions/checkout@v2
- uses: actions/setup-go@v2
with:
go-version: '1.18.7'
- name: Setup Minikube
run: |
wget --no-verbose https://github.com/kubernetes/minikube/releases/download/v1.25.1/minikube-linux-amd64
sudo cp minikube-linux-amd64 /usr/local/bin/minikube
sudo chmod 755 /usr/local/bin/minikube
sudo apt-get install -y conntrack socat
minikube start --driver=none --kubernetes-version v1.22.10
go-version: '1.18.7'
- name: Start Minikube
uses: medyagh/setup-minikube@latest
id: minikube
with:
minikube-version: 1.27.1
container-runtime: docker
kubernetes-version: v1.25.2
cpus: max
memory: max
- name: Check pods
run: |
sleep 30
Expand Down Expand Up @@ -55,6 +57,7 @@ jobs:
echo -e '\n disabled: true' >> config/runtimes/torchserve-0.x.yaml
- name: Build Controller image
run: |
eval $(minikube -p minikube docker-env)
make build.develop
./scripts/build_docker.sh --target runtime --tag ${{ env.IMAGE_TAG }}
- name: Install ModelMesh Serving
Expand All @@ -69,6 +72,7 @@ jobs:
df -h
- name: Pre-pull runtime images
run: |
eval $(minikube -p minikube docker-env)
docker pull nvcr.io/nvidia/tritonserver:21.06.1-py3
docker pull seldonio/mlserver:0.5.2
docker pull openvino/model_server:2022.1
Expand All @@ -86,4 +90,4 @@ jobs:
export PATH=/root/go/bin/:$PATH
export NAMESPACE=modelmesh-serving
export NAMESPACESCOPEMODE=false
make fvt
make fvt
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

public/
target/
vendor/

# Binaries for programs and plugins
*.exe
Expand Down Expand Up @@ -32,4 +33,4 @@ bin
# Modelmesh development related artifacts
devbuild
.develop_image_name
.dev/
.dev/
11 changes: 10 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ test:

# Run fvt tests. This requires an etcd, kubernetes connection, and model serving installation. Ginkgo CLI is used to run them in parallel
fvt:
ginkgo -v -p -progress --fail-fast fvt/predictor fvt/scaleToZero --timeout=40m
ginkgo -v -p -progress --fail-fast fvt/predictor fvt/scaleToZero fvt/hpa --timeout=50m

# Command to regenerate the grpc go files from the proto files
fvt-protoc:
Expand Down Expand Up @@ -87,6 +87,15 @@ deploy-release:
deploy-release-dev-mode:
./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging

deploy-release-dev-mode-fvt:
ifdef MODELMESH_SERVING_IMAGE
$(eval extra_options += --modelmesh-serving-image ${MODELMESH_SERVING_IMAGE})
endif
ifdef NAMESPACE_SCOPE_MODE
$(eval extra_options += --namespace-scope-mode)
endif
./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging --fvt ${extra_options}

delete: oc-login
./scripts/delete.sh --namespace ${NAMESPACE} --local-config-path config

Expand Down
189 changes: 189 additions & 0 deletions apis/serving/v1alpha1/servingruntime_webhook.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
/*
Copyright 2021 IBM Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1

import (
"context"
"fmt"
"net/http"
"strconv"

kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1"
"github.com/kserve/kserve/pkg/constants"
"github.com/kserve/modelmesh-serving/controllers/autoscaler"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
)

// ServingRuntimeWebhook is a validating admission handler for ServingRuntime and
// ClusterServingRuntime objects. Its Handle method checks the autoscaling-related
// annotations (autoscaler class, metric, target utilization, min/max replicas).
//
//+kubebuilder:webhook:path=/validate-serving-modelmesh-io-v1alpha1-servingruntime,mutating=false,failurePolicy=fail,sideEffects=None,groups=serving.kserve.io,resources=servingruntimes;clusterservingruntimes,verbs=create;update,versions=v1alpha1,name=servingruntime.modelmesh-webhook-server.default,admissionReviewVersions=v1
type ServingRuntimeWebhook struct {
// Client is a controller-runtime client. The validation code visible in this
// file does not read from it.
Client client.Client
// decoder turns the raw admission request into a typed object; it is set by
// InjectDecoder and must be non-nil before Handle is called.
decoder *admission.Decoder
}

// Handle validates the annotations of the object carried by an admission request.
//
// A request whose kind is "ServingRuntime" is decoded as a ServingRuntime; every
// other kind is decoded as a ClusterServingRuntime. A decode failure yields an
// Errored response with HTTP 400. The annotations are then passed through each
// validator in order, and the first validation error yields a Denied response
// carrying that error's message. If all validators pass the request is allowed.
func (s *ServingRuntimeWebhook) Handle(ctx context.Context, req admission.Request) admission.Response {
	var annotations map[string]string

	switch req.Kind.Kind {
	case "ServingRuntime":
		runtime := &kservev1alpha.ServingRuntime{}
		if err := s.decoder.Decode(req, runtime); err != nil {
			return admission.Errored(http.StatusBadRequest, err)
		}
		annotations = runtime.ObjectMeta.Annotations
	default:
		clusterRuntime := &kservev1alpha.ClusterServingRuntime{}
		if err := s.decoder.Decode(req, clusterRuntime); err != nil {
			return admission.Errored(http.StatusBadRequest, err)
		}
		annotations = clusterRuntime.ObjectMeta.Annotations
	}

	// Validators run in a fixed order; the first failure decides the response.
	validators := []func(map[string]string) error{
		validateServingRuntimeAutoscaler,
		validateAutoscalerTargetUtilizationPercentage,
		validateAutoScalingReplicas,
	}
	for _, validate := range validators {
		if err := validate(annotations); err != nil {
			return admission.Denied(err.Error())
		}
	}

	return admission.Allowed("Passed all validation checks for ServingRuntime")
}

// InjectDecoder stores the admission decoder that Handle later uses to decode
// incoming requests into typed objects. It always returns nil.
func (s *ServingRuntimeWebhook) InjectDecoder(d *admission.Decoder) error {
s.decoder = d
return nil
}

// validateServingRuntimeAutoscaler validates the autoscaler class annotation.
//
// It returns nil when the annotation is absent. When present, the value must be
// one of constants.AutoscalerAllowedClassList; otherwise an error is returned.
// For the HPA class, the metrics annotation (if set) is additionally checked with
// validateHPAMetrics. Any other allowed class is reported as unknown.
func validateServingRuntimeAutoscaler(annotations map[string]string) error {
	value, ok := annotations[constants.AutoscalerClass]
	if !ok {
		// No autoscaler class requested: nothing to validate.
		return nil
	}

	class := constants.AutoscalerClassType(value)
	for _, item := range constants.AutoscalerAllowedClassList {
		if class != item {
			continue
		}
		switch class {
		case constants.AutoscalerClassHPA:
			if metric, ok := annotations[constants.AutoscalerMetrics]; ok {
				return validateHPAMetrics(constants.AutoscalerMetricsType(metric))
			}
			return nil
		default:
			return fmt.Errorf("unknown autoscaler class [%s]", class)
		}
	}

	// The message is surfaced verbatim in the admission denial, so it must not
	// end with a newline.
	return fmt.Errorf("[%s] is not a supported autoscaler class type.", value)
}

// validateAutoscalerTargetUtilizationPercentage checks the target utilization
// percentage annotation. It returns nil when the annotation is absent, and an
// error when the value is not an integer or lies outside the range 1 to 100.
func validateAutoscalerTargetUtilizationPercentage(annotations map[string]string) error {
	raw, present := annotations[constants.TargetUtilizationPercentage]
	if !present {
		return nil
	}

	// Non-numeric and out-of-range values share the same message.
	percent, err := strconv.Atoi(raw)
	if err != nil || percent < 1 || percent > 100 {
		return fmt.Errorf("The target utilization percentage should be a [1-100] integer.")
	}

	return nil
}

// Validate scaling options
func validateAutoScalingReplicas(annotations map[string]string) error {
autoscalerClassType := autoscaler.AutoscalerClassNone
if value, ok := annotations[constants.AutoscalerClass]; ok {
autoscalerClassType = value
}

switch autoscalerClassType {
case string(constants.AutoscalerClassHPA):
return validateScalingHPA(annotations)
default:
return nil
}
}

// validateScalingHPA validates the HPA-related annotations, in this order:
//
//  1. min replicas (default 1) must be an integer of at least 1;
//  2. max replicas (default 1) must be an integer;
//  3. max replicas must not be smaller than min replicas;
//  4. the metric (default CPU) must pass validateHPAMetrics;
//  5. the target utilization, if set, must be an integer, and for the memory
//     metric must be at least 1.
//
// The first rule that fails determines the returned error.
func validateScalingHPA(annotations map[string]string) error {
	metric := constants.AutoScalerMetricsCPU
	if raw, ok := annotations[constants.AutoscalerMetrics]; ok {
		metric = constants.AutoscalerMetricsType(raw)
	}

	minReplicas, maxReplicas := 1, 1

	if raw, ok := annotations[constants.MinScaleAnnotationKey]; ok {
		parsed, err := strconv.Atoi(raw)
		switch {
		case err != nil:
			return fmt.Errorf("The min replicas should be a integer.")
		case parsed < 1:
			return fmt.Errorf("The min replicas should be more than 0")
		}
		minReplicas = parsed
	}

	if raw, ok := annotations[constants.MaxScaleAnnotationKey]; ok {
		parsed, err := strconv.Atoi(raw)
		if err != nil {
			return fmt.Errorf("The max replicas should be a integer.")
		}
		maxReplicas = parsed
	}

	if maxReplicas < minReplicas {
		return fmt.Errorf("The max replicas should be same or bigger than min replicas.")
	}

	if err := validateHPAMetrics(metric); err != nil {
		return err
	}

	raw, ok := annotations[constants.TargetUtilizationPercentage]
	if !ok {
		return nil
	}
	target, err := strconv.Atoi(raw)
	if err != nil {
		return fmt.Errorf("The target utilization percentage should be a [1-100] integer.")
	}
	if metric == constants.AutoScalerMetricsMemory && target < 1 {
		return fmt.Errorf("The target memory should be greater than 1 MiB")
	}

	return nil
}

// validateHPAMetrics reports whether metric is one of the entries in
// constants.AutoscalerAllowedMetricsList. It returns nil on a match and an error
// naming the rejected metric otherwise.
func validateHPAMetrics(metric constants.AutoscalerMetricsType) error {
	for _, allowed := range constants.AutoscalerAllowedMetricsList {
		if allowed == metric {
			return nil
		}
	}
	// The message is surfaced verbatim in the admission denial, so it must not
	// end with a newline.
	return fmt.Errorf("[%s] is not a supported metric.", metric)
}
100 changes: 100 additions & 0 deletions apis/serving/v1alpha1/servingruntime_webhook_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
Copyright 2021 IBM Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1

import (
// "fmt"
"testing"

"github.com/onsi/gomega"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1"
"github.com/kserve/kserve/pkg/constants"
)

// makeTestRawServingRuntime returns a ServingRuntime named "foo" in the "default"
// namespace whose annotations describe a valid HPA setup: class "hpa", metric
// "cpu", target utilization 75, min scale 2 and max scale 3. Each call builds a
// fresh annotation map, so tests can mutate it freely.
func makeTestRawServingRuntime() kservev1alpha.ServingRuntime {
	annotations := map[string]string{
		"serving.kserve.io/autoscalerClass":             "hpa",
		"serving.kserve.io/metrics":                     "cpu",
		"serving.kserve.io/targetUtilizationPercentage": "75",
		"autoscaling.knative.dev/min-scale":             "2",
		"autoscaling.knative.dev/max-scale":             "3",
	}

	return kservev1alpha.ServingRuntime{
		ObjectMeta: metav1.ObjectMeta{
			Name:        "foo",
			Namespace:   "default",
			Annotations: annotations,
		},
	}
}

// TestValidAutoscalerTypeAndHPAMetrics verifies that the baseline fixture
// (class "hpa", metric "cpu") passes autoscaler class validation.
func TestValidAutoscalerTypeAndHPAMetrics(t *testing.T) {
	g := gomega.NewGomegaWithT(t)
	runtime := makeTestRawServingRuntime()

	err := validateServingRuntimeAutoscaler(runtime.Annotations)
	g.Expect(err).Should(gomega.Succeed())
}
// TestInvalidAutoscalerClassType verifies that an autoscaler class of "test"
// is rejected.
func TestInvalidAutoscalerClassType(t *testing.T) {
	g := gomega.NewGomegaWithT(t)
	runtime := makeTestRawServingRuntime()
	runtime.Annotations[constants.AutoscalerClass] = "test"

	err := validateServingRuntimeAutoscaler(runtime.Annotations)
	g.Expect(err).ShouldNot(gomega.Succeed())
}

// TestInvalidAutoscalerTargetUtilizationPercentageLowValue verifies that a
// target utilization of "-1" is rejected.
func TestInvalidAutoscalerTargetUtilizationPercentageLowValue(t *testing.T) {
	g := gomega.NewGomegaWithT(t)
	runtime := makeTestRawServingRuntime()
	runtime.Annotations[constants.TargetUtilizationPercentage] = "-1"

	err := validateAutoscalerTargetUtilizationPercentage(runtime.Annotations)
	g.Expect(err).ShouldNot(gomega.Succeed())
}

// TestInvalidAutoscalerTargetUtilizationPercentageHighValue verifies that a
// target utilization of "101" is rejected.
func TestInvalidAutoscalerTargetUtilizationPercentageHighValue(t *testing.T) {
	g := gomega.NewGomegaWithT(t)
	runtime := makeTestRawServingRuntime()
	runtime.Annotations[constants.TargetUtilizationPercentage] = "101"

	err := validateAutoscalerTargetUtilizationPercentage(runtime.Annotations)
	g.Expect(err).ShouldNot(gomega.Succeed())
}

// TestInvalidAutoscalerLowMinReplicas verifies that a min scale of "0" is
// rejected by the HPA scaling validation.
func TestInvalidAutoscalerLowMinReplicas(t *testing.T) {
	g := gomega.NewGomegaWithT(t)
	runtime := makeTestRawServingRuntime()
	runtime.Annotations[constants.MinScaleAnnotationKey] = "0"

	err := validateScalingHPA(runtime.Annotations)
	g.Expect(err).ShouldNot(gomega.Succeed())
}

// TestInvalidAutoscalerMaxReplicasMustBiggerThanMixReplicas verifies that a
// min scale ("4") larger than the max scale ("3") is rejected.
func TestInvalidAutoscalerMaxReplicasMustBiggerThanMixReplicas(t *testing.T) {
	g := gomega.NewGomegaWithT(t)
	runtime := makeTestRawServingRuntime()
	runtime.Annotations[constants.MinScaleAnnotationKey] = "4"
	runtime.Annotations[constants.MaxScaleAnnotationKey] = "3"

	err := validateAutoScalingReplicas(runtime.Annotations)
	g.Expect(err).ShouldNot(gomega.Succeed())
}

// TestValidAutoscalerMetricsType verifies that "memory" is accepted as an HPA
// metric. The fixture's annotation is updated as well, although
// validateHPAMetrics receives the metric value directly.
func TestValidAutoscalerMetricsType(t *testing.T) {
	g := gomega.NewGomegaWithT(t)
	runtime := makeTestRawServingRuntime()
	runtime.Annotations[constants.AutoscalerMetrics] = "memory"

	err := validateHPAMetrics(constants.AutoscalerMetricsType("memory"))
	g.Expect(err).Should(gomega.Succeed())
}

// TestInvalidAutoscalerMetricsType verifies that the misspelled metric
// "conccurrency" is rejected. The fixture's annotation is updated as well,
// although validateHPAMetrics receives the metric value directly.
func TestInvalidAutoscalerMetricsType(t *testing.T) {
	g := gomega.NewGomegaWithT(t)
	runtime := makeTestRawServingRuntime()
	runtime.Annotations[constants.AutoscalerMetrics] = "conccurrency"

	err := validateHPAMetrics(constants.AutoscalerMetricsType("conccurrency"))
	g.Expect(err).ShouldNot(gomega.Succeed())
}
8 changes: 4 additions & 4 deletions config/certmanager/certificate.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@ metadata:
name: serving-cert # this name should match the one that appears in kustomizeconfig.yaml
namespace: system
spec:
# $(SERVICE_NAME) and $(SERVICE_NAMESPACE) will be substituted by kustomize
# SERVICE_NAME_PLACEHOLDER and SERVICE_NAMESPACE_PLACEHOLDER will be substituted by kustomize
dnsNames:
- $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc
- $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc.cluster.local
- SERVICE_NAME_PLACEHOLDER.SERVICE_NAMESPACE_PLACEHOLDER.svc
- SERVICE_NAME_PLACEHOLDER.SERVICE_NAMESPACE_PLACEHOLDER.svc.cluster.local
issuerRef:
kind: Issuer
name: selfsigned-issuer
secretName: webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize
secretName: modelmesh-webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize
Loading

0 comments on commit acca297

Please sign in to comment.