Skip to content

Commit

Permalink
feat: Enable Horizontal Pod Autoscaling for ServingRuntime/ClusterSer…
Browse files Browse the repository at this point in the history
…vingRuntime (#342)

Enable Horizontal Pod Autoscaling for ServingRuntime/ClusterServingRuntime
by adding annotation `serving.kserve.io/autoscalerClass: hpa`

- Add auto-scaling, HPA controller 
- Add ServingRuntime Webhook
- Update deployment manifests
- Add script to generate self-signed certificate
- Add option to enable self-signed certificate to install script
- Add deploy-release-dev-mode-fvt target to Makefile
- Add FVT and unit tests
- Upgrade FVT minikube version from 1.25 to 1.27
- Enable FVT deployment on OpenShift (etcd --data-dir)
- Update Docs

Resolves #329

Signed-off-by: Jooho Lee <ljhiyh@gmail.com>
  • Loading branch information
Jooho authored Apr 27, 2023
1 parent e9d9366 commit dd0229f
Show file tree
Hide file tree
Showing 49 changed files with 1,994 additions and 120 deletions.
22 changes: 14 additions & 8 deletions .github/workflows/run-fvt.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,16 @@ jobs:
- uses: actions/checkout@v2
- uses: actions/setup-go@v2
with:
go-version: '1.18.7'
- name: Setup Minikube
run: |
wget --no-verbose https://github.com/kubernetes/minikube/releases/download/v1.25.1/minikube-linux-amd64
sudo cp minikube-linux-amd64 /usr/local/bin/minikube
sudo chmod 755 /usr/local/bin/minikube
sudo apt-get install -y conntrack socat
minikube start --driver=none --kubernetes-version v1.22.10
go-version: '1.18.7'
- name: Start Minikube
uses: medyagh/setup-minikube@v0.0.11
id: minikube
with:
minikube-version: 1.27.1
container-runtime: docker
kubernetes-version: v1.25.2
cpus: max
memory: max
- name: Check pods
run: |
sleep 30
Expand Down Expand Up @@ -55,6 +57,7 @@ jobs:
echo -e '\n disabled: true' >> config/runtimes/torchserve-0.x.yaml
- name: Build Controller image
run: |
eval $(minikube -p minikube docker-env)
make build.develop
./scripts/build_docker.sh --target runtime --tag ${{ env.IMAGE_TAG }}
- name: Install ModelMesh Serving
Expand All @@ -63,12 +66,14 @@ jobs:
./scripts/install.sh --namespace modelmesh-serving --fvt --dev-mode-logging
- name: Free up disk space
run: |
eval $(minikube -p minikube docker-env)
echo "Pruning images"
docker image prune -a -f
docker system df
df -h
- name: Pre-pull runtime images
run: |
eval $(minikube -p minikube docker-env)
docker pull nvcr.io/nvidia/tritonserver:21.06.1-py3
docker pull seldonio/mlserver:0.5.2
docker pull openvino/model_server:2022.2
Expand All @@ -78,6 +83,7 @@ jobs:
docker pull kserve/modelmesh
- name: Check installation
run: |
eval $(minikube -p minikube docker-env)
docker images
kubectl get pods
kubectl get clusterservingruntimes
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

public/
target/
vendor/

# Binaries for programs and plugins
*.exe
Expand Down
1 change: 1 addition & 0 deletions Dockerfile.develop
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ RUN microdnf install \
tar \
vim \
git \
jq \
python38 \
nodejs && \
pip3 install pre-commit && \
Expand Down
12 changes: 11 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ test:

# Run fvt tests. This requires an etcd, kubernetes connection, and model serving installation. Ginkgo CLI is used to run them in parallel
fvt:
ginkgo -v -procs=2 --progress --fail-fast fvt/predictor fvt/scaleToZero fvt/storage --timeout=50m
ginkgo -v -procs=2 --progress --fail-fast fvt/predictor fvt/scaleToZero fvt/storage fvt/hpa --timeout=50m


# Command to regenerate the grpc go files from the proto files
fvt-protoc:
Expand Down Expand Up @@ -87,6 +88,15 @@ deploy-release:
deploy-release-dev-mode:
./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging

deploy-release-dev-mode-fvt:
ifdef MODELMESH_SERVING_IMAGE
$(eval extra_options += --modelmesh-serving-image ${MODELMESH_SERVING_IMAGE})
endif
ifdef NAMESPACE_SCOPE_MODE
$(eval extra_options += --namespace-scope-mode)
endif
./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging --fvt ${extra_options}

delete: oc-login
./scripts/delete.sh --namespace ${NAMESPACE} --local-config-path config

Expand Down
216 changes: 216 additions & 0 deletions apis/serving/v1alpha1/servingruntime_webhook.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
//Copyright 2021 IBM Corporation
//
//Licensed under the Apache License, Version 2.0 (the "License");
//you may not use this file except in compliance with the License.
//You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
//Unless required by applicable law or agreed to in writing, software
//distributed under the License is distributed on an "AS IS" BASIS,
//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//See the License for the specific language governing permissions and
//limitations under the License.
//
package v1alpha1

import (
"context"
"fmt"
"math"
"net/http"
"strconv"

kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1"
"github.com/kserve/kserve/pkg/constants"
"github.com/kserve/modelmesh-serving/controllers/autoscaler"
mmcontstant "github.com/kserve/modelmesh-serving/pkg/constants"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
)

//+kubebuilder:webhook:path=/validate-serving-modelmesh-io-v1alpha1-servingruntime,mutating=false,failurePolicy=fail,sideEffects=None,groups=serving.kserve.io,resources=servingruntimes;clusterservingruntimes,verbs=create;update,versions=v1alpha1,name=servingruntime.modelmesh-webhook-server.default,admissionReviewVersions=v1

// ServingRuntimeWebhook is the admission handler that validates the
// autoscaling-related annotations of ServingRuntime and ClusterServingRuntime
// objects. The marker above declares it as a non-mutating webhook for create
// and update requests on both resources.
type ServingRuntimeWebhook struct {
// Client is a controller-runtime client. It is not read by Handle or the
// validation helpers in this file.
Client client.Client
// decoder turns the raw admission request into a typed object. It is
// assigned by InjectDecoder and used by Handle.
decoder *admission.Decoder
}

// Handle validates an admission request for a ServingRuntime or a
// ClusterServingRuntime.
//
// The request object is decoded according to req.Kind.Kind: "ServingRuntime"
// is decoded as a ServingRuntime and every other kind as a
// ClusterServingRuntime. A decode failure yields an errored response with
// HTTP status 400. Runtimes whose Spec.MultiModel is unset or false are
// allowed without further checks. Otherwise the autoscaler class, the target
// utilization percentage and the replica settings are validated in that
// order, and the first failure is returned as a denial carrying the error
// message.
func (s *ServingRuntimeWebhook) Handle(ctx context.Context, req admission.Request) admission.Response {
	var (
		annotations map[string]string
		isMultiModel bool
	)
	// math.MaxUint16 marks "replicas not set"; validateAutoScalingReplicas
	// compares against the same sentinel.
	replicas := uint16(math.MaxUint16)

	switch req.Kind.Kind {
	case "ServingRuntime":
		sr := &kservev1alpha.ServingRuntime{}
		if err := s.decoder.Decode(req, sr); err != nil {
			return admission.Errored(http.StatusBadRequest, err)
		}
		annotations = sr.ObjectMeta.Annotations
		if r := sr.Spec.Replicas; r != nil {
			replicas = uint16(*r)
		}
		if mm := sr.Spec.MultiModel; mm != nil {
			isMultiModel = *mm
		}
	default:
		csr := &kservev1alpha.ClusterServingRuntime{}
		if err := s.decoder.Decode(req, csr); err != nil {
			return admission.Errored(http.StatusBadRequest, err)
		}
		annotations = csr.ObjectMeta.Annotations
		if r := csr.Spec.Replicas; r != nil {
			replicas = uint16(*r)
		}
		if mm := csr.Spec.MultiModel; mm != nil {
			isMultiModel = *mm
		}
	}

	if !isMultiModel {
		return admission.Allowed("Not validating ServingRuntime because it is not ModelMesh compatible")
	}

	// Run the checks in a fixed order and stop at the first failure.
	checks := []func() error{
		func() error { return validateServingRuntimeAutoscaler(annotations) },
		func() error { return validateAutoscalerTargetUtilizationPercentage(annotations) },
		func() error { return validateAutoScalingReplicas(annotations, replicas) },
	}
	for _, check := range checks {
		if err := check(); err != nil {
			return admission.Denied(err.Error())
		}
	}

	return admission.Allowed("Passed all validation checks for ServingRuntime")
}

// InjectDecoder stores the given admission decoder on the webhook so that
// Handle can decode incoming request objects. It always returns nil.
func (s *ServingRuntimeWebhook) InjectDecoder(d *admission.Decoder) error {
	s.decoder = d
	return nil
}

// validateServingRuntimeAutoscaler checks the autoscaler class annotation.
//
// It returns nil when the annotation is absent. When the class appears in
// constants.AutoscalerAllowedClassList, only the HPA class is accepted: its
// metrics annotation, if present, is checked with validateHPAMetrics. Any
// other listed class is reported as unknown. A class that is not in the list
// is reported as unsupported.
func validateServingRuntimeAutoscaler(annotations map[string]string) error {
	raw, present := annotations[constants.AutoscalerClass]
	if !present {
		return nil
	}

	class := constants.AutoscalerClassType(raw)
	for _, allowed := range constants.AutoscalerAllowedClassList {
		if allowed != class {
			continue
		}
		// The class is listed; HPA is the only one handled here.
		if class != constants.AutoscalerClassHPA {
			return fmt.Errorf("unknown autoscaler class [%s]", class)
		}
		metric, hasMetric := annotations[constants.AutoscalerMetrics]
		if !hasMetric {
			return nil
		}
		return validateHPAMetrics(constants.AutoscalerMetricsType(metric))
	}

	return fmt.Errorf("[%s] is not a supported autoscaler class type.\n", raw)
}

// validateAutoscalerTargetUtilizationPercentage checks the target utilization
// percentage annotation. It returns nil when the annotation is absent, and an
// error when the value is not an integer or lies outside the range 1 to 100
// inclusive.
func validateAutoscalerTargetUtilizationPercentage(annotations map[string]string) error {
	raw, present := annotations[constants.TargetUtilizationPercentage]
	if !present {
		return nil
	}

	// A parse failure and an out-of-range value produce the same message.
	if pct, err := strconv.Atoi(raw); err != nil || pct < 1 || pct > 100 {
		return fmt.Errorf("The target utilization percentage should be a [1-100] integer.")
	}
	return nil
}

// Validate scaling options
func validateAutoScalingReplicas(annotations map[string]string, srReplicas uint16) error {
autoscalerClassType := autoscaler.AutoscalerClassNone
if value, ok := annotations[constants.AutoscalerClass]; ok {
autoscalerClassType = value
}

switch autoscalerClassType {
case string(constants.AutoscalerClassHPA):
if srReplicas != math.MaxUint16 {
return fmt.Errorf("Autoscaler is enabled and also replicas variable set. You can not set both.")
}
return validateScalingHPA(annotations)
default:
return nil
}
}

// validateScalingHPA checks the HPA-related scaling annotations.
//
// The metric defaults to constants.AutoScalerMetricsCPU, and both the minimum
// and maximum replica counts default to 1 when their annotations are absent.
// An error is returned when:
//   - the min or max replicas annotation is not an integer,
//   - min replicas is below 1,
//   - min replicas exceeds max replicas,
//   - the metric is rejected by validateHPAMetrics,
//   - the target utilization annotation is not an integer, or
//   - the metric is memory and the target is below 1.
func validateScalingHPA(annotations map[string]string) error {
	metric := constants.AutoScalerMetricsCPU
	if raw, ok := annotations[constants.AutoscalerMetrics]; ok {
		metric = constants.AutoscalerMetricsType(raw)
	}

	lower := 1
	if raw, ok := annotations[mmcontstant.MinScaleAnnotationKey]; ok {
		parsed, err := strconv.Atoi(raw)
		if err != nil {
			return fmt.Errorf("The min replicas should be a integer.")
		}
		if parsed < 1 {
			return fmt.Errorf("The min replicas should be more than 0")
		}
		lower = parsed
	}

	upper := 1
	if raw, ok := annotations[mmcontstant.MaxScaleAnnotationKey]; ok {
		parsed, err := strconv.Atoi(raw)
		if err != nil {
			return fmt.Errorf("The max replicas should be a integer.")
		}
		upper = parsed
	}

	if upper < lower {
		return fmt.Errorf("The max replicas should be same or bigger than min replicas.")
	}

	if err := validateHPAMetrics(metric); err != nil {
		return err
	}

	raw, ok := annotations[constants.TargetUtilizationPercentage]
	if !ok {
		return nil
	}
	target, err := strconv.Atoi(raw)
	switch {
	case err != nil:
		return fmt.Errorf("The target utilization percentage should be a [1-100] integer.")
	case metric == constants.AutoScalerMetricsMemory && target < 1:
		return fmt.Errorf("The target memory should be greater than 1 MiB")
	}

	return nil
}

// validateHPAMetrics reports whether metric is one of the entries in
// constants.AutoscalerAllowedMetricsList. It returns nil on a match and an
// error naming the metric otherwise.
func validateHPAMetrics(metric constants.AutoscalerMetricsType) error {
	allowed := constants.AutoscalerAllowedMetricsList
	for i := range allowed {
		if allowed[i] == metric {
			return nil
		}
	}
	return fmt.Errorf("[%s] is not a supported metric.\n", metric)
}
Loading

0 comments on commit dd0229f

Please sign in to comment.