feat(api,ui,sdk): Make CPU limits configurable #586

Merged · 26 commits · Jun 3, 2024

Changes from 22 commits

Commits
03baa77
Add new unit test helper method to sort env vars in isvc specs
deadlycoconuts May 17, 2024
ffd4e9a
Extend merge env vars helper function to merge more than 2 sets of en…
deadlycoconuts May 17, 2024
3c9306b
Add new configs to set default env vars when cpu limits are not speci…
deadlycoconuts May 17, 2024
fb3669d
Add unit tests to test parsing of default env vars when cpu limits ar…
deadlycoconuts May 17, 2024
7fe8aa8
Fix environment service unit tests
deadlycoconuts May 17, 2024
8efd86f
Fix model service deployment unit tests
deadlycoconuts May 17, 2024
7f2e7c4
Add config parsing tests
deadlycoconuts May 17, 2024
c6ff954
Make env var setters return errors that will then be checked
deadlycoconuts May 17, 2024
1d4661d
Add component to make cpu limits configurable in ui
deadlycoconuts May 20, 2024
58a6b84
Update swagger docs and autogenerated openapi files
deadlycoconuts May 20, 2024
b22ab30
Make model page display cpu limits if configured
deadlycoconuts May 20, 2024
9c82fd2
Update SDK to expose cpu limit field
deadlycoconuts May 20, 2024
b69c772
Fix sdk deploy method
deadlycoconuts May 20, 2024
8a49a81
Revert redundant changes to cpu limit of default configs in sdk tests
deadlycoconuts May 20, 2024
42c751d
Fix incorrect change to sdk integration test
deadlycoconuts May 20, 2024
80248a1
Fix integration tests
deadlycoconuts May 20, 2024
ad7d653
Update tool tip and form group descriptions for cpu limit form
deadlycoconuts May 21, 2024
450bd94
Update docs
deadlycoconuts May 21, 2024
eff5126
Refactor default env vars to use a different struct
deadlycoconuts May 23, 2024
a255f43
Add cpu limit regex check
deadlycoconuts May 23, 2024
ac9cb63
Refactor cpu limit as nullable field
deadlycoconuts May 24, 2024
54b2a2d
Add cpu limits form group to transformer step and cleanup code
deadlycoconuts May 24, 2024
4f3e569
Rename cpu limit section
deadlycoconuts May 24, 2024
4d817ba
Add codecov config file that adds a threshold to allow random code co…
deadlycoconuts May 27, 2024
251cb67
Refactor code to determine cpu limit into single helper function
deadlycoconuts May 31, 2024
6f1f13b
Simplify cpu limit description
deadlycoconuts May 31, 2024
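
Taken together, the commits above expose the CPU limit as an optional, user-settable field end to end (API, UI, SDK). Commit ac9cb63 refactors it as a nullable field; the following is a minimal sketch of the resulting shape, not the PR's actual model — only CPURequest, CPULimit, and MemoryRequest are confirmed by the templater diff below, and the JSON tags and trimmed-down field set are illustrative:

```go
package models

import "k8s.io/apimachinery/pkg/api/resource"

// ResourceRequest sketch: a nil (or zero) CPULimit means "not configured",
// in which case the platform derives a limit automatically, as the
// templater.go changes below show. Tags and omitted fields are assumptions.
type ResourceRequest struct {
	CPURequest    resource.Quantity  `json:"cpu_request"`
	CPULimit      *resource.Quantity `json:"cpu_limit,omitempty"` // nullable
	MemoryRequest resource.Quantity  `json:"memory_request"`
}
```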
36 changes: 36 additions & 0 deletions api/client/model_resource_request.go

Some generated files are not rendered by default.

85 changes: 57 additions & 28 deletions api/cluster/resource/templater.go
@@ -194,20 +194,29 @@ func (t *InferenceServiceTemplater) CreateInferenceServiceSpec(modelService *mod
 func (t *InferenceServiceTemplater) createPredictorSpec(modelService *models.Service) (kservev1beta1.PredictorSpec, error) {
 	envVars := modelService.EnvVars
 
-	// Set resource limits to request * userContainerCPULimitRequestFactor or UserContainerMemoryLimitRequestFactor
+	// Set resource limits to request * userContainerCPULimitRequestFactor or userContainerMemoryLimitRequestFactor
 	limits := map[corev1.ResourceName]resource.Quantity{}
-	if t.deploymentConfig.UserContainerCPULimitRequestFactor != 0 {
-		limits[corev1.ResourceCPU] = ScaleQuantity(
-			modelService.ResourceRequest.CPURequest, t.deploymentConfig.UserContainerCPULimitRequestFactor,
-		)
-	} else {
-		// TODO: Remove this else-block when KServe finally allows default CPU limits to be removed
-		var err error
-		limits[corev1.ResourceCPU], err = resource.ParseQuantity(t.deploymentConfig.UserContainerCPUDefaultLimit)
-		if err != nil {
-			return kservev1beta1.PredictorSpec{}, err
+
+	// Set cpu resource limits automatically if they have not been set
+	if modelService.ResourceRequest.CPULimit == nil || modelService.ResourceRequest.CPULimit.IsZero() {
+		if t.deploymentConfig.UserContainerCPULimitRequestFactor != 0 {
+			limits[corev1.ResourceCPU] = ScaleQuantity(
+				modelService.ResourceRequest.CPURequest, t.deploymentConfig.UserContainerCPULimitRequestFactor,
+			)
+		} else {
+			// TODO: Remove this else-block when KServe finally allows default CPU limits to be removed
+			var err error
+			limits[corev1.ResourceCPU], err = resource.ParseQuantity(t.deploymentConfig.UserContainerCPUDefaultLimit)
+			if err != nil {
+				return kservev1beta1.PredictorSpec{}, err
+			}
+			// Set additional env vars to manage concurrency so model performance improves when no CPU limits are set
+			envVars = models.MergeEnvVars(ParseEnvVars(t.deploymentConfig.DefaultEnvVarsWithoutCPULimits), envVars)
+		}
+	} else {
+		limits[corev1.ResourceCPU] = *modelService.ResourceRequest.CPULimit
 	}
 
 	if t.deploymentConfig.UserContainerMemoryLimitRequestFactor != 0 {
 		limits[corev1.ResourceMemory] = ScaleQuantity(
 			modelService.ResourceRequest.MemoryRequest, t.deploymentConfig.UserContainerMemoryLimitRequestFactor,
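
The new branching above gives CPU limits a three-level precedence: an explicit, non-zero CPULimit on the resource request is used verbatim; otherwise the request is scaled by UserContainerCPULimitRequestFactor; and only when that factor is 0 does the platform fall back to UserContainerCPUDefaultLimit plus the concurrency-tuning env vars. Commit 251cb67 later consolidates this into a single helper, whose signature isn't visible in this excerpt — so the following is only a hypothetical distillation (the function name, shape, and milli-based scaling are assumptions):

```go
package main

import "k8s.io/apimachinery/pkg/api/resource"

// resolveCPULimit sketches the precedence implemented above. An explicit
// user limit wins; otherwise the request is scaled by the configured
// factor; otherwise the platform default is parsed, in which case the
// caller should also inject the concurrency-tuning env vars.
func resolveCPULimit(
	userLimit *resource.Quantity, // nullable CPU limit from the resource request
	request resource.Quantity, // CPU request
	factor float64, // UserContainerCPULimitRequestFactor
	defaultLimit string, // UserContainerCPUDefaultLimit
) (limit resource.Quantity, needsConcurrencyEnvVars bool, err error) {
	if userLimit != nil && !userLimit.IsZero() {
		return *userLimit, false, nil
	}
	if factor != 0 {
		// Stand-in for Merlin's ScaleQuantity helper: multiply via millivalues.
		scaled := resource.NewMilliQuantity(int64(float64(request.MilliValue())*factor), resource.DecimalSI)
		return *scaled, false, nil
	}
	limit, err = resource.ParseQuantity(defaultLimit)
	return limit, err == nil, err
}
```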
@@ -329,7 +338,7 @@ func (t *InferenceServiceTemplater) createPredictorSpec(modelService *models.Ser
 		// 1. PyFunc default env
 		// 2. User environment variable
 		// 3. Default env variable that can be override by user environment
-		higherPriorityEnvVars := models.MergeEnvVars(modelService.EnvVars, pyfuncDefaultEnv)
+		higherPriorityEnvVars := models.MergeEnvVars(envVars, pyfuncDefaultEnv)
 		lowerPriorityEnvVars := models.EnvVars{}
 		if modelService.Protocol == protocol.UpiV1 {
 			lowerPriorityEnvVars = append(lowerPriorityEnvVars, models.EnvVar{Name: envGRPCOptions, Value: t.deploymentConfig.PyfuncGRPCOptions})
@@ -364,7 +373,7 @@ func (t *InferenceServiceTemplater) createPredictorSpec(modelService *models.Ser
 		}
 
 	case models.ModelTypeCustom:
-		predictorSpec = createCustomPredictorSpec(modelService, resources, nodeSelector, tolerations)
+		predictorSpec = createCustomPredictorSpec(modelService, envVars, resources, nodeSelector, tolerations)
 	}
 
 	if len(nodeSelector) > 0 {
@@ -392,28 +401,36 @@ func (t *InferenceServiceTemplater) createTransformerSpec(
 	modelService *models.Service,
 	transformer *models.Transformer,
 ) (*kservev1beta1.TransformerSpec, error) {
+	envVars := transformer.EnvVars
+
 	// Set resource limits to request * userContainerCPULimitRequestFactor or UserContainerMemoryLimitRequestFactor
 	limits := map[corev1.ResourceName]resource.Quantity{}
-	if t.deploymentConfig.UserContainerCPULimitRequestFactor != 0 {
-		limits[corev1.ResourceCPU] = ScaleQuantity(
-			transformer.ResourceRequest.CPURequest, t.deploymentConfig.UserContainerCPULimitRequestFactor,
-		)
-	} else {
-		// TODO: Remove this else-block when KServe finally allows default CPU limits to be removed
-		var err error
-		limits[corev1.ResourceCPU], err = resource.ParseQuantity(t.deploymentConfig.UserContainerCPUDefaultLimit)
-		if err != nil {
-			return nil, err
+	// Set cpu resource limits automatically if they have not been set
+	if transformer.ResourceRequest.CPULimit == nil || transformer.ResourceRequest.CPULimit.IsZero() {
+		if t.deploymentConfig.UserContainerCPULimitRequestFactor != 0 {
+			limits[corev1.ResourceCPU] = ScaleQuantity(
+				transformer.ResourceRequest.CPURequest, t.deploymentConfig.UserContainerCPULimitRequestFactor,
+			)
+		} else {
+			// TODO: Remove this else-block when KServe finally allows default CPU limits to be removed
+			var err error
+			limits[corev1.ResourceCPU], err = resource.ParseQuantity(t.deploymentConfig.UserContainerCPUDefaultLimit)
+			if err != nil {
+				return nil, err
+			}
+			// Set additional env vars to manage concurrency so model performance improves when no CPU limits are set
+			envVars = models.MergeEnvVars(ParseEnvVars(t.deploymentConfig.DefaultEnvVarsWithoutCPULimits), envVars)
+		}
+	} else {
+		limits[corev1.ResourceCPU] = *transformer.ResourceRequest.CPULimit
 	}
 
 	if t.deploymentConfig.UserContainerMemoryLimitRequestFactor != 0 {
 		limits[corev1.ResourceMemory] = ScaleQuantity(
 			transformer.ResourceRequest.MemoryRequest, t.deploymentConfig.UserContainerMemoryLimitRequestFactor,
 		)
 	}
-
-	envVars := transformer.EnvVars
 
 	// Put in defaults if not provided by users (user's input is used)
 	if transformer.TransformerType == models.StandardTransformerType {
 		transformer.Image = t.deploymentConfig.StandardTransformer.ImageName
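
Both the predictor and transformer paths read these knobs from the deployment config. A hedged sketch of how an operator might wire them together — the local struct is a stand-in for Merlin's actual config type, and the env var name/value are purely illustrative; only the field names themselves appear in the diff:

```go
package main

import corev1 "k8s.io/api/core/v1"

// deploymentConfig mirrors the fields templater.go reads; it is a local
// stand-in for illustration, not the real Merlin config struct.
type deploymentConfig struct {
	UserContainerCPULimitRequestFactor    float64
	UserContainerCPUDefaultLimit          string
	UserContainerMemoryLimitRequestFactor float64
	DefaultEnvVarsWithoutCPULimits        []corev1.EnvVar
}

// Example: disable factor-based CPU limits and compensate with
// concurrency-tuning env vars when no explicit limit is set.
var cfg = deploymentConfig{
	UserContainerCPULimitRequestFactor:    0,   // 0 => fall back to the default limit
	UserContainerCPUDefaultLimit:          "8", // required until KServe allows omitting CPU limits
	UserContainerMemoryLimitRequestFactor: 2,
	DefaultEnvVarsWithoutCPULimits: []corev1.EnvVar{
		{Name: "WORKERS", Value: "1"}, // hypothetical concurrency knob
	},
}
```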
@@ -780,9 +797,13 @@ func createDefaultPredictorEnvVars(modelService *models.Service) models.EnvVars
 	return defaultEnvVars
 }
 
-func createCustomPredictorSpec(modelService *models.Service, resources corev1.ResourceRequirements, nodeSelector map[string]string, tolerations []corev1.Toleration) kservev1beta1.PredictorSpec {
-	envVars := modelService.EnvVars
-
+func createCustomPredictorSpec(
+	modelService *models.Service,
+	envVars models.EnvVars,
+	resources corev1.ResourceRequirements,
+	nodeSelector map[string]string,
+	tolerations []corev1.Toleration,
+) kservev1beta1.PredictorSpec {
 	// Add default env var (Overwrite by user not allowed)
 	defaultEnvVar := createDefaultPredictorEnvVars(modelService)
 	envVars = models.MergeEnvVars(envVars, defaultEnvVar)
@@ -910,3 +931,11 @@ func (t *InferenceServiceTemplater) applyDefaults(service *models.Service) {
 		}
 	}
 }
+
+func ParseEnvVars(envVars []corev1.EnvVar) models.EnvVars {
+	var parsedEnvVars models.EnvVars
+	for _, envVar := range envVars {
+		parsedEnvVars = append(parsedEnvVars, models.EnvVar{Name: envVar.Name, Value: envVar.Value})
+	}
+	return parsedEnvVars
+}
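
ParseEnvVars simply lifts raw corev1 env vars into the models representation so they can be merged. A hypothetical usage sketch, assuming it sits in the same package as ParseEnvVars above — the env var name is made up, the models import path is assumed, and the precedence (later MergeEnvVars argument wins) is inferred from the call sites above, such as the "Overwrite by user not allowed" merge in createCustomPredictorSpec:

```go
package resource

import (
	corev1 "k8s.io/api/core/v1"

	"github.com/caraml-dev/merlin/models"
)

// exampleMergePrecedence layers platform defaults first and user values
// second; assuming the later argument to MergeEnvVars takes precedence,
// the user's WORKERS=4 overrides the default WORKERS=1.
func exampleMergePrecedence() models.EnvVars {
	defaults := []corev1.EnvVar{{Name: "WORKERS", Value: "1"}}    // platform default
	userVars := models.EnvVars{{Name: "WORKERS", Value: "4"}}     // user-supplied
	return models.MergeEnvVars(ParseEnvVars(defaults), userVars)  // WORKERS=4 survives
}
```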
2 changes: 1 addition & 1 deletion api/cluster/resource/templater_gpu_test.go
@@ -58,7 +58,7 @@ var (
 			"nvidia.com/gpu": resource.MustParse("1"),
 		},
 		Limits: corev1.ResourceList{
-			corev1.ResourceCPU:    resource.MustParse("8"),
+			corev1.ResourceCPU:    resource.MustParse("10"),
 			corev1.ResourceMemory: ScaleQuantity(defaultModelResourceRequests.MemoryRequest, 2),
 			"nvidia.com/gpu":      resource.MustParse("1"),
 		},