Skip to content

Commit

Permalink
Add batch-scheduler option, deprecate enable-batch-scheduler option (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
yangwwei committed Sep 8, 2024
1 parent 13eb7b2 commit e1edb4c
Show file tree
Hide file tree
Showing 10 changed files with 377 additions and 81 deletions.
5 changes: 5 additions & 0 deletions helm-chart/kuberay-operator/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,14 @@ spec:
args:
{{- $argList := list -}}
{{- $argList = append $argList (include "kuberay.featureGates" . | trim) -}}
{{- if .Values.batchScheduler -}}
{{- if .Values.batchScheduler.enabled -}}
{{- $argList = append $argList "--enable-batch-scheduler" -}}
{{- end -}}
{{- if .Values.batchScheduler.name -}}
{{- $argList = append $argList (printf "--batch-scheduler=%s" .Values.batchScheduler.name) -}}
{{- end -}}
{{- end -}}
{{- $watchNamespace := "" -}}
{{- if and .Values.singleNamespaceInstall (not .Values.watchNamespace) -}}
{{- $watchNamespace = .Release.Namespace -}}
Expand Down
25 changes: 25 additions & 0 deletions helm-chart/kuberay-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,33 @@ readinessProbe:
periodSeconds: 5
failureThreshold: 5

# Enable customized Kubernetes scheduler integration. If enabled, Ray workloads will be scheduled
# by the customized scheduler.
# * "enabled" is the legacy option and will be deprecated soon.
# * "name" is the standard option, expecting a scheduler name, supported values are
# "default", "volcano", and "yunikorn".
#
# Examples:
# 1. Use volcano (deprecated)
# batchScheduler:
# enabled: true
#
# 2. Use volcano
# batchScheduler:
# name: volcano
#
# 3. Use yunikorn
# batchScheduler:
# name: yunikorn
#
batchScheduler:
# Deprecated. This option will be removed in the future.
# Note: for backwards compatibility, setting it to true enables the volcano scheduler integration.
enabled: false
# Name of the scheduler. Currently supported values are "default", "volcano" and "yunikorn".
# To use a customized scheduler, set its name here, e.g. "volcano" or "yunikorn". Do not set
# "batchScheduler.enabled=true" at the same time, as it will override this option.
name: default

featureGates:
- name: RayClusterStatusConditions
Expand Down
34 changes: 34 additions & 0 deletions ray-operator/apis/config/v1alpha1/config_utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package v1alpha1

import (
"fmt"

"github.com/go-logr/logr"

"github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/volcano"
"github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/yunikorn"
)

// ValidateBatchSchedulerConfig checks the batch scheduler settings in config.
//
// When the legacy EnableBatchScheduler flag is set, a deprecation notice is
// logged and the configuration is accepted without looking at BatchScheduler.
// Otherwise an empty BatchScheduler or "default" is accepted silently, the
// volcano and yunikorn plugin names are accepted and logged, and any other
// name produces an error.
func ValidateBatchSchedulerConfig(logger logr.Logger, config Configuration) error {
	// Legacy flag: warn about the deprecation and skip any further validation.
	if config.EnableBatchScheduler {
		logger.Info("Feature flag enable-batch-scheduler is deprecated and will not be supported soon. " +
			"Use batch-scheduler instead. ")
		return nil
	}

	switch name := config.BatchScheduler; name {
	case "", "default":
		// Nothing configured, or the default scheduler was requested: no-op.
		return nil
	case volcano.GetPluginName(), yunikorn.GetPluginName():
		// A customized scheduler was requested and it is one we support.
		logger.Info("Feature flag batch-scheduler is enabled",
			"scheduler name", name)
		return nil
	default:
		return fmt.Errorf("scheduler is not supported, name=%s", name)
	}
}
84 changes: 84 additions & 0 deletions ray-operator/apis/config/v1alpha1/config_utils_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package v1alpha1

import (
"testing"

"github.com/go-logr/logr"
"github.com/go-logr/logr/testr"

"github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/volcano"
"github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/yunikorn"
)

// TestValidateBatchSchedulerConfig runs ValidateBatchSchedulerConfig against a
// table of configurations and checks only whether an error is returned.
// It covers the legacy enable-batch-scheduler flag (on and off), each accepted
// batch-scheduler name, and an unknown name that must be rejected.
func TestValidateBatchSchedulerConfig(t *testing.T) {
	type args struct {
		logger logr.Logger
		config Configuration
	}
	tests := []struct {
		name    string
		args    args
		wantErr bool
	}{
		{
			name: "legacy option, enable-batch-scheduler=false",
			args: args{
				logger: testr.New(t),
				config: Configuration{
					EnableBatchScheduler: false,
				},
			},
			wantErr: false,
		},
		{
			name: "legacy option, enable-batch-scheduler=true",
			args: args{
				logger: testr.New(t),
				config: Configuration{
					EnableBatchScheduler: true,
				},
			},
			wantErr: false,
		},
		{
			name: "valid option, batch-scheduler=default",
			args: args{
				logger: testr.New(t),
				config: Configuration{
					BatchScheduler: "default",
				},
			},
			wantErr: false,
		},
		{
			name: "valid option, batch-scheduler=yunikorn",
			args: args{
				logger: testr.New(t),
				config: Configuration{
					BatchScheduler: yunikorn.GetPluginName(),
				},
			},
			wantErr: false,
		},
		{
			name: "valid option, batch-scheduler=volcano",
			args: args{
				logger: testr.New(t),
				config: Configuration{
					BatchScheduler: volcano.GetPluginName(),
				},
			},
			wantErr: false,
		},
		{
			name: "invalid option, invalid scheduler name",
			args: args{
				logger: testr.New(t),
				config: Configuration{
					EnableBatchScheduler: false,
					BatchScheduler:       "unknown-scheduler-name",
				},
			},
			wantErr: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Use t.Log rather than t.Logf: the case name is data, not a format string.
			t.Log(tt.name)
			if err := ValidateBatchSchedulerConfig(tt.args.logger, tt.args.config); (err != nil) != tt.wantErr {
				t.Errorf("ValidateBatchSchedulerConfig() error = %v, wantErr %v", err, tt.wantErr)
			}
		})
	}
}
4 changes: 4 additions & 0 deletions ray-operator/apis/config/v1alpha1/configuration_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ type Configuration struct {
// Defaults to `json` if empty.
LogStdoutEncoder string `json:"logStdoutEncoder,omitempty"`

// BatchScheduler enables the batch scheduler integration with the scheduler
// of the given name. Currently accepted values are "default", "volcano" and "yunikorn".
BatchScheduler string `json:"batchScheduler,omitempty"`

// HeadSidecarContainers includes specification for a sidecar container
// to inject into every Head pod.
HeadSidecarContainers []corev1.Container `json:"headSidecarContainers,omitempty"`
Expand Down
121 changes: 60 additions & 61 deletions ray-operator/controllers/ray/batchscheduler/schedulermanager.go
Original file line number Diff line number Diff line change
@@ -1,93 +1,92 @@
package batchscheduler

import (
"fmt"
"sync"

"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/builder"

rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
schedulerinterface "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/interface"
configapi "github.com/ray-project/kuberay/ray-operator/apis/config/v1alpha1"
"github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/volcano"
"github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/yunikorn"

"k8s.io/client-go/rest"

rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
schedulerinterface "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/interface"
"github.com/ray-project/kuberay/ray-operator/controllers/ray/utils"
)

// schedulerContainers maps each scheduler plugin name to the factory
// registered under that name.
var schedulerContainers = map[string]schedulerinterface.BatchSchedulerFactory{
schedulerinterface.GetDefaultPluginName(): &schedulerinterface.DefaultBatchSchedulerFactory{},
volcano.GetPluginName(): &volcano.VolcanoBatchSchedulerFactory{},
yunikorn.GetPluginName(): &yunikorn.YuniKornSchedulerFactory{},
}

func GetRegisteredNames() []string {
var pluginNames []string
for key := range schedulerContainers {
pluginNames = append(pluginNames, key)
}
return pluginNames
type SchedulerManager struct {
config *rest.Config
factory schedulerinterface.BatchSchedulerFactory
scheduler schedulerinterface.BatchScheduler
rayConfigs configapi.Configuration
sync.Mutex
}

func ConfigureReconciler(b *builder.Builder) *builder.Builder {
for _, factory := range schedulerContainers {
b = factory.ConfigureReconciler(b)
// NewSchedulerManager maintains a specific scheduler plugin based on config
func NewSchedulerManager(rayConfigs configapi.Configuration, config *rest.Config) (*SchedulerManager, error) {
// init the scheduler factory from config
factory := getSchedulerFactory(rayConfigs)
scheduler, err := factory.New(config)
if err != nil {
return nil, err
}
return b
}

func AddToScheme(scheme *runtime.Scheme) {
for _, factory := range schedulerContainers {
factory.AddToScheme(scheme)
manager := SchedulerManager{
rayConfigs: rayConfigs,
config: config,
factory: factory,
scheduler: scheduler,
}
}

type SchedulerManager struct {
config *rest.Config
plugins map[string]schedulerinterface.BatchScheduler
sync.Mutex
return &manager, nil
}

func NewSchedulerManager(config *rest.Config) *SchedulerManager {
manager := SchedulerManager{
config: config,
plugins: make(map[string]schedulerinterface.BatchScheduler),
func getSchedulerFactory(rayConfigs configapi.Configuration) schedulerinterface.BatchSchedulerFactory {
var factory schedulerinterface.BatchSchedulerFactory
// init with the default factory
factory = &schedulerinterface.DefaultBatchSchedulerFactory{}
// when a batch scheduler name is provided
if len(rayConfigs.BatchScheduler) > 0 {
switch rayConfigs.BatchScheduler {
case volcano.GetPluginName():
factory = &volcano.VolcanoBatchSchedulerFactory{}
case yunikorn.GetPluginName():
factory = &yunikorn.YuniKornSchedulerFactory{}
default:
factory = &schedulerinterface.DefaultBatchSchedulerFactory{}
}
}
return &manager
}

func (batch *SchedulerManager) GetSchedulerForCluster(app *rayv1.RayCluster) (schedulerinterface.BatchScheduler, error) {
if schedulerName, ok := app.ObjectMeta.Labels[utils.RaySchedulerName]; ok {
return batch.GetScheduler(schedulerName)
// legacy option, if this is enabled, register volcano
// this is for backwards compatibility
if rayConfigs.EnableBatchScheduler {
factory = &volcano.VolcanoBatchSchedulerFactory{}
}

// no scheduler provided
return &schedulerinterface.DefaultBatchScheduler{}, nil
return factory
}

func (batch *SchedulerManager) GetScheduler(schedulerName string) (schedulerinterface.BatchScheduler, error) {
factory, registered := schedulerContainers[schedulerName]
if !registered {
return nil, fmt.Errorf("unregistered scheduler plugin %s", schedulerName)
// GetSchedulerForCluster returns the scheduler held by this manager for the
// given RayCluster. Every return path below yields batch.scheduler with a nil
// error.
func (batch *SchedulerManager) GetSchedulerForCluster(app *rayv1.RayCluster) (schedulerinterface.BatchScheduler, error) {
// For backwards compatibility: when the legacy EnableBatchScheduler option is
// set and the cluster's utils.RaySchedulerName label equals the volcano plugin
// name, return here, before the lock below is taken.
// NOTE(review): this branch and the fall-through return the same value, so a
// cluster without the label also receives batch.scheduler — confirm that is
// the intended legacy behavior.
if batch.rayConfigs.EnableBatchScheduler {
if schedulerName, ok := app.ObjectMeta.Labels[utils.RaySchedulerName]; ok {
if schedulerName == volcano.GetPluginName() {
return batch.scheduler, nil
}
}
}

batch.Lock()
defer batch.Unlock()
return batch.scheduler, nil
}

plugin, existed := batch.plugins[schedulerName]
// ConfigureReconciler passes the controller builder to the manager's scheduler
// factory and returns the builder the factory hands back, so that a factory
// returning a different builder value is not silently ignored.
func (batch *SchedulerManager) ConfigureReconciler(b *builder.Builder) *builder.Builder {
	return batch.factory.ConfigureReconciler(b)
}

if existed && plugin != nil {
return plugin, nil
}
if existed && plugin == nil {
return nil, fmt.Errorf(
"failed to get scheduler plugin %s, previous initialization has failed", schedulerName)
}
plugin, err := factory.New(batch.config)
if err != nil {
batch.plugins[schedulerName] = nil
return nil, err
}
batch.plugins[schedulerName] = plugin
return plugin, nil
// AddToScheme forwards scheme to the AddToScheme method of the manager's
// scheduler factory.
func (batch *SchedulerManager) AddToScheme(scheme *runtime.Scheme) {
	f := batch.factory
	f.AddToScheme(scheme)
}
Loading

0 comments on commit e1edb4c

Please sign in to comment.