diff --git a/helm-chart/kuberay-operator/values.yaml b/helm-chart/kuberay-operator/values.yaml index 75a343c681..6a06b4714a 100644 --- a/helm-chart/kuberay-operator/values.yaml +++ b/helm-chart/kuberay-operator/values.yaml @@ -78,10 +78,9 @@ batchScheduler: # Deprecated. This option will be removed in the future. # Note, for backwards compatibility. When it sets to true, it enables volcano scheduler integration. enabled: false - # Name of the scheduler, currently supported "default", "volcano" and "yunikorn", - # set the customized scheduler name, e.g "volcano" or "yunikorn", do not set + # Set the customized scheduler name, supported values are "volcano" or "yunikorn", do not set # "batchScheduler.enabled=true" at the same time as it will override this option. - name: default + name: "" featureGates: - name: RayClusterStatusConditions diff --git a/ray-operator/apis/config/v1alpha1/config_utils.go b/ray-operator/apis/config/v1alpha1/config_utils.go index 4ef3ecf303..6ad6c7887c 100644 --- a/ray-operator/apis/config/v1alpha1/config_utils.go +++ b/ray-operator/apis/config/v1alpha1/config_utils.go @@ -17,11 +17,6 @@ func ValidateBatchSchedulerConfig(logger logr.Logger, config Configuration) erro } if len(config.BatchScheduler) > 0 { - // default option, no-opt. - if config.BatchScheduler == "default" { - return nil - } - // if a customized scheduler is configured, check it is supported if config.BatchScheduler == volcano.GetPluginName() || config.BatchScheduler == yunikorn.GetPluginName() { logger.Info("Feature flag batch-scheduler is enabled", @@ -30,5 +25,6 @@ func ValidateBatchSchedulerConfig(logger logr.Logger, config Configuration) erro return fmt.Errorf("scheduler is not supported, name=%s", config.BatchScheduler) } } + return nil } diff --git a/ray-operator/apis/config/v1alpha1/config_utils_test.go b/ray-operator/apis/config/v1alpha1/config_utils_test.go index 062a192898..41bdf50391 100644 --- a/ray-operator/apis/config/v1alpha1/config_utils_test.go +++ b/ray-operator/apis/config/v1alpha1/config_utils_test.go @@ -71,6 +71,17 @@ func TestValidateBatchSchedulerConfig(t *testing.T) { }, wantErr: true, }, + { + name: "invalid option, invalid scheduler name default", + args: args{ + logger: testr.New(t), + config: Configuration{ + EnableBatchScheduler: false, + BatchScheduler: "default", + }, + }, + wantErr: true, + }, } for _, tt := range tests { diff --git a/ray-operator/controllers/ray/batchscheduler/schedulermanager.go b/ray-operator/controllers/ray/batchscheduler/schedulermanager.go index 058e0a9cfd..89c306d180 100644 --- a/ray-operator/controllers/ray/batchscheduler/schedulermanager.go +++ b/ray-operator/controllers/ray/batchscheduler/schedulermanager.go @@ -1,6 +1,7 @@ package batchscheduler import ( + "fmt" "sync" "k8s.io/apimachinery/pkg/runtime" @@ -28,7 +29,11 @@ type SchedulerManager struct { // NewSchedulerManager maintains a specific scheduler plugin based on config func NewSchedulerManager(rayConfigs configapi.Configuration, config *rest.Config) (*SchedulerManager, error) { // init the scheduler factory from config - factory := getSchedulerFactory(rayConfigs) + factory, err := getSchedulerFactory(rayConfigs) + if err != nil { + return nil, err + } + scheduler, err := factory.New(config) if err != nil { return nil, err @@ -44,11 +49,12 @@ func NewSchedulerManager(rayConfigs configapi.Configuration, config *rest.Config return &manager, nil } -func getSchedulerFactory(rayConfigs configapi.Configuration) schedulerinterface.BatchSchedulerFactory { +func getSchedulerFactory(rayConfigs configapi.Configuration) (schedulerinterface.BatchSchedulerFactory, error) { var factory schedulerinterface.BatchSchedulerFactory - // init with the default factory - factory = &schedulerinterface.DefaultBatchSchedulerFactory{} + // when a batch scheduler name is provided + // only support a white list of names, empty value is the default value + // it throws error if an unknown name is provided if len(rayConfigs.BatchScheduler) > 0 { switch rayConfigs.BatchScheduler { case volcano.GetPluginName(): @@ -56,8 +62,12 @@ func getSchedulerFactory(rayConfigs configapi.Configuration) schedulerinterface. case yunikorn.GetPluginName(): factory = &yunikorn.YuniKornSchedulerFactory{} default: - factory = &schedulerinterface.DefaultBatchSchedulerFactory{} + return nil, fmt.Errorf("the scheduler is not supported, name=%s", rayConfigs.BatchScheduler) } + } else { + // empty is the default value, when not set + // use DefaultBatchSchedulerFactory, it's a no-opt factory + factory = &schedulerinterface.DefaultBatchSchedulerFactory{} } // legacy option, if this is enabled, register volcano @@ -66,7 +76,7 @@ func getSchedulerFactory(rayConfigs configapi.Configuration) schedulerinterface. factory = &volcano.VolcanoBatchSchedulerFactory{} } - return factory + return factory, nil } func (batch *SchedulerManager) GetSchedulerForCluster(app *rayv1.RayCluster) (schedulerinterface.BatchScheduler, error) { diff --git a/ray-operator/controllers/ray/batchscheduler/schedulermanager_test.go b/ray-operator/controllers/ray/batchscheduler/schedulermanager_test.go index 59f922986b..1eb18255f1 100644 --- a/ray-operator/controllers/ray/batchscheduler/schedulermanager_test.go +++ b/ray-operator/controllers/ray/batchscheduler/schedulermanager_test.go @@ -4,6 +4,8 @@ import ( "reflect" "testing" + "github.com/stretchr/testify/assert" + "github.com/ray-project/kuberay/ray-operator/apis/config/v1alpha1" schedulerinterface "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/interface" "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/volcano" @@ -19,16 +21,17 @@ func TestGetSchedulerFactory(t *testing.T) { rayConfigs v1alpha1.Configuration } tests := []struct { - want reflect.Type - name string - args args + want reflect.Type + name string + expectedErrMsg string + args args }{ { - name: "enableBatchScheduler=false, batchScheduler set to default", + name: "enableBatchScheduler=false, batchScheduler=''", args: args{ rayConfigs: v1alpha1.Configuration{ EnableBatchScheduler: false, - BatchScheduler: schedulerinterface.GetDefaultPluginName(), + BatchScheduler: "", }, }, want: reflect.TypeOf(DefaultFactory), @@ -80,6 +83,15 @@ func TestGetSchedulerFactory(t *testing.T) { }, want: reflect.TypeOf(VolcanoFactory), }, + { + name: "enableBatchScheduler not set, batchScheduler set to unknown value", + args: args{ + rayConfigs: v1alpha1.Configuration{ + BatchScheduler: "unknown-scheduler-name", + }, + }, + expectedErrMsg: "the scheduler is not supported, name=unknown-scheduler-name", + }, { // for backwards compatibility, if enableBatchScheduler=true, always use volcano name: "enableBatchScheduler=true, batchScheduler set to yunikorn", @@ -108,7 +120,7 @@ func TestGetSchedulerFactory(t *testing.T) { args: args{ rayConfigs: v1alpha1.Configuration{ EnableBatchScheduler: true, - BatchScheduler: schedulerinterface.GetDefaultPluginName(), + BatchScheduler: "", }, }, want: reflect.TypeOf(VolcanoFactory), @@ -117,7 +129,13 @@ func TestGetSchedulerFactory(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := getSchedulerFactory(tt.args.rayConfigs); reflect.TypeOf(got) != tt.want { + got, err := getSchedulerFactory(tt.args.rayConfigs) + if len(tt.expectedErrMsg) > 0 { + assert.Errorf(t, err, tt.expectedErrMsg) + return + } + + if reflect.TypeOf(got) != tt.want { t.Errorf("getSchedulerFactory() = %v, want %v", got, tt.want) } }) diff --git a/ray-operator/main.go b/ray-operator/main.go index 172183040a..3c24f6d2d1 100644 --- a/ray-operator/main.go +++ b/ray-operator/main.go @@ -91,9 +91,9 @@ func main() { flag.StringVar(&logStdoutEncoder, "log-stdout-encoder", "json", "Encoder to use for logging stdout. Valid values are 'json' and 'console'. Defaults to 'json'") flag.BoolVar(&enableBatchScheduler, "enable-batch-scheduler", false, - "(Deprecated) Enable batch scheduler. Currently is volcano, which supports gang scheduler policy.") - flag.StringVar(&batchScheduler, "batch-scheduler", "default", - "Batch scheduler name, supported values are default, volcano, yunikorn.") + "(Deprecated) Enable batch scheduler. Currently is volcano, which supports gang scheduler policy. Please use --batch-scheduler instead.") + flag.StringVar(&batchScheduler, "batch-scheduler", "", + "Batch scheduler name, supported values are volcano and yunikorn.") flag.StringVar(&configFile, "config", "", "Path to structured config file. Flags are ignored if config file is set.") flag.BoolVar(&useKubernetesProxy, "use-kubernetes-proxy", false, "Use Kubernetes proxy subresource when connecting to the Ray Head node.")