Skip to content

Commit

Permalink
Implement KatibConfig API
Browse files Browse the repository at this point in the history
Signed-off-by: Yuki Iwai <yuki.iwai.tz@gmail.com>
  • Loading branch information
tenzen-y committed Jul 21, 2023
1 parent 067c119 commit 6595c5b
Show file tree
Hide file tree
Showing 32 changed files with 1,646 additions and 717 deletions.
7 changes: 6 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,18 @@ endif
sync-go-mod:
go mod tidy -go $(GO_VERSION)

# Absolute path of the controller-gen binary that the target below installs under ./bin.
CONTROLLER_GEN = $(shell pwd)/bin/controller-gen
.PHONY: controller-gen
# Install controller-gen v0.10.0 with `go install`; GOBIN redirects the binary into ./bin
# instead of the user's default Go bin directory.
controller-gen:
@GOBIN=$(shell pwd)/bin GO111MODULE=on go install sigs.k8s.io/controller-tools/cmd/controller-gen@v0.10.0

# Run this if you update any existing controller APIs.
# 1. Generate deepcopy, clientset, listers, informers for the APIs (hack/update-codegen.sh)
# 2. Generate OpenAPI for the APIs (hack/update-openapigen.sh)
# 3. Generate Python SDK for Katib (hack/gen-python-sdk/gen-sdk.sh)
# 4. Generate gRPC manager APIs (pkg/apis/manager/v1beta1/build.sh and pkg/apis/manager/health/build.sh)
# 5. Generate Go mock code
generate:
generate: controller-gen
ifndef GOPATH
$(error GOPATH not defined, please define GOPATH. Run "go help gopath" to learn more about GOPATH)
endif
Expand Down
84 changes: 43 additions & 41 deletions cmd/katib-controller/v1beta1/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"os"

"github.com/spf13/viper"
"k8s.io/apimachinery/pkg/runtime"
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
"sigs.k8s.io/controller-runtime/pkg/client/config"
"sigs.k8s.io/controller-runtime/pkg/healthz"
Expand All @@ -32,38 +33,32 @@ import (
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/manager/signals"

configapi "github.com/kubeflow/katib/pkg/apis/config/v1beta1"
apis "github.com/kubeflow/katib/pkg/apis/controller"
controller "github.com/kubeflow/katib/pkg/controller.v1beta1"
"github.com/kubeflow/katib/pkg/controller.v1beta1"
"github.com/kubeflow/katib/pkg/controller.v1beta1/consts"
trialutil "github.com/kubeflow/katib/pkg/controller.v1beta1/trial/util"
"github.com/kubeflow/katib/pkg/util/v1beta1/katibconfig"
webhook "github.com/kubeflow/katib/pkg/webhook/v1beta1"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
)

// scheme is the runtime.Scheme shared by this binary: it is passed both to the
// KatibConfig loader and to the controller manager in main.
var scheme = runtime.NewScheme()

// init registers the Katib controller APIs, the KatibConfig API, and the
// client-go built-in Kubernetes types into scheme.
// utilruntime.Must panics if any of the registrations returns an error.
func init() {
utilruntime.Must(apis.AddToScheme(scheme))
utilruntime.Must(configapi.AddToScheme(scheme))
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
}

func main() {
logf.SetLogger(zap.New())
log := logf.Log.WithName("entrypoint")

var experimentSuggestionName string
var metricsAddr string
var healthzAddr string
var webhookPort int
var injectSecurityContext bool
var enableGRPCProbeInSuggestion bool
var trialResources trialutil.GvkListFlag
var enableLeaderElection bool
var leaderElectionID string

flag.StringVar(&experimentSuggestionName, "experiment-suggestion-name",
"default", "The implementation of suggestion interface in experiment controller (default)")
flag.StringVar(&metricsAddr, "metrics-addr", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&healthzAddr, "healthz-addr", ":18080", "The address the healthz endpoint binds to.")
flag.BoolVar(&injectSecurityContext, "webhook-inject-securitycontext", false, "Inject the securityContext of container[0] in the sidecar")
flag.BoolVar(&enableGRPCProbeInSuggestion, "enable-grpc-probe-in-suggestion", true, "enable grpc probe in suggestions")
flag.Var(&trialResources, "trial-resources", "The list of resources that can be used as trial template, in the form: Kind.version.group (e.g. TFJob.v1.kubeflow.org)")
flag.IntVar(&webhookPort, "webhook-port", 8443, "The port number to be used for admission webhook server.")
// For leader election
flag.BoolVar(&enableLeaderElection, "enable-leader-election", false, "Enable leader election for katib-controller. Enabling this will ensure there is only one active katib-controller.")
flag.StringVar(&leaderElectionID, "leader-election-id", "3fbc96e9.katib.kubeflow.org", "The ID for leader election.")
var katibConfigFile string
flag.StringVar(&katibConfigFile, "katib-config", "",
"The katib-controller will load its initial configuration from this file. "+
"Omit this flag to use the default configuration values. ")

// TODO (andreyvelich): Currently it is not possible to set a different webhook service name.
// flag.StringVar(&serviceName, "webhook-service-name", "katib-controller", "The service name which will be used in webhook")
Expand All @@ -72,21 +67,33 @@ func main() {

flag.Parse()

inintConfig, err := katibconfig.GetInitConfigData(scheme, katibConfigFile)
if err != nil {
log.Error(err, "Failed to get KatibConfig")
os.Exit(1)
}

// Set the config in viper.
viper.Set(consts.ConfigExperimentSuggestionName, experimentSuggestionName)
viper.Set(consts.ConfigInjectSecurityContext, injectSecurityContext)
viper.Set(consts.ConfigEnableGRPCProbeInSuggestion, enableGRPCProbeInSuggestion)
viper.Set(consts.ConfigTrialResources, trialResources)
viper.Set(consts.ConfigExperimentSuggestionName, inintConfig.ControllerConfig.ExperimentSuggestionName)
viper.Set(consts.ConfigInjectSecurityContext, inintConfig.ControllerConfig.InjectSecurityContext)
viper.Set(consts.ConfigEnableGRPCProbeInSuggestion, inintConfig.ControllerConfig.EnableGRPCProbeInSuggestion)

trialGVKs, err := katibconfig.TrialResourcesToGVKs(inintConfig.ControllerConfig.TrialResources)
if err != nil {
log.Error(err, "Failed to parse trialResources")
os.Exit(1)
}
viper.Set(consts.ConfigTrialResources, trialGVKs)

log.Info("Config:",
consts.ConfigExperimentSuggestionName,
viper.GetString(consts.ConfigExperimentSuggestionName),
"webhook-port",
webhookPort,
inintConfig.ControllerConfig.WebhookPort,
"metrics-addr",
metricsAddr,
inintConfig.ControllerConfig.MetricsAddr,
"healthz-addr",
healthzAddr,
inintConfig.ControllerConfig.HealthzAddr,
consts.ConfigInjectSecurityContext,
viper.GetBool(consts.ConfigInjectSecurityContext),
consts.ConfigEnableGRPCProbeInSuggestion,
Expand All @@ -104,10 +111,11 @@ func main() {

// Create a new katib controller to provide shared dependencies and start components
mgr, err := manager.New(cfg, manager.Options{
MetricsBindAddress: metricsAddr,
HealthProbeBindAddress: healthzAddr,
LeaderElection: enableLeaderElection,
LeaderElectionID: leaderElectionID,
MetricsBindAddress: inintConfig.ControllerConfig.MetricsAddr,
HealthProbeBindAddress: inintConfig.ControllerConfig.HealthzAddr,
LeaderElection: *inintConfig.ControllerConfig.EnableLeaderElection,
LeaderElectionID: inintConfig.ControllerConfig.LeaderElectionID,
Scheme: scheme,
})
if err != nil {
log.Error(err, "Failed to create the manager")
Expand All @@ -116,12 +124,6 @@ func main() {

log.Info("Registering Components.")

// Setup Scheme for all resources
if err := apis.AddToScheme(mgr.GetScheme()); err != nil {
log.Error(err, "Unable to add APIs to scheme")
os.Exit(1)
}

// Setup all Controllers
log.Info("Setting up controller.")
if err := controller.AddToManager(mgr); err != nil {
Expand All @@ -130,7 +132,7 @@ func main() {
}

log.Info("Setting up webhooks.")
if err := webhook.AddToManager(mgr, webhookPort); err != nil {
if err := webhook.AddToManager(mgr, *inintConfig.ControllerConfig.WebhookPort); err != nil {
log.Error(err, "Unable to register webhooks to the manager")
os.Exit(1)
}
Expand Down
14 changes: 3 additions & 11 deletions docs/developer-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,17 +57,9 @@ make generate

Below is a list of command-line flags accepted by the Katib controller:

| Name | Type | Default | Description |
| ------------------------------- | ------------------------- | ----------------------------- | ---------------------------------------------------------------------------------------------------------------------- |
| enable-grpc-probe-in-suggestion | bool | true | Enable grpc probe in suggestions |
| experiment-suggestion-name | string | "default" | The implementation of suggestion interface in experiment controller |
| metrics-addr | string | ":8080" | The address that the metrics endpoint binds to |
| healthz-addr | string | ":18080" | The address that the healthz endpoint binds to |
| trial-resources | []schema.GroupVersionKind | null | The list of resources that can be used as trial template, in the form: Kind.version.group (e.g. TFJob.v1.kubeflow.org) |
| webhook-inject-securitycontext | bool | false | Inject the securityContext of container[0] in the sidecar |
| webhook-port | int | 8443 | The port number to be used for admission webhook server |
| enable-leader-election | bool | false | Enable leader election for katib-controller. Enabling this will ensure there is only one active katib-controller. |
| leader-election-id | string | "3fbc96e9.katib.kubeflow.org" | The ID for leader election. |
| Name | Type | Default | Description |
|--------------|--------|---------|----------------------------------------------------------------------------------------------------------------------------------|
| katib-config | string | "" | The katib-controller will load its initial configuration from this file. Omit this flag to use the default configuration values. |

## DB Manager Flags

Expand Down
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ require (
github.com/go-sql-driver/mysql v1.5.0
github.com/golang/mock v1.6.0
github.com/golang/protobuf v1.5.2
github.com/google/go-cmp v0.5.9
github.com/google/go-containerregistry v0.9.0
github.com/google/go-containerregistry/pkg/authn/k8schain v0.0.0-20211222182933-7c19fa370dbd
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0
Expand All @@ -24,12 +25,14 @@ require (
github.com/tidwall/gjson v1.14.1
golang.org/x/net v0.5.0
google.golang.org/grpc v1.53.0
gopkg.in/yaml.v3 v3.0.1
k8s.io/api v0.25.3
k8s.io/apimachinery v0.25.3
k8s.io/client-go v0.25.3
k8s.io/code-generator v0.25.3
k8s.io/klog v1.0.0
k8s.io/kube-openapi v0.0.0-20220803162953-67bda5d908f1
k8s.io/utils v0.0.0-20220728103510-ee6ede2d64ed
sigs.k8s.io/controller-runtime v0.13.0
)

Expand Down Expand Up @@ -69,7 +72,6 @@ require (
github.com/golang-jwt/jwt/v4 v4.2.0 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/google/gnostic v0.5.7-v3refs // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
Expand Down Expand Up @@ -129,15 +131,13 @@ require (
gopkg.in/ini.v1 v1.63.2 // indirect
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
gotest.tools/v3 v3.1.0 // indirect
k8s.io/apiextensions-apiserver v0.25.0 // indirect
k8s.io/cloud-provider v0.21.0 // indirect
k8s.io/component-base v0.25.0 // indirect
k8s.io/gengo v0.0.0-20211129171323-c02415ce4185 // indirect
k8s.io/klog/v2 v2.70.1 // indirect
k8s.io/legacy-cloud-providers v0.21.0 // indirect
k8s.io/utils v0.0.0-20220728103510-ee6ede2d64ed // indirect
sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
sigs.k8s.io/yaml v1.3.0 // indirect
Expand Down
7 changes: 6 additions & 1 deletion hack/update-codegen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ if [[ -z "${GOPATH:-}" ]]; then
fi

# Grab code-generator version from go.mod
CODEGEN_VERSION=$(cd ../../.. && grep 'k8s.io/code-generator' go.mod | awk '{print $2}')
CODEGEN_VERSION=$(cd ../.. && grep 'k8s.io/code-generator' go.mod | awk '{print $2}')
CODEGEN_PKG="$GOPATH/pkg/mod/k8s.io/code-generator@${CODEGEN_VERSION}"

if [[ ! -d "${CODEGEN_PKG}" ]]; then
Expand All @@ -53,3 +53,8 @@ echo "Generating clients for ${GROUP_VERSIONS} ..."
github.com/kubeflow/katib/pkg/apis/controller \
"${GROUP_VERSIONS}" \
--go-header-file "${PROJECT_ROOT}/hack/boilerplate/boilerplate.go.txt"

echo "Generating deepcopy for config.kubeflow.org ..."
"${PROJECT_ROOT}/bin/controller-gen" \
object:headerFile="${PROJECT_ROOT}/hack/boilerplate/boilerplate.go.txt" \
paths="${PROJECT_ROOT}/pkg/apis/config/..."
2 changes: 1 addition & 1 deletion hack/update-openapigen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ if [[ -z "${GOPATH:-}" ]]; then
fi

# Grab code-generator version from go.mod
CODEGEN_VERSION=$(cd ../../.. && grep 'k8s.io/code-generator' go.mod | awk '{print $2}')
CODEGEN_VERSION=$(cd ../.. && grep 'k8s.io/code-generator' go.mod | awk '{print $2}')
CODEGEN_PKG="${GOPATH}/pkg/mod/k8s.io/code-generator@${CODEGEN_VERSION}"

if [[ ! -d ${CODEGEN_PKG} ]]; then
Expand Down
15 changes: 8 additions & 7 deletions manifests/v1beta1/components/controller/controller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,7 @@ spec:
image: docker.io/kubeflowkatib/katib-controller
command: ["./katib-controller"]
args:
- "--webhook-port=8443"
- "--trial-resources=Job.v1.batch"
- "--trial-resources=TFJob.v1.kubeflow.org"
- "--trial-resources=PyTorchJob.v1.kubeflow.org"
- "--trial-resources=MPIJob.v1.kubeflow.org"
- "--trial-resources=XGBoostJob.v1.kubeflow.org"
- "--trial-resources=MXJob.v1.kubeflow.org"
- --katib-config=/katib-config.yaml
ports:
- containerPort: 8443
name: webhook
Expand Down Expand Up @@ -60,8 +54,15 @@ spec:
- mountPath: /tmp/cert
name: cert
readOnly: true
- mountPath: /katib-config.yaml
name: katib-config
subPath: katib-config.yaml
readOnly: true
volumes:
- name: cert
secret:
defaultMode: 420
secretName: katib-webhook-cert
- name: katib-config
configMap:
name: katib-config
Loading

0 comments on commit 6595c5b

Please sign in to comment.