diff --git a/CHANGELOG.md b/CHANGELOG.md index d0b9dde..1ef82fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ Notable changes between versions. ## Latest +* Add Slack notifications of node lifecycle events + * Add `-webhook` flag to set the WebhookURL + ## v0.1.0 * Initial port from bash script to Go diff --git a/README.md b/README.md index b03d964..4329970 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,28 @@ systemd: WantedBy=multi-user.target ``` +### Configuration + +Configure via flags. + +| flag | description | default | +|------------|--------------|--------------| +| -platform | Platform to poll for termination notices | none | +| -webhook | Slack Webhook URL | "" | +| -uncordon | Uncordon node on start | true | +| -drain | Drain node on stop | true | +| -delete | Delete node on stop | true | +| -log-level | Logger level | info | +| -version | Show version | NA | +| -help | Show help | NA | + +Other values are set via environment variables. + +| variable | description | default | +|------------|------------------------|-----------| +| KUBECONFIG | Path to Kubeconfig | "" | +| HOSTNAME | Current node name | "" | + ### Spot Termination Notices [AWS](https://aws.amazon.com/blogs/aws/new-ec2-spot-instance-termination-notices/) and [Azure](https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-terminate-notification) provide warnings via instance metadata (2 min) before spot terminations. `scuttle` can monitor platform specific instance metadata endpoints to trigger drain or delete actions before shutdown. diff --git a/cmd/scuttle/main.go b/cmd/scuttle/main.go index 9722344..f08fe17 100644 --- a/cmd/scuttle/main.go +++ b/cmd/scuttle/main.go @@ -28,6 +28,7 @@ var ( func main() { flags := struct { + webhook string platform string uncordon bool drain bool @@ -37,6 +38,7 @@ func main() { help bool }{} + flag.StringVar(&flags.webhook, "webhook", "", "Slack Webhook URL (e.g. 
https://hooks.slack.com...)") flag.StringVar(&flags.platform, "platform", "none", "Set platform (none, aws, azure) to poll termination notices") flag.BoolVar(&flags.uncordon, "uncordon", true, "Enabling uncordoning node on start") flag.BoolVar(&flags.drain, "drain", true, "Enabling draining node on stop") @@ -85,6 +87,7 @@ func main() { // Termination watcher scuttle, err := sctl.New(&sctl.Config{ Logger: log, + Webhook: flags.webhook, Platform: flags.platform, ShouldUncordon: flags.uncordon, ShouldDrain: flags.drain, diff --git a/go.mod b/go.mod index 5cec9a6..29b5e98 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.19 require ( github.com/sirupsen/logrus v1.9.0 + github.com/slack-go/slack v0.11.4 k8s.io/api v0.25.4 k8s.io/apimachinery v0.25.4 k8s.io/client-go v0.25.4 @@ -22,6 +23,7 @@ require ( github.com/golang/protobuf v1.5.2 // indirect github.com/google/gnostic v0.5.7-v3refs // indirect github.com/google/gofuzz v1.1.0 // indirect + github.com/gorilla/websocket v1.4.2 // indirect github.com/imdario/mergo v0.3.6 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect diff --git a/go.sum b/go.sum index e98be93..febc58a 100644 --- a/go.sum +++ b/go.sum @@ -69,6 +69,8 @@ github.com/go-openapi/jsonreference v0.19.5/go.mod h1:RdybgQwPxbL4UEjuAruzK1x3nE github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-openapi/swag v0.19.14 h1:gm3vOOXfiuw5i9p5N9xJvfjvuofpyvLA9Wr6QfK5Fng= github.com/go-openapi/swag v0.19.14/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= +github.com/go-test/deep v1.0.4 h1:u2CU3YKy9I2pmu9pX0eq50wCgjfGIt539SqR7FbHiho= +github.com/go-test/deep v1.0.4/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod 
h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= @@ -110,6 +112,7 @@ github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.1.0 h1:Hsa8mG0dQ46ij8Sl2AYJDUv1oA9/d6Vk+3LG99Oe02g= @@ -126,6 +129,8 @@ github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hf github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= +github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= +github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= @@ -166,11 +171,14 @@ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1: github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= 
+github.com/slack-go/slack v0.11.4 h1:ojSa7KlPm3PqY2AomX4VTxEsK5eci5JaxCjlzGV5zoM= +github.com/slack-go/slack v0.11.4/go.mod h1:hlGi5oXA+Gt+yWTPP0plCdRKmjsDxecdHxYQdlMQKOw= github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= diff --git a/internal/scuttle.go b/internal/scuttle.go index 4939157..d23ccaa 100644 --- a/internal/scuttle.go +++ b/internal/scuttle.go @@ -28,6 +28,7 @@ const ( // Config configures a Scuttle type Config struct { Logger *logrus.Logger + Webhook string Platform string ShouldUncordon bool ShouldDrain bool @@ -94,10 +95,12 @@ func (w *Scuttle) Run(ctx context.Context) error { select { case <-ctx.Done(): w.log.WithFields(fields).Info("scuttle: stopping...") + w.notifySlack(Shutdown, w.hostname) return w.stop(stopCtx) case <-ticker.C: w.log.WithFields(fields).Debug("scuttle: tick...") if w.pendingShutdown(ctx) { + w.notifySlack(TermNotice, w.hostname) return w.stop(stopCtx) } } @@ -112,6 +115,7 @@ func (w *Scuttle) start(ctx context.Context) error { if w.config.ShouldUncordon { w.log.WithFields(fields).Info("scuttle: uncordon node") + w.notifySlack(Uncordon, w.hostname) drainer := drain.New(&drain.Config{ Client: w.kubeClient, Logger: w.log, @@ -132,6 +136,7 @@ func (w *Scuttle) stop(ctx context.Context) error { // optionally 
drain to evict pods on the node if w.config.ShouldDrain { w.log.WithFields(fields).Info("scuttle: draining node") + w.notifySlack(Drain, w.hostname) drainer := drain.New(&drain.Config{ Client: w.kubeClient, Logger: w.log, @@ -148,6 +153,7 @@ func (w *Scuttle) stop(ctx context.Context) error { // optionally delete the node from the cluster if w.config.ShouldDelete { w.log.WithFields(fields).Info("scuttle: deleting node") + w.notifySlack(Delete, w.hostname) err := w.kubeClient.CoreV1().Nodes().Delete(ctx, w.hostname, v1.DeleteOptions{}) // best-effort, we need to continue even on error if err != nil { diff --git a/internal/slack.go b/internal/slack.go new file mode 100644 index 0000000..dae8b1b --- /dev/null +++ b/internal/slack.go @@ -0,0 +1,45 @@ +// Copyright (C) 2022 Poseidon Labs +// Copyright (C) 2022 Dalton Hubble +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+package scuttle
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/slack-go/slack"
+)
+
+// slackTimeout bounds a single webhook POST so that a slow or unreachable
+// Slack endpoint cannot hold up the uncordon, drain, or delete step that
+// follows the notification.
+const slackTimeout = 5 * time.Second
+
+// Notification identifies the node lifecycle event reported to Slack.
+type Notification string
+
+// Lifecycle events understood by notifySlack.
+const (
+	// Uncordon is reported when the node is uncordoned on start.
+	Uncordon Notification = "uncordon"
+	// TermNotice is reported when a pending spot termination is detected.
+	TermNotice Notification = "term-notice"
+	// Shutdown is reported when the run context is cancelled.
+	Shutdown Notification = "shutdown"
+	// Drain is reported before the node is drained.
+	Drain Notification = "drain"
+	// Delete is reported before the node is deleted from the cluster.
+	Delete Notification = "delete"
+)
+
+// notifySlack posts a message describing action on node to the configured
+// Slack webhook.
+//
+// Delivery is best-effort: failures are logged and never returned, so a
+// notification problem cannot interrupt the caller. Nothing is sent when no
+// webhook URL is configured or when action is not one of the known
+// Notification values.
+func (w *Scuttle) notifySlack(action Notification, node string) {
+	// The webhook URL defaults to "", so notifications are opt-in. Posting to
+	// an empty URL would only produce an error log on every lifecycle event.
+	if w.config.Webhook == "" {
+		return
+	}
+
+	var text string
+	switch action {
+	case Uncordon:
+		text = fmt.Sprintf(":white_check_mark: Uncordon node `%s`", node)
+	case TermNotice:
+		text = fmt.Sprintf(":stopwatch: Detected spot termination notice for `%s`", node)
+	case Shutdown:
+		text = fmt.Sprintf(":octagonal_sign: Detected shutdown of `%s`", node)
+	case Drain:
+		text = fmt.Sprintf(":droplet: Draining node `%s`", node)
+	case Delete:
+		text = fmt.Sprintf(":headstone: Deleting node `%s`", node)
+	default:
+		// An unrecognised action has no message text; skip the request
+		// rather than post an empty message.
+		w.log.Warnf("unknown Slack notification %q for node %s", action, node)
+		return
+	}
+
+	// notifySlack is called right before time-critical work (drain, delete),
+	// so the request gets its own deadline instead of waiting indefinitely.
+	ctx, cancel := context.WithTimeout(context.Background(), slackTimeout)
+	defer cancel()
+
+	err := slack.PostWebhookContext(ctx, w.config.Webhook, &slack.WebhookMessage{Text: text})
+	if err != nil {
+		w.log.Errorf("error notifying Slack webhook url: %v", err)
+	}
+}