Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Slack notifications of node shutdown events #6

Merged
merged 1 commit into from
Nov 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ Notable changes between versions.

## Latest

* Add Slack notifications of node lifecycle events
* Add `-webhook` flag to set the Slack Webhook URL

## v0.1.0

* Initial port from bash script to Go
Expand Down
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,28 @@ systemd:
WantedBy=multi-user.target
```

### Configuration

Configure via flags.

| flag | description | default |
|------------|--------------|--------------|
| -platform | Platform to poll for termination notices | none |
| -webhook | Slack Webhook URL | "" |
| -uncordon | Uncordon node on start | true |
| -drain | Drain node on stop | true |
| -delete | Delete node on stop | true |
| -log-level | Logger level | info |
| -version | Show version | NA |
| -help | Show help | NA |

Other values are set via environment variables.

| variable | description | default |
|------------|------------------------|-----------|
| KUBECONFIG | Path to Kubeconfig | "" |
| HOSTNAME | Current node name | "" |

### Spot Termination Notices

[AWS](https://aws.amazon.com/blogs/aws/new-ec2-spot-instance-termination-notices/) and [Azure](https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-terminate-notification) provide warnings via instance metadata (2 min) before spot terminations. `scuttle` can monitor platform-specific instance metadata endpoints to trigger drain or delete actions before shutdown.
Expand Down
3 changes: 3 additions & 0 deletions cmd/scuttle/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ var (

func main() {
flags := struct {
webhook string
platform string
uncordon bool
drain bool
Expand All @@ -37,6 +38,7 @@ func main() {
help bool
}{}

flag.StringVar(&flags.webhook, "webhook", "", "Slack Webhook URL (e.g. https://hooks.slack.com...)")
flag.StringVar(&flags.platform, "platform", "none", "Set platform (none, aws, azure) to poll termination notices")
flag.BoolVar(&flags.uncordon, "uncordon", true, "Enabling uncordoning node on start")
flag.BoolVar(&flags.drain, "drain", true, "Enabling draining node on stop")
Expand Down Expand Up @@ -85,6 +87,7 @@ func main() {
// Termination watcher
scuttle, err := sctl.New(&sctl.Config{
Logger: log,
Webhook: flags.webhook,
Platform: flags.platform,
ShouldUncordon: flags.uncordon,
ShouldDrain: flags.drain,
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.19

require (
github.com/sirupsen/logrus v1.9.0
github.com/slack-go/slack v0.11.4
k8s.io/api v0.25.4
k8s.io/apimachinery v0.25.4
k8s.io/client-go v0.25.4
Expand All @@ -22,6 +23,7 @@ require (
github.com/golang/protobuf v1.5.2 // indirect
github.com/google/gnostic v0.5.7-v3refs // indirect
github.com/google/gofuzz v1.1.0 // indirect
github.com/gorilla/websocket v1.4.2 // indirect
github.com/imdario/mergo v0.3.6 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
Expand Down
8 changes: 8 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ github.com/go-openapi/jsonreference v0.19.5/go.mod h1:RdybgQwPxbL4UEjuAruzK1x3nE
github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
github.com/go-openapi/swag v0.19.14 h1:gm3vOOXfiuw5i9p5N9xJvfjvuofpyvLA9Wr6QfK5Fng=
github.com/go-openapi/swag v0.19.14/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
github.com/go-test/deep v1.0.4 h1:u2CU3YKy9I2pmu9pX0eq50wCgjfGIt539SqR7FbHiho=
github.com/go-test/deep v1.0.4/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
Expand Down Expand Up @@ -110,6 +112,7 @@ github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE=
github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gofuzz v1.1.0 h1:Hsa8mG0dQ46ij8Sl2AYJDUv1oA9/d6Vk+3LG99Oe02g=
Expand All @@ -126,6 +129,8 @@ github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hf
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc=
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
Expand Down Expand Up @@ -166,11 +171,14 @@ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0=
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/slack-go/slack v0.11.4 h1:ojSa7KlPm3PqY2AomX4VTxEsK5eci5JaxCjlzGV5zoM=
github.com/slack-go/slack v0.11.4/go.mod h1:hlGi5oXA+Gt+yWTPP0plCdRKmjsDxecdHxYQdlMQKOw=
github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
Expand Down
6 changes: 6 additions & 0 deletions internal/scuttle.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ const (
// Config configures a Scuttle
type Config struct {
Logger *logrus.Logger
Webhook string
Platform string
ShouldUncordon bool
ShouldDrain bool
Expand Down Expand Up @@ -94,10 +95,12 @@ func (w *Scuttle) Run(ctx context.Context) error {
select {
case <-ctx.Done():
w.log.WithFields(fields).Info("scuttle: stopping...")
w.notifySlack(Shutdown, w.hostname)
return w.stop(stopCtx)
case <-ticker.C:
w.log.WithFields(fields).Debug("scuttle: tick...")
if w.pendingShutdown(ctx) {
w.notifySlack(TermNotice, w.hostname)
return w.stop(stopCtx)
}
}
Expand All @@ -112,6 +115,7 @@ func (w *Scuttle) start(ctx context.Context) error {

if w.config.ShouldUncordon {
w.log.WithFields(fields).Info("scuttle: uncordon node")
w.notifySlack(Uncordon, w.hostname)
drainer := drain.New(&drain.Config{
Client: w.kubeClient,
Logger: w.log,
Expand All @@ -132,6 +136,7 @@ func (w *Scuttle) stop(ctx context.Context) error {
// optionally drain to evict pods on the node
if w.config.ShouldDrain {
w.log.WithFields(fields).Info("scuttle: draining node")
w.notifySlack(Drain, w.hostname)
drainer := drain.New(&drain.Config{
Client: w.kubeClient,
Logger: w.log,
Expand All @@ -148,6 +153,7 @@ func (w *Scuttle) stop(ctx context.Context) error {
// optionally delete the node from the cluster
if w.config.ShouldDelete {
w.log.WithFields(fields).Info("scuttle: deleting node")
w.notifySlack(Delete, w.hostname)
err := w.kubeClient.CoreV1().Nodes().Delete(ctx, w.hostname, v1.DeleteOptions{})
// best-effort, we need to continue even on error
if err != nil {
Expand Down
45 changes: 45 additions & 0 deletions internal/slack.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright (C) 2022 Poseidon Labs
// Copyright (C) 2022 Dalton Hubble
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
package scuttle

import (
"fmt"

"github.com/slack-go/slack"
)

// Notification identifies the kind of node lifecycle event that is reported
// to Slack by notifySlack.
type Notification string

// Notification kinds understood by notifySlack. Each one selects the message
// text that is posted to the Slack webhook.
const (
// Uncordon reports that the node is being uncordoned.
Uncordon Notification = "uncordon"
// TermNotice reports that a spot termination notice was detected.
TermNotice Notification = "term-notice"
// Shutdown reports that a shutdown of the node was detected.
Shutdown Notification = "shutdown"
// Drain reports that the node is being drained.
Drain Notification = "drain"
// Delete reports that the node is being deleted.
Delete Notification = "delete"
)

// notifySlack posts a short message describing a node lifecycle action to the
// configured Slack webhook.
//
// action selects the message text and node is the node name interpolated into
// it. Notifications are best-effort: a failed post is logged and otherwise
// ignored so that it never interrupts uncordon, drain, or delete handling.
//
// Nothing is sent when no webhook URL is configured (the -webhook flag
// defaults to ""), or when action is not a known Notification.
func (w *Scuttle) notifySlack(action Notification, node string) {
	// Slack notifications are optional. Without this guard every lifecycle
	// event would attempt a request against an empty URL and log an error.
	if w.config.Webhook == "" {
		return
	}

	msg := &slack.WebhookMessage{}

	switch action {
	case Uncordon:
		msg.Text = fmt.Sprintf(":white_check_mark: Uncordon node `%s`", node)
	case TermNotice:
		msg.Text = fmt.Sprintf(":stopwatch: Detected spot termination notice for `%s`", node)
	case Shutdown:
		msg.Text = fmt.Sprintf(":octagonal_sign: Detected shutdown of `%s`", node)
	case Drain:
		msg.Text = fmt.Sprintf(":droplet: Draining node `%s`", node)
	case Delete:
		msg.Text = fmt.Sprintf(":headstone: Deleting node `%s`", node)
	default:
		// An unknown action has no message text; do not post an empty message.
		w.log.Errorf("unknown Slack notification %q for node %q", action, node)
		return
	}

	// NOTE(review): PostWebhook takes no context, so a slow Slack endpoint
	// presumably delays the caller (e.g. drain on shutdown) — consider
	// slack.PostWebhookContext with a timeout; confirm against slack-go docs.
	err := slack.PostWebhook(w.config.Webhook, msg)
	if err != nil {
		w.log.Errorf("error notifying Slack webhook url: %v", err)
	}
}