diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..4ca75d5 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,9 @@ +# More info: https://docs.docker.com/engine/reference/builder/#dockerignore-file +# Ignore build and test binaries. +bin/ +.github/ +cluster/ +examples/ +hack/ +README.md +LICENSE \ No newline at end of file diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml new file mode 100644 index 0000000..6a88c7d --- /dev/null +++ b/.github/workflows/publish.yaml @@ -0,0 +1,101 @@ +name: ci + +on: + push: + branches: + - "main" + tags: + - "v*" + +env: + REGISTRY: ghcr.io + REGISTRY_IMAGE: ghcr.io/${{ github.repository }} + +jobs: + build: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + strategy: + fail-fast: false + matrix: + platform: + - linux/amd64 + - linux/arm64 + steps: + - name: Prepare + run: | + platform=${{ matrix.platform }} + echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV + - name: Checkout repository + uses: actions/checkout@v4 + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_IMAGE }} + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Log in to the Github Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Build and push by digest + id: build + uses: docker/build-push-action@v5 + with: + context: . 
+ file: cmd/crik/Dockerfile + platforms: ${{ matrix.platform }} + labels: ${{ steps.meta.outputs.labels }} + outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: digests-${{ env.PLATFORM_PAIR }} + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + merge: + runs-on: ubuntu-latest + needs: + - build + steps: + - name: Download digests + uses: actions/download-artifact@v4 + with: + path: /tmp/digests + pattern: digests-* + merge-multiple: true + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_IMAGE }} + - name: Log in to the Github Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) 
| join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *) + - name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }} \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7a7feec --- /dev/null +++ b/.gitignore @@ -0,0 +1,28 @@ + +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib +bin/* +Dockerfile.cross + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Go workspace file +go.work + +# Kubernetes Generated files - skip generated files, except for vendored files +!vendor/**/zz_generated.* + +# editor and IDE paraphernalia +.idea +.vscode +*.swp +*.swo +*~ diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..aed8644 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,40 @@ +run: + deadline: 5m + allow-parallel-runners: true + +issues: + # don't skip warning about doc comments + # don't exclude the default set of lint + exclude-use-default: false + # restore some of the defaults + # (fill in the rest as needed) + exclude-rules: + - path: "api/*" + linters: + - lll + - path: "internal/*" + linters: + - dupl + - lll +linters: + disable-all: true + enable: + - dupl + - errcheck + - exportloopref + - goconst + - gocyclo + - gofmt + - goimports + - gosimple + - govet + - ineffassign + - lll + - misspell + - nakedret + - prealloc + - staticcheck + - typecheck + - unconvert + - unparam + - unused diff --git a/README.md b/README.md index 5f8f83c..5a70354 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,153 @@ -# crik -Checkpoint and Restore in Kubernetes +# Checkpoint and Restore in Kubernetes - crik + +`crik` is a project that aims to provide checkpoint and restore functionality for Kubernetes pods mainly targeted for +node shutdown and restart scenarios. 
Under the hood, it utilizes [`criu`](https://github.com/checkpoint-restore/criu) to +checkpoint and restore process trees. + +It is a work in progress and is not ready for production use. + +`crik` has two components: +- `crik` - a command wrapper that executes the given command, checkpoints it when SIGTERM is received and restores it from + a checkpoint when the image directory contains one. +- `manager` - a Kubernetes controller that watches `Node` objects and updates its internal map of states so that `crik` + can check whether it should checkpoint or restore depending on its node's state. + +## Quick Start + +The only prerequisite is to have a Kubernetes cluster running. You can use `kind` to create a local cluster. + +```bash +kind create cluster +``` + +Then, you can deploy the simple-loop example where a counter increases every second and you can delete the pod and see +that it continues from where it left off in the new pod. + +```bash +kubectl apply -f examples/simple-loop.yaml +``` + +Watch logs: + +```bash +kubectl logs -f simple-loop-0 +``` + +In another terminal, delete the pod: + +```bash +kubectl delete pod simple-loop-0 +``` + +Now, a new pod is created. See that it continues from where it left off: + +```bash +kubectl logs -f simple-loop-0 +``` + +## Usage + +The application you want to checkpoint and restore should be run with the `crik` command, like the following: + +```bash +crik run -- app-binary +``` + +The following is an example `Dockerfile` for your application that installs `crik` and runs your application. It assumes +your application is `entrypoint.sh`. +```Dockerfile +FROM ubuntu:22.04 + +RUN apt-get update && apt-get install --no-install-recommends --yes gnupg curl ca-certificates + +# crik requires criu to be available. 
+RUN curl "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x4E2A48715C45AEEC077B48169B29EEC9246B6CE2" | gpg --dearmor > /usr/share/keyrings/criu-ppa.gpg \ + && echo "deb [signed-by=/usr/share/keyrings/criu-ppa.gpg] https://ppa.launchpadcontent.net/criu/ppa/ubuntu jammy main" > /etc/apt/sources.list.d/criu.list \ + && apt-get update \ + && apt-get install --no-install-recommends --yes criu iptables + +# Install crik \ +COPY --from=ghcr.io/qawolf/crik:v0.1.0 /usr/local/bin/crik /usr/local/bin/crik + +# Copy your application +COPY entrypoint.sh /entrypoint.sh + +# Run your application with crik +ENTRYPOINT ["crik", "run", "--", "/entrypoint.sh"] +``` + +### Configuration + +Not all apps can be checkpointed and restored and for many of them, `criu` may need additional configurations. `crik` +provides a high level configuration interface that you can use to configure `crik` for your application. The following +is the minimum configuration you need to provide for your application and by default `crik` looks for `config.yaml` in +`/etc/crik` directory. + +```yaml +kind: ConfigMap +metadata: + name: crik-simple-loop +data: + config.yaml: |- + imageDir: /etc/checkpoint +``` + +Configuration options: +- `imageDir` - the directory where `crik` will store the checkpoint images. It needs to be available in the same path + in the new `Pod` as well. +- `additionalPaths` - additional paths that `crik` will include in the checkpoint and copy back in the new `Pod`. Populate + this list if you get `file not found` errors in the restore logs. The paths are relative to root `/` and can be + directories or files. +- `inotifyIncompatiblePaths` - paths that `crik` will delete before taking the checkpoint. Populate this list if you get + `fsnotify: Handle 0x278:0x2ffb5b cannot be opened` errors in the restore logs. 
You need to find the inode of the + file by converting `0x2ffb5b` to an integer, and then find the path of the file by running `find / -inum <inode>` and + add the path to this list. See [this comment](https://github.com/checkpoint-restore/criu/issues/1187#issuecomment-1975557296) for more details. + +### Node State Controller + +You can optionally configure `crik` to take a checkpoint only if the node it's running on is going to be shut down. This is +achieved by deploying a Kubernetes controller that watches `Node` events and updates its internal map of states so that +`crik` can check whether it should checkpoint or restore depending on its node's state. This may include direct calls +to the cloud provider's API to check the node's state in the future. + +Deploy the controller: + +```bash +helm install crik cluster/charts/crik +``` + +Make sure to include the URL of the server in `crik`'s configuration mounted to your `Pod`. + +```yaml +# Assuming the chart is deployed to the default namespace. +kind: ConfigMap +metadata: + name: crik-simple-loop +data: + config.yaml: |- + imageDir: /etc/checkpoint + nodeStateServerURL: http://crik-node-state-server.default.svc.cluster.local:9376 +``` + +`crik` will hit the `/nodes/<node-name>` endpoint of the server to get the state of the node it's running on when it receives +SIGTERM and take a checkpoint only if it returns `shutting-down` as the node's state. However, it needs to provide the +node name to the server so make sure to add the following environment variable to your container spec in your `Pod`: +```yaml +env: + - name: KUBERNETES_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName +``` + +## Developing + +Build `crik`: + +```bash +docker build -t crik:v1 -f cmd/crik/Dockerfile . +``` + +## License + +This project is licensed under the Apache License, Version 2.0 - see the [LICENSE](LICENSE) file for details. 
\ No newline at end of file diff --git a/cluster/charts/crik/templates/manager-deployment.yaml b/cluster/charts/crik/templates/manager-deployment.yaml new file mode 100644 index 0000000..d6877e3 --- /dev/null +++ b/cluster/charts/crik/templates/manager-deployment.yaml @@ -0,0 +1,86 @@ +apiVersion: v1 +kind: Service +metadata: + name: crik-node-state-server +spec: + selector: + app.kubernetes.io/name: crik-node-state-server + app.kubernetes.io/part-of: crik + ports: + - protocol: TCP + port: 9376 + targetPort: 9376 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: crik-node-state-server +spec: + selector: + matchLabels: + app.kubernetes.io/name: crik-node-state-server + app.kubernetes.io/part-of: crik + replicas: 3 + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: main + labels: + app.kubernetes.io/name: crik-node-state-server + app.kubernetes.io/part-of: crik + spec: + serviceAccountName: crik-node-state-server + securityContext: + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + containers: + - name: main + image: {{ .Values.nodeStateServer.image.repository }}:{{ .Values.nodeStateServer.image.tag }} + command: + - /manager + args: + - --metrics-port=8080 + - --health-probes-port=8081 + - --port=9376 + {{- if .Values.nodeStateServer.debug }} + - --debug + {{- end }} + ports: + # Exposes /nodes + - name: main + containerPort: 9376 + protocol: TCP + # Exposes only /metrics + - name: metrics + containerPort: 8080 + protocol: TCP + # Exposes /healthz and /readyz + - name: probes + containerPort: 8081 + protocol: TCP + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "ALL" + livenessProbe: + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 15 + periodSeconds: 20 + readinessProbe: + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi + 
terminationGracePeriodSeconds: 10 diff --git a/cluster/charts/crik/templates/manager-permissions.yaml b/cluster/charts/crik/templates/manager-permissions.yaml new file mode 100644 index 0000000..0aa4ce3 --- /dev/null +++ b/cluster/charts/crik/templates/manager-permissions.yaml @@ -0,0 +1,41 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: crik-node-state-server + labels: + app.kubernetes.io/name: crik-node-state-server + app.kubernetes.io/part-of: crik +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: crik-node-state-server + labels: + app.kubernetes.io/name: crik-node-state-server + app.kubernetes.io/part-of: crik +rules: + # Nodes are cluster-scoped, so reading them requires a ClusterRole rather than a namespaced Role. + - apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: crik-node-state-server + labels: + app.kubernetes.io/name: crik-node-state-server + app.kubernetes.io/part-of: crik +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: crik-node-state-server +subjects: + - kind: ServiceAccount + name: crik-node-state-server + namespace: {{ .Release.Namespace }} diff --git a/cluster/charts/crik/values.yaml b/cluster/charts/crik/values.yaml new file mode 100644 index 0000000..1456621 --- /dev/null +++ b/cluster/charts/crik/values.yaml @@ -0,0 +1,5 @@ +nodeStateServer: + debug: false + image: + repository: ghcr.io/qawolf/crik-node-state-server + tag: v0.1.0 diff --git a/cmd/crik/Dockerfile b/cmd/crik/Dockerfile new file mode 100644 index 0000000..58ce6fc --- /dev/null +++ b/cmd/crik/Dockerfile @@ -0,0 +1,37 @@ +FROM golang:1.22 as build +ARG TARGETOS +ARG TARGETARCH + +WORKDIR /build + +COPY go.mod go.mod +COPY go.sum go.sum +RUN go mod download + +COPY cmd cmd +COPY internal internal + +RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -o crik cmd/crik/main.go && \ + chmod +x crik + + +FROM ubuntu:22.04 + +RUN apt-get update && apt-get install --no-install-recommends --yes gnupg curl ca-certificates + +RUN 
curl "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x4E2A48715C45AEEC077B48169B29EEC9246B6CE2" | gpg --dearmor > /usr/share/keyrings/criu-ppa.gpg \ + && echo "deb [signed-by=/usr/share/keyrings/criu-ppa.gpg] https://ppa.launchpadcontent.net/criu/ppa/ubuntu jammy main" > /etc/apt/sources.list.d/criu.list \ + && apt-get update \ + && apt-get install --no-install-recommends --yes criu iptables + +# The PR https://github.com/checkpoint-restore/criu/pull/2360 is not merged yet, so we use criu from the docker image +# built from the PR. This is necessary if you get sched policy error during restore, which is the case with webkit-based +# browsers. + +#RUN apt-get update \ +# && apt install --no-install-recommends --yes libprotobuf-dev libprotobuf-c-dev protobuf-c-compiler protobuf-compiler python3-protobuf iptables nftables iproute2 libnftables-dev libcap-dev libnl-3-dev libnet-dev libaio-dev +#COPY --from=docker.io/muvaf/criu-x86_64:rst0git-6673a3b /criu/criu/criu /usr/sbin/criu + +COPY --from=build /build/crik /usr/local/bin/crik + +ENTRYPOINT ["crik", "run", "--"] diff --git a/cmd/crik/main.go b/cmd/crik/main.go new file mode 100644 index 0000000..e3be481 --- /dev/null +++ b/cmd/crik/main.go @@ -0,0 +1,173 @@ +//go:build linux +// +build linux + +/* +Copyright 2024 QA Wolf Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package main + +import ( + "encoding/json" + "fmt" + "net/http" + "os" + "os/exec" + "os/signal" + "strings" + "syscall" + + "github.com/alecthomas/kong" + "github.com/checkpoint-restore/go-criu/v7" + + "github.com/qawolf/crik/internal/controller/node" + cexec "github.com/qawolf/crik/internal/exec" +) + +// signalChan receives the SIGTERM that triggers taking a checkpoint. +var signalChan = make(chan os.Signal, 1) + +// cli is the command line interface parsed by kong. +var cli struct { + Debug bool `help:"Enable debug mode."` + + Run Run `cmd:"" help:"Run given command wrapped by crik."` +} + +// main parses the command line and runs the selected command. +func main() { + ctx := kong.Parse(&cli) + if err := ctx.Run(); err != nil { + // Errors go to stderr, terminated by a newline. + fmt.Fprintf(os.Stderr, "failed to run the command: %s\n", err.Error()) + os.Exit(1) + } +} + +// Run is the command that runs the given command wrapped by crik. +type Run struct { + Args []string `arg:"" optional:"" passthrough:"" name:"command" help:"Command and its arguments to run. Required if --image-dir is not given or empty."` + + ConfigPath string `type:"path" default:"/etc/crik/config.yaml" help:"Path to the configuration file."` +} + +// Run restores the command from the checkpoint found in the configured image +// directory or, when there is none, starts the given command. If an image +// directory is configured, a checkpoint is taken once SIGTERM is received, +// unless the node state server reports that the node is not shutting down. +func (r *Run) Run() error { + cfg, err := cexec.ReadConfiguration(r.ConfigPath) + if err != nil && !os.IsNotExist(err) { + return fmt.Errorf("failed to read configuration: %w", err) + } + willRestore, err := shouldRestore(cfg) + if err != nil { + return fmt.Errorf("failed to check if restore is needed: %w", err) + } + if willRestore { + fmt.Printf("A checkpoint has been found in %s. Restoring.\n", cfg.ImageDir) + if err := cexec.RestoreWithCmd(cfg.ImageDir); err != nil { + return fmt.Errorf("failed to restore: %w", err) + } + return nil + } + if len(r.Args) == 0 { + return fmt.Errorf("command is required when there is no checkpoint to restore, i.e. --image-dir is not given or empty") + } + // Make sure the PID is a high number so that it's not taken up during restore. 
+ //lastPidPath := "/proc/sys/kernel/ns_last_pid" + //if err := os.WriteFile(lastPidPath, []byte("9000"), 0644); err != nil { + // return fmt.Errorf("failed to write to %s: %w", lastPidPath, err) + //} + check := exec.Command("criu", "check", "--all") + check.Stderr = os.Stderr + check.Stdout = os.Stdout + _ = check.Run() + + cmd := exec.Command(r.Args[0], r.Args[1:]...) + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setsid: true, + Unshareflags: syscall.CLONE_NEWIPC, + } + cmd.Stdin = nil + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Start(); err != nil { + return fmt.Errorf("failed to start command: %w", err) + } + fmt.Printf("Command started with PID %d\n", cmd.Process.Pid) + if cfg.ImageDir != "" { + fmt.Printf("Setting up SIGTERM handler to take checkpoint in %s\n", cfg.ImageDir) + signal.Notify(signalChan, syscall.SIGTERM) + // Reap the command in the background so that crik returns as soon as the + // command exits by itself instead of waiting forever for a SIGTERM. + exited := make(chan error, 1) + go func() { exited <- cmd.Wait() }() + select { + case err := <-exited: + return err + case <-signalChan: + fmt.Println("Received SIGTERM.") + // Take checkpoint only if the node is in shutting down state or the node state server is not given. + if cfg.NodeStateServerURL != "" { + nodeName := os.Getenv("KUBERNETES_NODE_NAME") + resp, err := http.Get(fmt.Sprintf("%s/nodes/%s", cfg.NodeStateServerURL, nodeName)) + if err != nil { + return fmt.Errorf("failed to get node state: %w", err) + } + defer resp.Body.Close() + var response node.Node + if err := json.NewDecoder(resp.Body).Decode(&response); err != nil { + return fmt.Errorf("failed to decode node state: %w", err) + } + if response.State != node.NodeStateShuttingDown { + fmt.Println("Node is not in shutting down state. 
Not taking checkpoint.") + if err := cmd.Process.Signal(syscall.SIGTERM); err != nil { + return fmt.Errorf("failed to send SIGTERM to the process: %w", err) + } + return <-exited + } + } + duration, err := cexec.TakeCheckpoint(criu.MakeCriu(), cmd.Process.Pid, cfg) + if err != nil { + return fmt.Errorf("failed to take checkpoint: %w", err) + } + fmt.Printf("Checkpoint taken in %s\n", duration) + } + return <-exited + } + return cmd.Wait() +} + +// shouldRestore reports whether the configured image directory exists and +// contains at least one file whose name ends with ".img". +func shouldRestore(cfg cexec.Configuration) (bool, error) { + if cfg.ImageDir == "" { + return false, nil + } + entries, err := os.ReadDir(cfg.ImageDir) + if os.IsNotExist(err) { + return false, nil + } + if err != nil { + return false, err + } + for _, entry := range entries { + if strings.HasSuffix(entry.Name(), ".img") { + return true, nil + } + } + return false, nil +} diff --git a/cmd/manager/Dockerfile b/cmd/manager/Dockerfile new file mode 100644 index 0000000..3125939 --- /dev/null +++ b/cmd/manager/Dockerfile @@ -0,0 +1,22 @@ +FROM golang:1.22 as builder +ARG TARGETOS +ARG TARGETARCH + +WORKDIR /workspace + +COPY go.mod go.mod +COPY go.sum go.sum +RUN go mod download + +COPY cmd/ cmd/ +COPY internal/ internal/ + +RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/manager/main.go + +FROM gcr.io/distroless/static:nonroot +WORKDIR / +COPY --from=builder /workspace/manager . + +USER 65532:65532 + +ENTRYPOINT ["/manager"] diff --git a/cmd/manager/main.go b/cmd/manager/main.go new file mode 100644 index 0000000..ea7a5a7 --- /dev/null +++ b/cmd/manager/main.go @@ -0,0 +1,118 @@ +/* +Copyright 2024 QA Wolf Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "flag" + "net/http" + "os" + "time" + + // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) + // to ensure that exec-entrypoint and run can make use of them. + _ "k8s.io/client-go/plugin/pkg/client/auth" + + "github.com/crossplane/crossplane-runtime/pkg/logging" + "github.com/go-logr/logr" + "go.uber.org/zap/zapcore" + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/healthz" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + + "github.com/qawolf/crik/internal/controller/node" +) + +var ( + scheme = runtime.NewScheme() // scheme is populated in init and handed to the manager in main + setupLog = ctrl.Log.WithName("setup") // setupLog is the logger main uses for start-up messages +) + +func init() { + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) // add the client-go types to scheme +} + +func main() { + var metricsPort string + var healthProbesPort string + var serverPort string + var debug bool + flag.StringVar(&metricsPort, "metrics-port", "8080", "The port used by the metrics server.") + flag.StringVar(&healthProbesPort, "health-probes-port", "8081", "The port used to serve health probe endpoints.") + flag.StringVar(&serverPort, "port", "9376", "The port used to serve node state endpoint.") + flag.BoolVar(&debug, "debug", false, "Turn on debug logs.") + flag.Parse() + var zlog logr.Logger + if debug { + zlog = zap.New( + zap.UseDevMode(true), + zap.Level(zapcore.DebugLevel), + ) + } else { + zlog = zap.New( + zap.UseDevMode(false), 
+ ) + } + log := logging.NewLogrLogger(zlog) + ctrl.SetLogger(zlog) + mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ + Logger: zlog, + Scheme: scheme, + Metrics: metricsserver.Options{BindAddress: ":" + metricsPort}, + HealthProbeBindAddress: ":" + healthProbesPort, + // We don't need Node controller to be a singleton since it doesn't manipulate any state. + LeaderElection: false, + }) + if err != nil { + setupLog.Error(err, "unable to start manager") + os.Exit(1) + } + + s := node.NewServer() // served over HTTP on --port below and also passed to node.Setup + go func() { // serve s in the background; a ListenAndServe error terminates the process + if err := (&http.Server{ + Addr: ":" + serverPort, + Handler: s, + ReadHeaderTimeout: 1 * time.Second, + }).ListenAndServe(); err != nil { + setupLog.Error(err, "unable to start server") + os.Exit(1) + } + }() + if err := node.Setup(mgr, s, log); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "Node") + os.Exit(1) + } + + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up health check") + os.Exit(1) + } + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up ready check") + os.Exit(1) + } + + setupLog.Info("starting manager") + if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { + setupLog.Error(err, "problem running manager") + os.Exit(1) + } +} diff --git a/examples/simple-loop.yaml b/examples/simple-loop.yaml new file mode 100644 index 0000000..622a9d9 --- /dev/null +++ b/examples/simple-loop.yaml @@ -0,0 +1,75 @@ +#apiVersion: v1 +#kind: ConfigMap +#metadata: +# name: crik-browser +#data: +# config.yaml: |- +# imageDir: /etc/checkpoint +# additionalPaths: +# - /root/.cache # webkit +# inotifyIncompatiblePaths: +# - /root/.config/glib-2.0/settings # webkit +# - /usr/share/zoneinfo # chrome +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: crik-simple-loop +data: + config.yaml: |- + imageDir: /etc/checkpoint +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: simple-loop 
+spec: + replicas: 1 + selector: + matchLabels: + app: simple-loop + template: + metadata: + labels: + app: simple-loop + spec: + containers: + - name: main + image: ghcr.io/qawolf/crik:v0.1.0 + imagePullPolicy: Never # temp + securityContext: + privileged: true +# capabilities: +# add: [ "CHECKPOINT_RESTORE", "NET_ADMIN", "SYS_ADMIN" ] + args: + - bash + - -c + - 'for ((i=0; ; i++)); do echo "Count is at $i"; sleep 1; done' + volumeMounts: + - mountPath: /etc/crik + name: crik-config + - mountPath: /etc/checkpoint + name: checkpoint-storage + - mountPath: /proc/sys/kernel/ns_last_pid + name: checkpoint-cap + env: + - name: KUBERNETES_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumes: + - name: crik-config + configMap: + name: crik-simple-loop + - name: checkpoint-cap + hostPath: + path: /proc/sys/kernel/ns_last_pid + type: File + volumeClaimTemplates: + - metadata: + name: checkpoint-storage + spec: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 1Gi diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..3a2691e --- /dev/null +++ b/go.mod @@ -0,0 +1,71 @@ +module github.com/qawolf/crik + +go 1.22 + +require ( + github.com/alecthomas/kong v0.9.0 + github.com/checkpoint-restore/go-criu/v7 v7.1.0 + github.com/crossplane/crossplane-runtime v1.15.1 + github.com/go-logr/logr v1.4.1 + github.com/pkg/errors v0.9.1 + go.uber.org/zap v1.26.0 + k8s.io/api v0.29.1 + k8s.io/apimachinery v0.29.1 + k8s.io/client-go v0.29.1 + sigs.k8s.io/controller-runtime v0.17.0 +) + +require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/evanphx/json-patch/v5 v5.8.0 // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect + github.com/go-logr/zapr v1.3.0 // indirect + github.com/go-openapi/jsonpointer v0.19.6 // indirect + github.com/go-openapi/jsonreference v0.20.2 // 
indirect + github.com/go-openapi/swag v0.22.3 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/golang/protobuf v1.5.3 // indirect + github.com/google/gnostic-models v0.6.8 // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/google/gofuzz v1.2.0 // indirect + github.com/google/uuid v1.4.0 // indirect + github.com/imdario/mergo v0.3.16 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/prometheus/client_golang v1.18.0 // indirect + github.com/prometheus/client_model v0.5.0 // indirect + github.com/prometheus/common v0.45.0 // indirect + github.com/prometheus/procfs v0.12.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + go.uber.org/multierr v1.11.0 // indirect + golang.org/x/exp v0.0.0-20240112132812-db7319d0e0e3 // indirect + golang.org/x/net v0.20.0 // indirect + golang.org/x/oauth2 v0.15.0 // indirect + golang.org/x/sys v0.18.0 // indirect + golang.org/x/term v0.16.0 // indirect + golang.org/x/text v0.14.0 // indirect + golang.org/x/time v0.5.0 // indirect + gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect + google.golang.org/appengine v1.6.8 // indirect + google.golang.org/protobuf v1.32.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/apiextensions-apiserver v0.29.1 // indirect + k8s.io/component-base v0.29.1 // indirect + k8s.io/klog/v2 v2.110.1 // indirect + k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect + k8s.io/utils 
v0.0.0-20230726121419-3b25d923346b // indirect + sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..0a765a9 --- /dev/null +++ b/go.sum @@ -0,0 +1,224 @@ +github.com/alecthomas/assert/v2 v2.6.0 h1:o3WJwILtexrEUk3cUVal3oiQY2tfgr/FHWiz/v2n4FU= +github.com/alecthomas/assert/v2 v2.6.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= +github.com/alecthomas/kong v0.9.0 h1:G5diXxc85KvoV2f0ZRVuMsi45IrBgx9zDNGNj165aPA= +github.com/alecthomas/kong v0.9.0/go.mod h1:Y47y5gKfHp1hDc7CH7OeXgLIpp+Q2m1Ni0L5s3bI8Os= +github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc= +github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/checkpoint-restore/go-criu/v7 v7.1.0 h1:JbQyO4o+P8ycNTMLPiiDqXg49bAcy4WljWCzYQho35A= +github.com/checkpoint-restore/go-criu/v7 v7.1.0/go.mod h1:1svAtmbtvX4BKI45OFzgoTTLG7oYFKdColv/Vcsb2A8= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/crossplane/crossplane-runtime v1.15.1 h1:g1h75tNYOQT152IUNxs8ZgSsRFQKrZN9z69KefMujXs= +github.com/crossplane/crossplane-runtime v1.15.1/go.mod h1:kRcJjJQmBFrR2n/KhwL8wYS7xNfq3D8eK4JliEScOHI= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.11.0 
h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= +github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= +github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch/v5 v5.8.0 h1:lRj6N9Nci7MvzrXuX6HFzU8XjmhPiXPlsKEy1u0KQro= +github.com/evanphx/json-patch/v5 v5.8.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= +github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= +github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= +github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/gogo/protobuf v1.3.2 
h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= +github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20240117000934-35fc243c5815 h1:WzfWbQz/Ze8v6l++GGbGNFZnUShVpP/0xffCPLL+ax8= +github.com/google/pprof v0.0.0-20240117000934-35fc243c5815/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik= +github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4= +github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= 
+github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= +github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= +github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 h1:jWpvCLoY8Z/e3VKvlsiIGKtc+UG6U5vzxaoagmhXfyg= +github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0/go.mod h1:QUyp042oQthUoa9bqDv0ER0wrtXnBruoNd7aNjkbP+k= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.14.0 h1:vSmGj2Z5YPb9JwCWT6z6ihcUvDhuXLc3sJiqd3jMKAY= +github.com/onsi/ginkgo/v2 v2.14.0/go.mod h1:JkUdW7JkN0V6rFvsHcJ478egV3XH9NxpD27Hal/PhZw= +github.com/onsi/gomega v1.30.0 h1:hvMK7xYz4D3HapigLTeGdId/NcfQx1VHMJc60ew99+8= +github.com/onsi/gomega v1.30.0/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk= +github.com/prometheus/client_golang v1.18.0/go.mod h1:T+GXkCk5wSJyOqMIzVgvvjFDlkOQntgjkJWKrN5txjA= +github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= +github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= +github.com/prometheus/common v0.45.0 h1:2BGz0eBc2hdMDLnO/8n0jeB3oPrt2D08CekT0lneoxM= +github.com/prometheus/common v0.45.0/go.mod h1:YJmSTw9BoKxJplESWWxlbyttQR4uaEcGyv9MZjVOJsY= +github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= +github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= +github.com/rogpeppe/go-internal v1.10.0 
h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo= +go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto 
v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/exp v0.0.0-20240112132812-db7319d0e0e3 h1:hNQpMuAJe5CtcUqCXaWga3FHu+kQvCqcsoVaQgSV60o= +golang.org/x/exp v0.0.0-20240112132812-db7319d0e0e3/go.mod h1:idGWGoKP1toJGkd5/ig9ZLuPcZBC3ewk7SzmH0uou08= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= +golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= +golang.org/x/oauth2 v0.15.0 h1:s8pnnxNVzjWyrvYdFUQq5llS1PX2zhPXmccZv99h7uQ= +golang.org/x/oauth2 v0.15.0/go.mod h1:q48ptWNTY5XWf+JNten23lcvHpLJ0ZSxF5ttTHKVCAM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync 
v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE= +golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/time v0.5.0 
h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.17.0 h1:FvmRgNOcs3kOa+T20R1uhfP9F6HgG2mfxDv1vrx1Htc= +golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= +gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM= +google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= +google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/check.v1 
v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.29.1 h1:DAjwWX/9YT7NQD4INu49ROJuZAAAP/Ijki48GUPzxqw= +k8s.io/api v0.29.1/go.mod h1:7Kl10vBRUXhnQQI8YR/R327zXC8eJ7887/+Ybta+RoQ= +k8s.io/apiextensions-apiserver v0.29.1 h1:S9xOtyk9M3Sk1tIpQMu9wXHm5O2MX6Y1kIpPMimZBZw= +k8s.io/apiextensions-apiserver v0.29.1/go.mod h1:zZECpujY5yTW58co8V2EQR4BD6A9pktVgHhvc0uLfeU= +k8s.io/apimachinery v0.29.1 h1:KY4/E6km/wLBguvCZv8cKTeOwwOBqFNjwJIdMkMbbRc= +k8s.io/apimachinery v0.29.1/go.mod h1:6HVkd1FwxIagpYrHSwJlQqZI3G9LfYWRPAkUvLnXTKU= +k8s.io/client-go v0.29.1 h1:19B/+2NGEwnFLzt0uB5kNJnfTsbV8w6TgQRz9l7ti7A= +k8s.io/client-go v0.29.1/go.mod h1:TDG/psL9hdet0TI9mGyHJSgRkW3H9JZk2dNEUS7bRks= +k8s.io/component-base v0.29.1 h1:MUimqJPCRnnHsskTTjKD+IC1EHBbRCVyi37IoFBrkYw= +k8s.io/component-base v0.29.1/go.mod h1:fP9GFjxYrLERq1GcWWZAE3bqbNcDKDytn2srWuHTtKc= +k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= +k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= +k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 
h1:aVUu9fTY98ivBPKR9Y5w/AuzbMm96cd3YHRTU83I780= +k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/controller-runtime v0.17.0 h1:fjJQf8Ukya+VjogLO6/bNX9HE6Y2xpsO5+fyS26ur/s= +sigs.k8s.io/controller-runtime v0.17.0/go.mod h1:+MngTvIQQQhfXtwfdGw/UOQ/aIaqsYywfCINOtwMO/s= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/hack/boilerplate.go.txt b/hack/boilerplate.go.txt new file mode 100644 index 0000000..b21429b --- /dev/null +++ b/hack/boilerplate.go.txt @@ -0,0 +1,15 @@ +/* +Copyright 2024 QA Wolf Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ \ No newline at end of file diff --git a/internal/controller/node/controller.go b/internal/controller/node/controller.go new file mode 100644 index 0000000..576046d --- /dev/null +++ b/internal/controller/node/controller.go @@ -0,0 +1,140 @@ +/* +Copyright 2024 QA Wolf Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package node contains the controller logic for the Nodes. +package node + +import ( + "context" + "strings" + + "github.com/crossplane/crossplane-runtime/pkg/event" + "github.com/crossplane/crossplane-runtime/pkg/logging" + "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" + kerrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + controllerName = "node-controller" + + errGetNode = "failed to get node" +) + +// Setup sets up the controller with the Manager. +func Setup(mgr ctrl.Manager, server *Server, log logging.Logger) error { + r := NewReconciler( + mgr.GetClient(), + mgr.GetScheme(), + WithEventRecorder(event.NewAPIRecorder(mgr.GetEventRecorderFor(controllerName))), + WithLogger(log.WithValues("controller", controllerName)), + ) + return ctrl.NewControllerManagedBy(mgr). 
+ For(&corev1.Node{}).Complete(r) +} + +type NodeStateWriter interface { + SetNodeState(name string, state NodeState) + DeleteNodeState(name string) +} + +type NopNodeStateWriter struct{} + +func (NopNodeStateWriter) SetNodeState(name string, state NodeState) {} +func (NopNodeStateWriter) DeleteNodeState(name string) {} + +// WithEventRecorder sets the EventRecorder for the Reconciler. +func WithEventRecorder(e event.Recorder) ReconcilerOption { + return func(r *Reconciler) { + r.record = e + } +} + +// WithLogger sets the Logger for the Reconciler. +func WithLogger(l logging.Logger) ReconcilerOption { + return func(r *Reconciler) { + r.rootLog = l + } +} + +// WithNodeStateWriter sets the NodeStateWriter for the Reconciler. +func WithNodeStateWriter(s NodeStateWriter) ReconcilerOption { + return func(r *Reconciler) { + r.nodes = s + } +} + +// ReconcilerOption is a function that sets some option on the Reconciler. +type ReconcilerOption func(*Reconciler) + +// NewReconciler returns a new Reconciler. +func NewReconciler(c client.Client, s *runtime.Scheme, opts ...ReconcilerOption) *Reconciler { + r := &Reconciler{ + client: c, + Scheme: s, + record: event.NewNopRecorder(), + rootLog: logging.NewNopLogger(), + nodes: NopNodeStateWriter{}, + } + for _, f := range opts { + f(r) + } + return r +} + +// Reconciler reconciles a Node object to detect shutdown events and notify Playground pods running on that Node. +type Reconciler struct { + client client.Client + Scheme *runtime.Scheme + + record event.Recorder + rootLog logging.Logger + + nodes NodeStateWriter +} + +// Reconcile gets triggered by every event on Node resources. 
+func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := r.rootLog.WithValues("node", req.NamespacedName) + + n := &corev1.Node{} + if err := r.client.Get(ctx, req.NamespacedName, n); err != nil { + if kerrors.IsNotFound(err) { + r.nodes.DeleteNodeState(req.Name) + return ctrl.Result{}, nil + } + return ctrl.Result{}, errors.Wrap(err, errGetNode) + } + var readyCondition corev1.NodeCondition + for _, c := range n.Status.Conditions { + if c.Type == corev1.NodeReady { + readyCondition = c + break + } + } + // NOTE(muvaf): This covers GKE node shutdown event. It may or may not work for Kubernetes deployments. + if !(readyCondition.Status == corev1.ConditionFalse && + readyCondition.Reason == "KubeletNotReady" && + strings.Contains(readyCondition.Message, "node is shutting down")) { + return ctrl.Result{}, nil + } + log.Debug("node is shutting down", "node", n.Name, "phase", n.Status.Phase) + r.nodes.SetNodeState(n.Name, NodeStateShuttingDown) + return ctrl.Result{}, nil +} diff --git a/internal/controller/node/server.go b/internal/controller/node/server.go new file mode 100644 index 0000000..92e3447 --- /dev/null +++ b/internal/controller/node/server.go @@ -0,0 +1,97 @@ +/* +Copyright 2024 QA Wolf Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package node + +import ( + "k8s.io/apimachinery/pkg/util/json" + "net/http" + "strings" + "sync" +) + +// Node is the state of a node. 
type Node struct {
	// Name is the node name as extracted from the request path.
	Name string `json:"name"`
	// State is the state recorded for the node; NodeStateUnknown when nothing
	// has been recorded for that name.
	State NodeState `json:"state"`
}

// NodeState is the string identifier of a node's state as tracked by Server.
type NodeState string

// Node states.
var (
	// NodeStateUnknown is returned for nodes that have no recorded state.
	NodeStateUnknown NodeState = "unknown"
	// NodeStateShuttingDown marks a node as shutting down.
	NodeStateShuttingDown NodeState = "shutting-down"
)

// Server keeps an in-memory map of node states and exposes it over HTTP.
//
// Construct it with NewServer: the zero value has a nil map and a nil mutex,
// so SetNodeState, GetNodeState and DeleteNodeState panic on it.
type Server struct {
	nodes map[string]NodeState

	// Kept as an embedded pointer so the exported field and the promoted
	// Lock/Unlock/RLock/RUnlock methods keep their original shape.
	*sync.RWMutex
}

// NewServer returns a Server with an empty state map and a ready-to-use mutex.
func NewServer() *Server {
	return &Server{
		nodes: map[string]NodeState{},
		// The embedded pointer must be set explicitly; left nil, the first
		// Lock/RLock call dereferences it and panics.
		RWMutex: &sync.RWMutex{},
	}
}

// SetNodeState records state for the node called name, replacing any previous
// value.
func (s *Server) SetNodeState(name string, state NodeState) {
	s.Lock()
	defer s.Unlock()
	s.nodes[name] = state
}

// GetNodeState returns the state recorded for the node called name, or
// NodeStateUnknown when nothing is recorded for it.
func (s *Server) GetNodeState(name string) NodeState {
	s.RLock()
	defer s.RUnlock()
	if state, ok := s.nodes[name]; ok {
		return state
	}
	return NodeStateUnknown
}

// DeleteNodeState removes the recorded state of the node called name. It is a
// no-op when nothing is recorded for that name.
func (s *Server) DeleteNodeState(name string) {
	s.Lock()
	defer s.Unlock()
	delete(s.nodes, name)
}

// ServeHTTP answers GET /v1/nodes/<name> with the JSON-encoded Node for <name>.
//
// It responds with 400 when no node name is left after stripping the
// "/v1/nodes/" prefix and surrounding slashes, and with 405 for any method
// other than GET. The name check runs before the method check.
func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	nodeName := strings.Trim(strings.TrimPrefix(r.URL.Path, "/v1/nodes/"), "/")
	if nodeName == "" {
		http.Error(w, "node query parameter is missing", http.StatusBadRequest)
		return
	}
	if r.Method != http.MethodGet {
		w.Header().Set("Allow", http.MethodGet)
		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
		return
	}
	n := Node{
		Name:  nodeName,
		State: s.GetNodeState(nodeName),
	}
	// The first write to the body sends an implicit 200, so no explicit
	// WriteHeader call is needed (calling it after the body has been written
	// is reported by net/http as superfluous).
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(n); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
	}
}

// NOTE: the rest of this collapsed patch line (the next file's diff header and
// the opening of its license comment) is preserved verbatim on the next line.
// diff --git a/internal/exec/checkpoint.go b/internal/exec/checkpoint.go new file mode 100644 index 0000000..6c2226c --- /dev/null +++ b/internal/exec/checkpoint.go @@ -0,0 +1,155 @@ +/* +Copyright 2024 QA Wolf Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package exec + +import ( + "fmt" + "os" + "path/filepath" + "sigs.k8s.io/yaml" + "strconv" + "syscall" + "time" + + "github.com/checkpoint-restore/go-criu/v7" + "github.com/checkpoint-restore/go-criu/v7/rpc" + "google.golang.org/protobuf/proto" +) + +type Actions struct { + pid int + configuration Configuration +} + +// PreDump is called when criu is about to dump the process. +func (a Actions) PreDump() error { + // Temp hack to resolve crash during dump. + for _, p := range a.configuration.InotifyIncompatiblePaths { + if err := os.RemoveAll(p); err != nil { + return fmt.Errorf("failed to remove %s: %w", p, err) + } + } + conf := &configurationOnDisk{ + Configuration: a.configuration, + } + conf.UnixFileDescriptorTrio = make([]string, 3) + fdDir := filepath.Join("/proc", strconv.Itoa(a.pid), "fd") + for i := 0; i < 3; i++ { + fdPath := filepath.Join(fdDir, strconv.Itoa(i)) + link, err := os.Readlink(fdPath) + if err != nil { + return fmt.Errorf("failed to read link of %s: %w", fdPath, err) + } + conf.UnixFileDescriptorTrio[i] = link + } + confYAML, err := yaml.Marshal(conf) + if err != nil { + return fmt.Errorf("failed to marshal fds: %w", err) + } + if err := os.WriteFile(filepath.Join(a.configuration.ImageDir, ConfigurationFileName), confYAML, 0o600); err != nil { + return fmt.Errorf("failed to write stdio-fds.json: %w", err) + } + if err := os.MkdirAll(filepath.Join(a.configuration.ImageDir, "extraFiles"), 0755); err != nil { + return fmt.Errorf("failed to create extra path: %w", err) + } + for _, p := range a.configuration.AdditionalPaths { + if _, err := 
os.Stat(p); os.IsNotExist(err) { + continue + } + if err := CopyDir(p, filepath.Join(a.configuration.ImageDir, "extraFiles", p)); err != nil { + return fmt.Errorf("failed to copy %s: %w", p, err) + } + } + return nil +} + +// PostDump does nothing. +func (a Actions) PostDump() error { + return nil +} + +// PreRestore does nothing. +func (a Actions) PreRestore() error { + return nil +} + +// PostRestore does nothing. +func (a Actions) PostRestore(pid int32) error { + return nil +} + +// NetworkLock does nothing. +func (a Actions) NetworkLock() error { + return nil +} + +// NetworkUnlock does nothing. +func (a Actions) NetworkUnlock() error { + return nil +} + +// SetupNamespaces does nothing. +func (a Actions) SetupNamespaces(_ int32) error { + return nil +} + +// PostSetupNamespaces does nothing. +func (a Actions) PostSetupNamespaces() error { + return nil +} + +// PostResume does nothing. +func (a Actions) PostResume() error { + return nil +} + +func TakeCheckpoint(c *criu.Criu, pid int, configuration Configuration) (time.Duration, error) { + start := time.Now() + fd, err := syscall.Open(configuration.ImageDir, syscall.O_DIRECTORY, 755) + if err != nil { + return time.Since(start), fmt.Errorf("failed to open directory %s: %w", configuration.ImageDir, err) + } + cgMode := rpc.CriuCgMode_IGNORE + opts := &rpc.CriuOpts{ + TcpEstablished: proto.Bool(true), + ShellJob: proto.Bool(false), + FileLocks: proto.Bool(false), + LogFile: proto.String("dump.log"), + AutoDedup: proto.Bool(false), + Pid: proto.Int32(int32(pid)), + ImagesDirFd: proto.Int32(int32(fd)), // To make it use ImagesDir. 
+ OrphanPtsMaster: proto.Bool(true), + NotifyScripts: proto.Bool(true), + LeaveRunning: proto.Bool(false), + LeaveStopped: proto.Bool(false), + LogLevel: proto.Int32(4), + LazyPages: proto.Bool(false), + GhostLimit: proto.Uint32(500 * 1048576), // 500MB + Root: proto.String("/"), + TcpClose: proto.Bool(true), + ManageCgroupsMode: &cgMode, + External: GetExternalDirectoriesForCheckpoint(), + } + actions := Actions{ + pid: pid, + configuration: configuration, + } + if err := c.Dump(opts, actions); err != nil { + return time.Since(start), fmt.Errorf("failed to dump: %w", err) + } + return time.Since(start), nil +} diff --git a/internal/exec/opts.go b/internal/exec/opts.go new file mode 100644 index 0000000..8d640af --- /dev/null +++ b/internal/exec/opts.go @@ -0,0 +1,205 @@ +/* +Copyright 2024 QA Wolf Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package exec + +import ( + "fmt" + "io" + "os" + "path/filepath" + "strings" + + "github.com/checkpoint-restore/go-criu/v7/crit" + "k8s.io/apimachinery/pkg/util/yaml" +) + +const ( + ConfigurationFileName = "configuration.yaml" +) + +func ReadConfiguration(path string) (Configuration, error) { + b, err := os.ReadFile(path) + if err != nil { + return Configuration{}, fmt.Errorf("failed to read configuration file: %w", err) + } + var c Configuration + if err := yaml.Unmarshal(b, &c); err != nil { + return Configuration{}, fmt.Errorf("failed to unmarshal configuration: %w", err) + } + return c, nil +} + +// Configuration lets crik know about quirks of the processes whose checkpoint is being taken. For example, the files +// that need to be part of the checkpoint but are not part of the container's image need to be specified here. +type Configuration struct { + // ImageDir is the directory where the checkpoint is stored. It is expected to be available in the new container as + // well. + ImageDir string `json:"imageDir"` + + // NodeStateServerURL is the URL of the node state server. If given, crik will first check if the node is in shutting + // down state and only then take checkpoint. + // If not given, crik will always take checkpoint when it receives SIGTERM. + NodeStateServerURL string `json:"nodeStateServerURL"` + + // AdditionalPaths is the list of paths that are not part of the container's image but were opened by one of the + // processes in the tree. We need to make sure that these paths are available in the new container as well. + // The paths are relative to the root of the container's filesystem. + // Entries can be path to a file or a directory. + AdditionalPaths []string `json:"additionalPaths,omitempty"` + + // InotifyIncompatiblePaths is the list of paths that are known to cause issues with inotify. We delete those paths + // before taking the checkpoint. 
+ InotifyIncompatiblePaths []string `json:"inotifyIncompatiblePaths,omitempty"` +} + +// configurationOnDisk contains additional metadata information about the checkpoint that is used during restore. +type configurationOnDisk struct { + Configuration + + // UnixFileDescriptors is the list of file descriptors that are opened by all UNIX processes by default. + // They map to 0 -> stdin, 1 -> stdout, 2 -> stderr. + // In containers, these are connected to either /dev/null or pipes. We need to make sure that when we restore, the + // pipes are connected to criu's stdin, stdout, and stderr which is what's connected to the new container's stdin, + // stdout, and stderr. + // This list has only 3 elements in all cases. + UnixFileDescriptorTrio []string `json:"unixFileDescriptorTrio,omitempty"` +} + +var ( + // DirectoryMounts is the list of directories that are mounted by the container runtime and need to be marked as + // such during checkpoint and restore so that the underlying files can change without breaking the restore process. 
+ DirectoryMounts = []DirectoryMount{ + { + Name: "zoneinfo", + PathInCheckpoint: "/usr/share/zoneinfo", + PathInRestore: "/usr/share/zoneinfo", + }, + { + Name: "null", + PathInCheckpoint: "/dev/null", + PathInRestore: "/dev/null", + }, + { + Name: "random", + PathInCheckpoint: "/dev/random", + PathInRestore: "/dev/random", + }, + { + Name: "urandom", + PathInCheckpoint: "/dev/urandom", + PathInRestore: "/dev/urandom", + }, + { + Name: "tty", + PathInCheckpoint: "/dev/tty", + PathInRestore: "/dev/tty", + }, + { + Name: "zero", + PathInCheckpoint: "/dev/zero", + PathInRestore: "/dev/zero", + }, + { + Name: "full", + PathInCheckpoint: "/dev/full", + PathInRestore: "/dev/full", + }, + } +) + +type DirectoryMount struct { + Name string `json:"name"` + PathInCheckpoint string `json:"pathInCheckpoint"` + PathInRestore string `json:"pathInRestore"` +} + +func GetExternalDirectoriesForCheckpoint() []string { + result := make([]string, len(DirectoryMounts)) + for i, d := range DirectoryMounts { + result[i] = fmt.Sprintf("mnt[%s]:%s", d.PathInCheckpoint, d.Name) + } + return result +} + +func GetExternalDirectoriesForRestore() []string { + result := make([]string, len(DirectoryMounts)) + for i, d := range DirectoryMounts { + result[i] = fmt.Sprintf("mnt[%s]:%s", d.Name, d.PathInRestore) + } + return result +} + +func CopyDir(src, dst string) error { + return filepath.WalkDir(src, func(srcPath string, d os.DirEntry, err error) error { + if err != nil { + return err + } + rel, err := filepath.Rel(src, srcPath) + if err != nil { + return err + } + dstPath := filepath.Join(dst, rel) + if d.IsDir() { + return os.MkdirAll(dstPath, d.Type().Perm()) + } + // TODO(muvaf): This changes the perms of folder if the dir wasn't walked before. 
+ if err := os.MkdirAll(filepath.Dir(dstPath), 0755); err != nil { + return err + } + src, err := os.Open(srcPath) + if err != nil { + return err + } + defer src.Close() + + dst, err := os.Create(dstPath) + if err != nil { + return err + } + defer dst.Close() + + if _, err := io.Copy(dst, src); err != nil { + return err + } + + // Get the source file mode to apply to the destination file + srcInfo, err := src.Stat() + if err != nil { + return err + } + return os.Chmod(dstPath, srcInfo.Mode()) + }) +} + +func GetKubePodFilePaths(imageDir string) (map[string]string, error) { + c := crit.New(nil, nil, imageDir, false, false) + fds, err := c.ExploreFds() + if err != nil { + return nil, fmt.Errorf("failed to explore fds: %w", err) + } + result := map[string]string{} + for _, fd := range fds { + for _, file := range fd.Files { + if !strings.HasPrefix(file.Path, "/sys/fs/cgroup/kubepods.slice") || + file.Type != "REG" { + continue + } + result[filepath.Base(file.Path)] = file.Path + } + } + return result, nil +} diff --git a/internal/exec/restore.go b/internal/exec/restore.go new file mode 100644 index 0000000..27bdc72 --- /dev/null +++ b/internal/exec/restore.go @@ -0,0 +1,94 @@ +/* +Copyright 2024 QA Wolf Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package exec + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "sigs.k8s.io/yaml" + "strings" + "syscall" +) + +func RestoreWithCmd(imageDir string) error { + if err := os.MkdirAll("/tmp/.X11-unix", 0755); err != nil { + return fmt.Errorf("failed to mkdir /tmp/.X11-unix: %w", err) + } + if err := CopyDir(filepath.Join(imageDir, "extraFiles"), "/"); err != nil { + return fmt.Errorf("failed to copy extra files: %w", err) + } + args := []string{"restore", + "--images-dir", imageDir, + "--tcp-established", + "--file-locks", + "--evasive-devices", + "--tcp-close", + "--manage-cgroups=ignore", + "-v4", + "--log-file", "restore.log", + } + configYAML, err := os.ReadFile(filepath.Join(imageDir, ConfigurationFileName)) + if err != nil { + return fmt.Errorf("failed to read stdio file descriptors: %w", err) + } + conf := &configurationOnDisk{} + if err := yaml.Unmarshal(configYAML, conf); err != nil { + return fmt.Errorf("failed to unmarshal stdio file descriptors: %w", err) + } + for _, d := range GetExternalDirectoriesForRestore() { + args = append(args, "--external", d) + } + inheritedFds := conf.UnixFileDescriptorTrio + + // When cgroup v2 is used, the path to resource usage files contain pod and container IDs which are changed + // in the new pod. We find and replace them with the new files. + kubePodFiles, err := GetKubePodFilePaths(imageDir) + if err != nil { + return fmt.Errorf("failed to get kubepods.slice files: %w", err) + } + var extraFiles []*os.File + if len(kubePodFiles) > 0 { + // All processes within container are in the same cgroup, so getting the folder of self is enough. 
+ str, err := os.ReadFile("/proc/self/cgroup") + if err != nil { + return fmt.Errorf("failed to read /proc/self/cgroup: %w", err) + } + basePath := filepath.Join("/sys/fs/cgroup", strings.Split(strings.Split(string(str), "\n")[0], ":")[2]) + for k, v := range kubePodFiles { + path := filepath.Join(basePath, k) + f, err := os.OpenFile(path, syscall.O_RDONLY, 0) + if err != nil { + return fmt.Errorf("failed to open %s: %w", k, err) + } + // The index of file descriptor in extraFiles must match the index+3 in inheritedFds because + // the first 3 file descriptors are reserved for stdin, stdout, and stderr. + inheritedFds = append(inheritedFds, strings.TrimPrefix(v, "/")) + extraFiles = append(extraFiles, f) + } + } + for i, fdStr := range inheritedFds { + args = append(args, "--inherit-fd", fmt.Sprintf("fd[%d]:%s", i, fdStr)) + } + cmd := exec.Command("criu", args...) + cmd.ExtraFiles = extraFiles + cmd.Stdin = nil + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd.Run() +}