-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Muvaffak Onus <me@muvaf.com>
- Loading branch information
Showing
21 changed files
with
1,871 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# More info: https://docs.docker.com/engine/reference/builder/#dockerignore-file | ||
# Ignore build and test binaries. | ||
bin/ | ||
.github/ | ||
cluster/ | ||
examples/ | ||
hack/ | ||
README.md | ||
LICENSE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
name: ci | ||
|
||
on: | ||
push: | ||
branches: | ||
- "main" | ||
tags: | ||
- "v*" | ||
|
||
env: | ||
REGISTRY: ghcr.io | ||
REGISTRY_IMAGE: ghcr.io/${{ github.repository }} | ||
|
||
jobs: | ||
build: | ||
runs-on: ubuntu-latest | ||
permissions: | ||
contents: read | ||
packages: write | ||
strategy: | ||
fail-fast: false | ||
matrix: | ||
platform: | ||
- linux/amd64 | ||
- linux/arm64 | ||
steps: | ||
- name: Prepare | ||
run: | | ||
platform=${{ matrix.platform }} | ||
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV | ||
- name: Checkout repository | ||
uses: actions/checkout@v4 | ||
- name: Docker meta | ||
id: meta | ||
uses: docker/metadata-action@v5 | ||
with: | ||
images: ${{ env.REGISTRY_IMAGE }} | ||
- name: Set up QEMU | ||
uses: docker/setup-qemu-action@v3 | ||
- name: Set up Docker Buildx | ||
uses: docker/setup-buildx-action@v3 | ||
- name: Log in to the Github Container Registry | ||
uses: docker/login-action@v3 | ||
with: | ||
registry: ${{ env.REGISTRY }} | ||
username: ${{ github.actor }} | ||
password: ${{ secrets.GITHUB_TOKEN }} | ||
- name: Build and push by digest | ||
id: build | ||
uses: docker/build-push-action@v5 | ||
with: | ||
context: . | ||
file: cmd/crik/Dockerfile | ||
platforms: ${{ matrix.platform }} | ||
labels: ${{ steps.meta.outputs.labels }} | ||
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true | ||
- name: Export digest | ||
run: | | ||
mkdir -p /tmp/digests | ||
digest="${{ steps.build.outputs.digest }}" | ||
touch "/tmp/digests/${digest#sha256:}" | ||
- name: Upload digest | ||
uses: actions/upload-artifact@v4 | ||
with: | ||
name: digests-${{ env.PLATFORM_PAIR }} | ||
path: /tmp/digests/* | ||
if-no-files-found: error | ||
retention-days: 1 | ||
|
||
merge: | ||
runs-on: ubuntu-latest | ||
needs: | ||
- build | ||
steps: | ||
- name: Download digests | ||
uses: actions/download-artifact@v4 | ||
with: | ||
path: /tmp/digests | ||
pattern: digests-* | ||
merge-multiple: true | ||
- name: Set up Docker Buildx | ||
uses: docker/setup-buildx-action@v3 | ||
- name: Docker meta | ||
id: meta | ||
uses: docker/metadata-action@v5 | ||
with: | ||
images: ${{ env.REGISTRY_IMAGE }} | ||
- name: Log in to the Github Container Registry | ||
uses: docker/login-action@v3 | ||
with: | ||
registry: ${{ env.REGISTRY }} | ||
username: ${{ github.actor }} | ||
password: ${{ secrets.GITHUB_TOKEN }} | ||
- name: Create manifest list and push | ||
working-directory: /tmp/digests | ||
run: | | ||
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ | ||
$(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *) | ||
- name: Inspect image | ||
run: | | ||
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
|
||
# Binaries for programs and plugins | ||
*.exe | ||
*.exe~ | ||
*.dll | ||
*.so | ||
*.dylib | ||
bin/* | ||
Dockerfile.cross | ||
|
||
# Test binary, built with `go test -c` | ||
*.test | ||
|
||
# Output of the go coverage tool, specifically when used with LiteIDE | ||
*.out | ||
|
||
# Go workspace file | ||
go.work | ||
|
||
# Kubernetes Generated files - skip generated files, except for vendored files | ||
!vendor/**/zz_generated.* | ||
|
||
# editor and IDE paraphernalia | ||
.idea | ||
.vscode | ||
*.swp | ||
*.swo | ||
*~ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
run: | ||
deadline: 5m | ||
allow-parallel-runners: true | ||
|
||
issues: | ||
# don't skip warning about doc comments | ||
# don't exclude the default set of lint | ||
exclude-use-default: false | ||
# restore some of the defaults | ||
# (fill in the rest as needed) | ||
exclude-rules: | ||
- path: "api/*" | ||
linters: | ||
- lll | ||
- path: "internal/*" | ||
linters: | ||
- dupl | ||
- lll | ||
linters: | ||
disable-all: true | ||
enable: | ||
- dupl | ||
- errcheck | ||
- exportloopref | ||
- goconst | ||
- gocyclo | ||
- gofmt | ||
- goimports | ||
- gosimple | ||
- govet | ||
- ineffassign | ||
- lll | ||
- misspell | ||
- nakedret | ||
- prealloc | ||
- staticcheck | ||
- typecheck | ||
- unconvert | ||
- unparam | ||
- unused |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,153 @@ | ||
# crik | ||
Checkpoint and Restore in Kubernetes | ||
# Checkpoint and Restore in Kubernetes - crik | ||
|
||
`crik` is a project that aims to provide checkpoint and restore functionality for Kubernetes pods, mainly targeting | ||
node shutdown and restart scenarios. Under the hood, it uses [`criu`](https://github.com/checkpoint-restore/criu) to | ||
checkpoint and restore process trees. | ||
|
||
It is a work in progress and is not ready for production use. | ||
|
||
`crik` has two components: | ||
- `crik` - a command wrapper that executes the given command, checkpoints it when SIGTERM is received, and restores it | ||
from the checkpoint when the image directory contains one. | ||
- `manager` - a Kubernetes controller that watches `Node` objects and updates its internal map of states so that `crik` | ||
can check whether it should checkpoint or restore depending on its node's state. | ||
|
||
## Quick Start | ||
|
||
The only prerequisite is a running Kubernetes cluster. You can use `kind` to create a local cluster. | ||
|
||
```bash | ||
kind create cluster | ||
``` | ||
|
||
Then, deploy the simple-loop example, in which a counter increases every second. You can delete the pod and see | ||
that the counter continues from where it left off in the new pod. | ||
|
||
```bash | ||
kubectl apply -f examples/simple-loop.yaml | ||
``` | ||
|
||
Watch logs: | ||
|
||
```bash | ||
kubectl logs -f simple-loop-0 | ||
``` | ||
|
||
In another terminal, delete the pod: | ||
|
||
```bash | ||
kubectl delete pod simple-loop-0 | ||
``` | ||
|
||
Now, a new pod is created. See that it continues from where it left off: | ||
|
||
```bash | ||
kubectl logs -f simple-loop-0 | ||
``` | ||
|
||
## Usage | ||
|
||
The application you want to checkpoint and restore should be run with the `crik` command, like the following: | ||
|
||
```bash | ||
crik run -- app-binary | ||
``` | ||
|
||
The following is an example `Dockerfile` for your application that installs `crik` and runs your application. It assumes | ||
your application is `entrypoint.sh`. | ||
```Dockerfile | ||
FROM ubuntu:22.04 | ||
|
||
RUN apt-get update && apt-get install --no-install-recommends --yes gnupg curl ca-certificates | ||
|
||
# crik requires criu to be available. | ||
RUN curl "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x4E2A48715C45AEEC077B48169B29EEC9246B6CE2" | gpg --dearmor > /usr/share/keyrings/criu-ppa.gpg \ | ||
&& echo "deb [signed-by=/usr/share/keyrings/criu-ppa.gpg] https://ppa.launchpadcontent.net/criu/ppa/ubuntu jammy main" > /etc/apt/sources.list.d/criu.list \ | ||
&& apt-get update \ | ||
&& apt-get install --no-install-recommends --yes criu iptables | ||
|
||
# Install crik | ||
COPY --from=ghcr.io/qawolf/crik:v0.1.0 /usr/local/bin/crik /usr/local/bin/crik | ||
|
||
# Copy your application | ||
COPY entrypoint.sh /entrypoint.sh | ||
|
||
# Run your application with crik | ||
ENTRYPOINT ["crik", "run", "--", "/entrypoint.sh"] | ||
``` | ||
|
||
### Configuration | ||
|
||
Not all apps can be checkpointed and restored, and many of them need additional `criu` configuration. `crik` | ||
provides a high-level configuration interface that you can use to configure `crik` for your application. The following | ||
is the minimum configuration you need to provide for your application. By default, `crik` looks for `config.yaml` in | ||
the `/etc/crik` directory. | ||
|
||
```yaml | ||
kind: ConfigMap | ||
metadata: | ||
name: crik-simple-loop | ||
data: | ||
config.yaml: |- | ||
imageDir: /etc/checkpoint | ||
``` | ||
Configuration options: | ||
- `imageDir` - the directory where `crik` will store the checkpoint images. It needs to be available in the same path | ||
in the new `Pod` as well. | ||
- `additionalPaths` - additional paths that `crik` will include in the checkpoint and copy back in the new `Pod`. Populate | ||
this list if you get `file not found` errors in the restore logs. The paths are relative to root `/` and can be | ||
directories or files. | ||
- `inotifyIncompatiblePaths` - paths that `crik` will delete before taking the checkpoint. Populate this list if you get | ||
`fsnotify: Handle 0x278:0x2ffb5b cannot be opened` errors in the restore logs. You need to find the inode of the | ||
file by converting `0x2ffb5b` to an integer, and then find the path of the file by running `find / -inum <inode>` and | ||
add the path to this list. See [this comment](https://github.com/checkpoint-restore/criu/issues/1187#issuecomment-1975557296) for more details. | ||
|
||
### Node State Controller | ||
|
||
You can optionally configure `crik` to take a checkpoint only if the node it's running on is going to be shut down. This is | ||
achieved by deploying a Kubernetes controller that watches `Node` events and updates its internal map of states so that | ||
`crik` can check whether it should checkpoint or restore depending on its node's state. This may include direct calls | ||
to the cloud provider's API to check the node's state in the future. | ||
|
||
Deploy the controller: | ||
|
||
```bash | ||
helm install crik charts/crik | ||
``` | ||
|
||
Make sure to include the URL of the server in `crik`'s configuration mounted to your `Pod`. | ||
|
||
```yaml | ||
# Assuming the chart is deployed to default namespace. | ||
kind: ConfigMap | ||
metadata: | ||
name: crik-simple-loop | ||
data: | ||
config.yaml: |- | ||
imageDir: /etc/checkpoint | ||
nodeStateServerURL: http://crik-node-state-server.default.svc.cluster.local:9376 | ||
``` | ||
|
||
`crik` will hit the `/node-state` endpoint of the server to get the state of the node it's running on when it receives | ||
SIGTERM and take a checkpoint only if it returns `shutting-down` as the node's state. However, it needs to provide the | ||
node name to the server, so make sure to add the following environment variable to your container spec in your `Pod`: | ||
```yaml | ||
env: | ||
- name: KUBERNETES_NODE_NAME | ||
valueFrom: | ||
fieldRef: | ||
fieldPath: spec.nodeName | ||
``` | ||
|
||
## Developing | ||
|
||
Build `crik`: | ||
|
||
```bash | ||
docker build -t crik:v1 -f cmd/crik/Dockerfile . | ||
``` | ||
|
||
## License | ||
|
||
This project is licensed under the Apache License, Version 2.0 - see the [LICENSE](LICENSE) file for details. |
Oops, something went wrong.