diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a6058c9d9f794..806e324db6729 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -36,6 +36,7 @@ expanding into more specifics.
    1. [Tips and Tricks](#tips-and-tricks)
       1. [Benchmarking](#benchmarking)
       1. [Profiling](#profiling)
+      1. [Kubernetes](#kubernetes)
 1. [Humans](#humans)
    1. [Documentation](#documentation)
    1. [Changelog](#changelog)
@@ -547,6 +548,85 @@ cat stacks.folded | inferno-flamegraph > flamegraph.svg
 
 And that's it! You now have a flamegraph SVG file that can be opened and
 navigated in your favorite web browser.
 
+### Kubernetes
+
+There is a special flow for when you develop portions of Vector that are
+designed to work with Kubernetes, like the `kubernetes_logs` source or the
+`deployment/kubernetes/*.yaml` configs.
+
+This flow facilitates building Vector and deploying it into a cluster.
+
+#### Requirements
+
+There are some extra requirements besides what you'd normally need to work on
+Vector:
+
+* a `linux` system (create an issue if you want to work with another OS and
+  we'll help);
+* [`skaffold`](https://skaffold.dev/)
+* [`docker`](https://www.docker.com/)
+* [`kubectl`](https://kubernetes.io/docs/tasks/tools/install-kubectl/)
+* [`kustomize`](https://kustomize.io/)
+* a [`minikube`](https://minikube.sigs.k8s.io/)-powered or other k8s cluster
+* [`cargo watch`](https://github.com/passcod/cargo-watch)
+
+#### The dev flow
+
+Once you have the requirements, use the `scripts/skaffold.sh dev` command.
+
+That's it, just one command should take care of everything!
+
+It will:
+
+1. build the `vector` binary in development mode,
+2. build a docker image from this binary via `skaffold/docker/Dockerfile`,
+3. deploy `vector` into the Kubernetes cluster at your current kubectl context
+   using the built docker image and a mix of our production deployment
+   configuration from `distribution/kubernetes/*.yaml` and the special
+   dev-flow configuration at `skaffold/manifests/*.yaml`; see
+   `kustomization.yaml` for the exact specification.
+
+As a result of invoking `scripts/skaffold.sh dev`, you should see a `skaffold`
+process running on your local machine, printing the logs from the deployed
+`vector` instance.
+
+To stop the process, press `Ctrl+C` and wait for `skaffold` to clean up
+the cluster state and exit.
+
+`scripts/skaffold.sh` wraps `skaffold`, so you can use other `skaffold`
+subcommands if they fit your workflow better.
+
+#### Troubleshooting
+
+You might need to tweak `skaffold`; here are some hints:
+
+* `skaffold` will try to detect whether a local cluster is used; if a local
+  cluster is used, `skaffold` won't push the docker images it builds to a
+  registry.
+  See [this page](https://skaffold.dev/docs/environment/local-cluster/)
+  for how you can troubleshoot and tweak this behavior.
+
+* `skaffold` can rewrite the image name so that you don't try to push a docker
+  image to a repo that you don't have access to.
+  See [this page](https://skaffold.dev/docs/environment/image-registries/)
+  for more info.
+
+* For the rest of the `skaffold` tweaks you might want to apply, check out
+  [this page](https://skaffold.dev/docs/environment/).
+
+#### Going through the dev flow manually
+
+In some cases `skaffold` may not work. It's possible to go through the dev
+flow manually, without `skaffold`.
+
+One of the important things `skaffold` does is patch the configuration to
+tie things together.
+If you want to go without it, you'll have to take care of that yourself, so
+some additional knowledge of Kubernetes inner workings is required.
+
+Essentially, the steps you have to take to deploy manually are the same ones
+that `skaffold` performs, and they're outlined in the previous section.
+
 ## Humans
 
 After making your change, you'll want to prepare it for Vector's users
diff --git a/Cargo.lock b/Cargo.lock
index 8b3d960dfdf50..2a4a9c3063511 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,10 +1,10 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
 [[package]]
-name = "adler"
-version = "0.2.3"
+name = "adler32"
+version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e"
+checksum = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2"
 
 [[package]]
 name = "ahash"
@@ -1108,6 +1108,18 @@ dependencies = [
  "smallvec 1.2.0",
 ]
 
+[[package]]
+name = "evmap"
+version = "10.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e3ea06a83f97d3dc2eb06e51e7a729b418f0717a5558a5c870e3d5156dc558d"
+dependencies = [
+ "bytes 0.5.4",
+ "hashbag",
+ "slab",
+ "smallvec 1.2.0",
+]
+
 [[package]]
 name = "exitcode"
 version = "1.1.2"
@@ -1181,9 +1193,9 @@ checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d"
 
 [[package]]
 name = "flate2"
-version = "1.0.16"
+version = "1.0.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "68c90b0fc46cf89d227cc78b40e494ff81287a92dd07631e5af0d06fe3cf885e"
+checksum = "6bd6d6f4752952feb71363cffc9ebac9411b75b87c6ab6058c40c8900cf43c0f"
 dependencies = [
  "cfg-if",
  "crc32fast",
@@ -1494,6 +1506,12 @@ dependencies = [
  "tokio-util",
 ]
 
+[[package]]
+name = "hashbag"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "452b31b3ffe7cf13de531eefae493301c85995a953c808d7c79a8f45abae0706"
+
 [[package]]
 name = "hashbrown"
 version = "0.6.3"
@@ -1770,9 +1788,9 @@ dependencies = [
 
 [[package]]
 name = "inventory"
-version = "0.1.7"
+version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "621b50c176968fd3b0bd71f821a28a0ea98db2b5aea966b2fbb8bd1b7d310328"
+checksum = "2bf98296081bd2cb540acc09ef9c97f22b7e487841520350293605db1b2c7a27"
 dependencies = [
  "ctor",
  "ghost",
@@ -1781,9 +1799,9 @@ dependencies = [
 
 [[package]]
 name = "inventory-impl"
-version = "0.1.7"
+version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f99a4111304bade76468d05beab3487c226e4fe4c4de1c4e8f006e815762db73"
+checksum = "0a8e30575afe28eea36a9a39136b70b2fb6b0dd0a212a5bd1f30a498395c0274"
 dependencies = [
  "proc-macro2 1.0.18",
  "quote 1.0.2",
@@ -1862,6 +1880,23 @@ dependencies = [
  "wasm-bindgen",
 ]
 
+[[package]]
+name = "k8s-openapi"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57f95fd36c08ce592e67400a0f1a66f432196997d5a7e9a97e8743c33d8a9312"
+dependencies = [
+ "base64 0.12.0",
+ "bytes 0.5.4",
+ "chrono",
+ "http 0.2.1",
+ "percent-encoding 2.1.0",
+ "serde",
+ "serde-value",
+ "serde_json",
+ "url 2.1.1",
+]
+
 [[package]]
 name = "kernel32-sys"
 version = "0.2.2"
@@ -2440,11 +2475,11 @@ dependencies = [
 
 [[package]]
 name = "miniz_oxide"
-version = "0.4.0"
+version = "0.3.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "be0f75932c1f6cfae3c04000e40114adf955636e19040f9c0a2c380702aa1c7f"
+checksum = "6f3f74f726ae935c3f514300cc6773a0c9492abc5e972d42ba0c0ebb88757625" dependencies = [ - "adler", + "adler32", ] [[package]] @@ -2798,6 +2833,15 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "ordered-float" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fe9037165d7023b1228bc4ae9a2fa1a2b0095eca6c2998c624723dfd01314a5" +dependencies = [ + "num-traits", +] + [[package]] name = "os_pipe" version = "0.9.2" @@ -3960,6 +4004,16 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde-value" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" +dependencies = [ + "ordered-float", + "serde", +] + [[package]] name = "serde_derive" version = "1.0.114" @@ -4873,9 +4927,9 @@ dependencies = [ [[package]] name = "tokio-uds" -version = "0.2.7" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab57a4ac4111c8c9dbcf70779f6fc8bc35ae4b2454809febac840ad19bd7e4e0" +checksum = "037ffc3ba0e12a0ab4aca92e5234e0dedeb48fddf6ccd260f1f150a36a9f2445" dependencies = [ "bytes 0.4.12", "futures 0.1.29", @@ -5449,7 +5503,8 @@ dependencies = [ "derive_is_enum_variant", "dirs", "elastic_responses", - "evmap", + "evmap 10.0.2", + "evmap 7.1.3", "exitcode", "file-source", "flate2", @@ -5467,6 +5522,7 @@ dependencies = [ "indexmap", "inventory", "jemallocator", + "k8s-openapi", "lazy_static 1.4.0", "leveldb", "libc", diff --git a/Cargo.toml b/Cargo.toml index 7169b41d897da..daf1097b26bf6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -142,6 +142,7 @@ strip-ansi-escapes = { version = "0.1.0"} colored = "1.9" warp = { package = "warp", version = "0.2", default-features = false, optional = true } evmap = { version = "7", features = ["bytes"], optional = true } +evmap10 = { package = "evmap", version = "10", features = ["bytes"], optional = true } logfmt = { version = "0.0.2", optional = true } notify = "4.0.14" once_cell = "1.3" @@ -152,6 +153,7 @@ pulsar = { version = "1.0.0", default-features = false, features = ["tokio-runti task-compat = "0.1" cidr-utils = "0.4.2" pin-project = "0.4.22" +k8s-openapi = { version = "0.9", features = ["v1_15"], optional = true } # For WASM vector-wasm = { path = "lib/vector-wasm", optional = true } @@ -228,6 +230,10 @@ leveldb-cmake = ["leveldb", "leveldb/leveldb-sys-3"] wasm = ["lucetc", "lucet-runtime", "lucet-wasi", "vector-wasm", "anyhow"] wasm-timings = ["wasm"] +# Enables kubernetes dependencies and shared code. Kubernetes-related sources, +# transforms and sinks should depend on this feature. 
+kubernetes = ["k8s-openapi", "evmap10"] + # Sources sources = [ "sources-docker", @@ -246,6 +252,7 @@ sources = [ "sources-syslog", "sources-tls", "sources-vector", + "sources-kubernetes-logs", ] sources-docker = ["bollard"] sources-file = ["bytesize"] @@ -263,6 +270,7 @@ sources-stdin = ["bytesize"] sources-syslog = ["sources-socket", "syslog_loose"] sources-tls = ["sources-http", "sources-logplex", "sources-socket", "sources-splunk_hec"] sources-vector = ["sources-socket"] +sources-kubernetes-logs = ["kubernetes", "transforms-merge", "transforms-json_parser", "transforms-regex_parser"] # Transforms transforms = [ @@ -427,6 +435,7 @@ kafka-integration-tests = ["sources-kafka", "sinks-kafka"] loki-integration-tests = ["sinks-loki"] pulsar-integration-tests = ["sinks-pulsar"] splunk-integration-tests = ["sinks-splunk_hec", "warp"] +kubernetes-integration-tests = ["sources-kubernetes-logs"] shutdown-tests = ["sources","sinks-console","sinks-prometheus","sinks-blackhole","unix","rdkafka","transforms-log_to_metric","transforms-lua"] disable-resolv-conf = [] diff --git a/distribution/kubernetes/vector-namespaced.yaml b/distribution/kubernetes/vector-namespaced.yaml index dec678f2bb6a9..6e52aaba2ff7f 100644 --- a/distribution/kubernetes/vector-namespaced.yaml +++ b/distribution/kubernetes/vector-namespaced.yaml @@ -7,12 +7,12 @@ data: # Configuration for vector. # Docs: https://vector.dev/docs/ - # Configure the controlled by the deployment. + # Data dir is location controlled at the `DaemonSet`. data_dir = "/vector-data-dir" # Ingest logs from Kubernetes. - [sources.kubernetes] - type = "kubernetes" + [sources.kubernetes_logs] + type = "kubernetes_logs" --- apiVersion: apps/v1 kind: DaemonSet @@ -28,11 +28,11 @@ spec: metadata: labels: name: vector + vector.dev/exclude: "true" spec: containers: - name: vector image: timberio/vector:latest-alpine - imagePullPolicy: Always args: - --config - /etc/vector/*.toml @@ -61,6 +61,7 @@ spec: - name: config-dir mountPath: /etc/vector/ readOnly: true + terminationGracePeriodSeconds: 60 tolerations: # This toleration is to have the daemonset runnable on master nodes. # Remove it if your masters can't run pods. diff --git a/kustomization.yaml b/kustomization.yaml new file mode 100644 index 0000000000000..f8145e36474c6 --- /dev/null +++ b/kustomization.yaml @@ -0,0 +1,10 @@ +# This is a part of our skaffold setup for development. +# Do not use in production. + +namespace: vector + +resources: + - distribution/kubernetes/vector-global.yaml + - skaffold/manifests/namespace.yaml + - skaffold/manifests/config.yaml + - distribution/kubernetes/vector-namespaced.yaml diff --git a/scripts/copy-docker-image-to-minikube.sh b/scripts/copy-docker-image-to-minikube.sh index 4d86f5e9d790e..0efe834b69e24 100755 --- a/scripts/copy-docker-image-to-minikube.sh +++ b/scripts/copy-docker-image-to-minikube.sh @@ -27,7 +27,8 @@ docker save "${IMAGES[@]}" | gzip >"$IMAGES_ARCHIVE" # Start a subshell to preserve the env state. ( # Switch to minikube docker. - eval "$(minikube --shell bash docker-env)" + # shellcheck source=minikube-docker-env.sh disable=SC1091 + . scripts/minikube-docker-env.sh # Load images. docker load -i "$IMAGES_ARCHIVE" diff --git a/scripts/minikube-docker-env.sh b/scripts/minikube-docker-env.sh new file mode 100644 index 0000000000000..fc2ffdd8ab9f0 --- /dev/null +++ b/scripts/minikube-docker-env.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -euo pipefail + +if ! 
COMMANDS="$(minikube --shell bash docker-env)"; then
+  echo "Unable to obtain docker env from minikube; is minikube started?" >&2
+  exit 7
+fi
+
+eval "$COMMANDS"
diff --git a/scripts/skaffold.sh b/scripts/skaffold.sh
new file mode 100755
index 0000000000000..dd16c70603a7d
--- /dev/null
+++ b/scripts/skaffold.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+set -euo pipefail
+cd "$(dirname "${BASH_SOURCE[0]}")/.."
+
+# Initial vector build to ensure we start at a valid state.
+cargo build
+
+# Prepare .dockerignore so we don't send the whole dir to docker as the
+# build context.
+cat <<EOF >target/debug/.dockerignore
+**/*
+!vector
+EOF
+
+# Watch for changes in the background and rebuild the vector binary.
+cargo watch -x build &
+
+# Kill all child processes of this bash instance.
+trap 'kill -- "-$$"; exit 0' EXIT
+
+export SKAFFOLD_CACHE_ARTIFACTS=false
+skaffold "$@"
diff --git a/scripts/test-integration-kubernetes.sh b/scripts/test-integration-kubernetes.sh
index d62e957b4d583..329154d95fdec 100755
--- a/scripts/test-integration-kubernetes.sh
+++ b/scripts/test-integration-kubernetes.sh
@@ -71,8 +71,5 @@ fi
 
 export KUBE_TEST_DEPLOY_COMMAND="scripts/deploy-kubernetes-test.sh"
 
-# TODO: enable kubernetes tests when they're implemented
-exit 0 # disable the test and make them pass
-
 # Run the tests.
 cargo test --no-default-features --features kubernetes-integration-tests
diff --git a/skaffold.yaml b/skaffold.yaml
new file mode 100644
index 0000000000000..21fba4f685709
--- /dev/null
+++ b/skaffold.yaml
@@ -0,0 +1,15 @@
+apiVersion: skaffold/v2beta3
+kind: Config
+metadata:
+  name: vector
+build:
+  local:
+    useDockerCLI: true
+    push: false
+  artifacts:
+    - image: timberio/vector
+      context: target/debug
+      docker:
+        dockerfile: ../../skaffold/docker/Dockerfile
+deploy:
+  kustomize: {}
diff --git a/skaffold/docker/Dockerfile b/skaffold/docker/Dockerfile
new file mode 100644
index 0000000000000..87947b2a8bcc6
--- /dev/null
+++ b/skaffold/docker/Dockerfile
@@ -0,0 +1,23 @@
+FROM debian:bullseye-slim
+
+RUN apt-get update \
+    && apt-get install -y \
+        ca-certificates \
+        tzdata \
+        patchelf \
+    && rm -rf /var/lib/apt/lists/*
+
+ENTRYPOINT ["/usr/bin/vector"]
+STOPSIGNAL SIGTERM
+
+ARG LOG=debug,vector=trace
+ENV LOG="$LOG"
+
+COPY vector /usr/bin/vector
+
+RUN set -x \
+    && ls -la /usr/bin/vector \
+    && ldd /usr/bin/vector \
+    && patchelf --set-interpreter /lib64/ld-linux-x86-64.so.2 /usr/bin/vector \
+    && ldd /usr/bin/vector \
+    && /usr/bin/vector --version
diff --git a/skaffold/manifests/config.yaml b/skaffold/manifests/config.yaml
new file mode 100644
index 0000000000000..e34be3fa588e0
--- /dev/null
+++ b/skaffold/manifests/config.yaml
@@ -0,0 +1,14 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: vector-config
+data:
+  vector.toml: |
+    [sources.internal_metrics]
+    type = "internal_metrics"
+
+    [sinks.stdout]
+    type = "console"
+    inputs = ["kubernetes_logs", "internal_metrics"]
+    target = "stdout"
+    encoding = "json"
diff --git a/skaffold/manifests/namespace.yaml b/skaffold/manifests/namespace.yaml
new file mode 100644
index 0000000000000..2b7563f84a801
--- /dev/null
+++ b/skaffold/manifests/namespace.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: vector
diff --git a/src/event/mod.rs b/src/event/mod.rs
index 5d00940ca1d8a..1fbcbed860292 100644
--- a/src/event/mod.rs
+++ b/src/event/mod.rs
@@ -26,8 +26,10 @@ pub mod proto {
 
 pub static LOG_SCHEMA: OnceCell<LogSchema> = OnceCell::new();
 
+pub const PARTIAL_STR: &str = "_partial"; // TODO: clean up the _STR suffix after we get rid of atoms
+
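+// `PARTIAL_STR` mirrors the `PARTIAL` atom below; it lets call sites that
+// already work with plain `&str` keys avoid going through `Atom`.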
lazy_static! { - pub static ref PARTIAL: Atom = Atom::from("_partial"); + pub static ref PARTIAL: Atom = Atom::from(PARTIAL_STR); static ref LOG_SCHEMA_DEFAULT: LogSchema = LogSchema { message_key: Atom::from("message"), timestamp_key: Atom::from("timestamp"), diff --git a/src/internal_events/kubernetes/api_watcher.rs b/src/internal_events/kubernetes/api_watcher.rs new file mode 100644 index 0000000000000..cabfa35d19f01 --- /dev/null +++ b/src/internal_events/kubernetes/api_watcher.rs @@ -0,0 +1,24 @@ +use super::InternalEvent; +use std::fmt::Debug; + +#[derive(Debug)] +pub struct RequestPrepared { + pub request: R, +} + +impl InternalEvent for RequestPrepared { + fn emit_logs(&self) { + trace!(message = "request prepared", request = ?self.request); + } +} + +#[derive(Debug)] +pub struct ResponseReceived { + pub response: R, +} + +impl InternalEvent for ResponseReceived { + fn emit_logs(&self) { + trace!(message = "got response", response = ?self.response); + } +} diff --git a/src/internal_events/kubernetes/instrumenting_state.rs b/src/internal_events/kubernetes/instrumenting_state.rs new file mode 100644 index 0000000000000..9c256336dbcb1 --- /dev/null +++ b/src/internal_events/kubernetes/instrumenting_state.rs @@ -0,0 +1,57 @@ +use super::InternalEvent; +use metrics::counter; +use std::fmt::Debug; + +#[derive(Debug)] +pub struct StateItemAdded; + +#[derive(Debug)] +pub struct StateItemUpdated; + +#[derive(Debug)] +pub struct StateItemDeleted; + +#[derive(Debug)] +pub struct StateResynced; + +#[derive(Debug)] +pub struct StateMaintenanceRequested; + +#[derive(Debug)] +pub struct StateMaintenancePerformed; + +impl InternalEvent for StateItemAdded { + fn emit_metrics(&self) { + counter!("k8s_state_ops", 1, "op_kind" => "item_added"); + } +} + +impl InternalEvent for StateItemUpdated { + fn emit_metrics(&self) { + counter!("k8s_state_ops", 1, "op_kind" => "item_updated"); + } +} + +impl InternalEvent for StateItemDeleted { + fn emit_metrics(&self) { + counter!("k8s_state_ops", 1, "op_kind" => "item_deleted"); + } +} + +impl InternalEvent for StateResynced { + fn emit_metrics(&self) { + counter!("k8s_state_ops", 1, "op_kind" => "resynced"); + } +} + +impl InternalEvent for StateMaintenanceRequested { + fn emit_metrics(&self) { + counter!("k8s_state_ops", 1, "op_kind" => "maintenance_requested"); + } +} + +impl InternalEvent for StateMaintenancePerformed { + fn emit_metrics(&self) { + counter!("k8s_state_ops", 1, "op_kind" => "maintenance_performed"); + } +} diff --git a/src/internal_events/kubernetes/instrumenting_watcher.rs b/src/internal_events/kubernetes/instrumenting_watcher.rs new file mode 100644 index 0000000000000..599fa6d106d86 --- /dev/null +++ b/src/internal_events/kubernetes/instrumenting_watcher.rs @@ -0,0 +1,51 @@ +use super::InternalEvent; +use metrics::counter; +use std::fmt::Debug; + +#[derive(Debug)] +pub struct WatchRequestInvoked; + +impl InternalEvent for WatchRequestInvoked { + fn emit_metrics(&self) { + counter!("k8s_watch_requests_invoked", 1); + } +} + +#[derive(Debug)] +pub struct WatchRequestInvocationFailed { + pub error: E, +} + +impl InternalEvent for WatchRequestInvocationFailed { + fn emit_logs(&self) { + error!(message = "watch invocation failed", error = ?self.error, rate_limit_secs = 5); + } + + fn emit_metrics(&self) { + counter!("k8s_watch_requests_failed", 1); + } +} + +#[derive(Debug)] +pub struct WatchStreamItemObtained; + +impl InternalEvent for WatchStreamItemObtained { + fn emit_metrics(&self) { + counter!("k8s_watch_stream_items_obtained", 1); + } 
+} + +#[derive(Debug)] +pub struct WatchStreamErrored { + pub error: E, +} + +impl InternalEvent for WatchStreamErrored { + fn emit_logs(&self) { + error!(message = "watch stream errored", error = ?self.error, rate_limit_secs = 5); + } + + fn emit_metrics(&self) { + counter!("k8s_watch_stream_errors", 1); + } +} diff --git a/src/internal_events/kubernetes/mod.rs b/src/internal_events/kubernetes/mod.rs new file mode 100644 index 0000000000000..f5caf3340ecb9 --- /dev/null +++ b/src/internal_events/kubernetes/mod.rs @@ -0,0 +1,9 @@ +#![cfg(feature = "kubernetes")] + +use super::InternalEvent; + +pub mod api_watcher; +pub mod instrumenting_state; +pub mod instrumenting_watcher; +pub mod reflector; +pub mod stream; diff --git a/src/internal_events/kubernetes/reflector.rs b/src/internal_events/kubernetes/reflector.rs new file mode 100644 index 0000000000000..4090aab0a475f --- /dev/null +++ b/src/internal_events/kubernetes/reflector.rs @@ -0,0 +1,19 @@ +use super::InternalEvent; +use metrics::counter; + +/// Emitted when reflector gets a desync from the watch command. +#[derive(Debug)] +pub struct DesyncReceived { + /// The underlying error. + pub error: E, +} + +impl InternalEvent for DesyncReceived { + fn emit_logs(&self) { + warn!(message = "handling desync", error = ?self.error); + } + + fn emit_metrics(&self) { + counter!("k8s_reflector_desyncs", 1); + } +} diff --git a/src/internal_events/kubernetes/stream.rs b/src/internal_events/kubernetes/stream.rs new file mode 100644 index 0000000000000..964d553eabcb3 --- /dev/null +++ b/src/internal_events/kubernetes/stream.rs @@ -0,0 +1,14 @@ +use super::InternalEvent; +use metrics::counter; + +#[derive(Debug)] +pub struct ChunkProcessed { + pub byte_size: usize, +} + +impl InternalEvent for ChunkProcessed { + fn emit_metrics(&self) { + counter!("k8s_stream_chunks_processed", 1); + counter!("k8s_stream_bytes_processed", self.byte_size as u64); + } +} diff --git a/src/internal_events/kubernetes_logs.rs b/src/internal_events/kubernetes_logs.rs new file mode 100644 index 0000000000000..4add1ea95e218 --- /dev/null +++ b/src/internal_events/kubernetes_logs.rs @@ -0,0 +1,53 @@ +use super::InternalEvent; +use crate::Event; +use metrics::counter; + +#[derive(Debug)] +pub struct KubernetesLogsEventReceived<'a> { + pub file: &'a str, + pub byte_size: usize, +} + +impl InternalEvent for KubernetesLogsEventReceived<'_> { + fn emit_logs(&self) { + trace!( + message = "received one event", + file = %self.file + ); + } + + fn emit_metrics(&self) { + counter!( + "events_processed", 1, + "component_kind" => "source", + "component_type" => "kubernetes_logs", + ); + counter!( + "bytes_processed", self.byte_size as u64, + "component_kind" => "source", + "component_type" => "kubernetes_logs", + ); + } +} + +#[derive(Debug)] +pub struct KubernetesLogsEventAnnotationFailed<'a> { + pub event: &'a Event, +} + +impl InternalEvent for KubernetesLogsEventAnnotationFailed<'_> { + fn emit_logs(&self) { + warn!( + message = "failed to annotate event with pod metadata", + event = ?self.event + ); + } + + fn emit_metrics(&self) { + counter!( + "k8s_event_annotation_failures", 1, + "component_kind" => "source", + "component_type" => "kubernetes_logs", + ); + } +} diff --git a/src/internal_events/mod.rs b/src/internal_events/mod.rs index 0bf12382ce537..f44edb4857b2b 100644 --- a/src/internal_events/mod.rs +++ b/src/internal_events/mod.rs @@ -4,6 +4,8 @@ mod blackhole; mod elasticsearch; mod file; mod json; +#[cfg(feature = "sources-kubernetes-logs")] +mod kubernetes_logs; 
#[cfg(feature = "transforms-lua")] mod lua; #[cfg(feature = "sources-prometheus")] @@ -18,12 +20,16 @@ mod vector; #[cfg(feature = "wasm")] mod wasm; +pub mod kubernetes; + pub use self::add_fields::*; pub use self::aws_kinesis_streams::*; pub use self::blackhole::*; pub use self::elasticsearch::*; pub use self::file::*; pub use self::json::*; +#[cfg(feature = "sources-kubernetes-logs")] +pub use self::kubernetes_logs::*; #[cfg(feature = "transforms-lua")] pub use self::lua::*; #[cfg(feature = "sources-prometheus")] diff --git a/src/kubernetes/api_watcher.rs b/src/kubernetes/api_watcher.rs new file mode 100644 index 0000000000000..729b5d8ad2cc2 --- /dev/null +++ b/src/kubernetes/api_watcher.rs @@ -0,0 +1,154 @@ +//! A watcher based on the k8s API. + +use super::{ + client::Client, + stream as k8s_stream, + watch_request_builder::WatchRequestBuilder, + watcher::{self, Watcher}, +}; +use crate::internal_events::kubernetes::api_watcher as internal_events; +use futures::{ + future::BoxFuture, + stream::{BoxStream, Stream}, +}; +use http::StatusCode; +use hyper::Error as BodyError; +use k8s_openapi::{WatchOptional, WatchResponse}; +use snafu::{ResultExt, Snafu}; + +/// A simple watcher atop of the Kubernetes API [`Client`]. +pub struct ApiWatcher +where + B: 'static, +{ + client: Client, + request_builder: B, +} + +impl ApiWatcher +where + B: 'static, +{ + /// Create a new [`ApiWatcher`]. + pub fn new(client: Client, request_builder: B) -> Self { + Self { + client, + request_builder, + } + } +} + +impl ApiWatcher +where + B: 'static + WatchRequestBuilder, + ::Object: Send + Unpin, +{ + async fn invoke( + &mut self, + watch_optional: WatchOptional<'_>, + ) -> Result< + impl Stream< + Item = Result< + WatchResponse<::Object>, + k8s_stream::Error, + >, + > + 'static, + watcher::invocation::Error, + > { + // Prepare request. + let request = self + .request_builder + .build(watch_optional) + .context(invocation::RequestPreparation)?; + emit!(internal_events::RequestPrepared { request: &request }); + + // Send request, get response. + let response = self + .client + .send(request) + .await + .context(invocation::Request)?; + emit!(internal_events::ResponseReceived { + response: &response + }); + + // Handle response status code. + let status = response.status(); + if status != StatusCode::OK { + let source = invocation::Error::BadStatus { status }; + let err = if status == StatusCode::GONE { + watcher::invocation::Error::desync(source) + } else { + watcher::invocation::Error::other(source) + }; + return Err(err); + } + + // Stream response body. + let body = response.into_body(); + Ok(k8s_stream::body(body)) + } +} + +impl Watcher for ApiWatcher +where + B: 'static + WatchRequestBuilder + Send, + ::Object: Send + Unpin, +{ + type Object = ::Object; + + type InvocationError = invocation::Error; + + type StreamError = k8s_stream::Error; + type Stream = BoxStream<'static, Result, Self::StreamError>>; + + fn watch<'a>( + &'a mut self, + watch_optional: WatchOptional<'a>, + ) -> BoxFuture<'a, Result>> + { + Box::pin(async move { + self.invoke(watch_optional) + .await + .map(Box::pin) + .map(|stream| stream as BoxStream<_>) + }) + } +} + +pub mod invocation { + //! Invocation error. + use super::*; + + /// Errors that can occur while watching. + #[derive(Debug, Snafu)] + #[snafu(visibility(pub))] + pub enum Error { + /// Returned when the call-specific request builder fails. + #[snafu(display("failed to prepare an HTTP request"))] + RequestPreparation { + /// The underlying error. 
+            source: k8s_openapi::RequestError,
+        },
+
+        /// Returned when the HTTP client fails to perform an HTTP request.
+        #[snafu(display("error during the HTTP request"))]
+        Request {
+            /// The error that the API client returned.
+            source: crate::Error,
+        },
+
+        /// Returned when the HTTP response has a bad status.
+        #[snafu(display("HTTP response has a bad status: {}", status))]
+        BadStatus {
+            /// The status from the HTTP response.
+            status: StatusCode,
+        },
+    }
+
+    impl From<Error> for watcher::invocation::Error<Error> {
+        fn from(source: Error) -> Self {
+            watcher::invocation::Error::other(source)
+        }
+    }
+}
diff --git a/src/kubernetes/client/config/in_cluster.rs b/src/kubernetes/client/config/in_cluster.rs
new file mode 100644
index 0000000000000..cb2b4f956eacb
--- /dev/null
+++ b/src/kubernetes/client/config/in_cluster.rs
@@ -0,0 +1,109 @@
+//! Everything related to building the in-cluster configuration.
+
+use super::Config;
+use crate::tls::TlsOptions;
+use http::Uri;
+use snafu::{ResultExt, Snafu};
+
+impl Config {
+    /// Prepares a config suitable for use when running in a k8s cluster.
+    pub fn in_cluster() -> Result<Self, Error> {
+        let host = std::env::var("KUBERNETES_SERVICE_HOST").context(NotInCluster {
+            missing: "KUBERNETES_SERVICE_HOST",
+        })?;
+        let port = std::env::var("KUBERNETES_SERVICE_PORT").context(NotInCluster {
+            missing: "KUBERNETES_SERVICE_PORT",
+        })?;
+
+        let base = Uri::builder()
+            .scheme("https")
+            .authority(join_host_port(host.as_str(), port.as_str()).as_str())
+            .path_and_query("/")
+            .build()
+            .context(InvalidUrl)?;
+
+        let token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token";
+        let root_ca_file = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt";
+
+        let token = std::fs::read_to_string(token_file).context(Token)?;
+
+        let mut tls_options = TlsOptions::default();
+        tls_options.ca_file = Some(root_ca_file.into());
+
+        Ok(Self {
+            base,
+            token,
+            tls_options,
+        })
+    }
+}
+
+/// An error returned when building an in-cluster configuration.
+#[derive(Debug, Snafu)]
+pub enum Error {
+    /// The in-cluster configuration was requested while not executing in
+    /// a cluster environment.
+    #[snafu(display("unable to load in-cluster configuration, KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT must be defined"))]
+    NotInCluster {
+        /// The underlying error.
+        source: std::env::VarError,
+
+        /// The field that's missing.
+        missing: &'static str,
+    },
+
+    /// The token file could not be read successfully.
+    #[snafu(display("unable to read the token file"))]
+    Token {
+        /// The underlying error.
+        source: std::io::Error,
+    },
+
+    /// The configuration resulted in an invalid URL.
+    #[snafu(display("unable to construct a proper API server URL"))]
+    InvalidUrl {
+        /// The underlying error.
+        source: http::Error,
+    },
+}
+
+/// This function implements the exact same logic that Go's `net.JoinHostPort`
+/// has.
+/// Rust doesn't have anything like this out of the box, yet the reference
+/// kubernetes client in-cluster config implementation uses it:
+/// https://github.com/kubernetes/client-go/blob/3d5c80942cce510064da1ab62c579e190a0230fd/rest/config.go#L484
+///
+/// To avoid needlessly complicating the logic here, we simply implement
+/// `net.JoinHostPort` as it is in Go: https://golang.org/pkg/net/#JoinHostPort
+fn join_host_port(host: &str, port: &str) -> String {
+    if host.contains(':') {
+        // If an IPv6 address is used, use the special bracket notation.
+        return format!("[{}]:{}", host, port);
+    }
+    // Use traditional notation for domain names and IPv4 addresses.
+ format!("{}:{}", host, port) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_join_host_port() { + // IPv4 + assert_eq!(join_host_port("0.0.0.0", "1234"), "0.0.0.0:1234"); + assert_eq!(join_host_port("127.0.0.1", "443"), "127.0.0.1:443"); + // IPv6 + assert_eq!(join_host_port("::", "1234"), "[::]:1234"); + assert_eq!( + join_host_port("2001:0db8:0000:0000:0000:8a2e:0370:7334", "1234"), + "[2001:0db8:0000:0000:0000:8a2e:0370:7334]:1234" + ); + assert_eq!( + join_host_port("2001:db8::8a2e:370:7334", "1234"), + "[2001:db8::8a2e:370:7334]:1234" + ); + // DNS + assert_eq!(join_host_port("example.com", "1234"), "example.com:1234"); + } +} diff --git a/src/kubernetes/client/config/mod.rs b/src/kubernetes/client/config/mod.rs new file mode 100644 index 0000000000000..96018d2b65ed9 --- /dev/null +++ b/src/kubernetes/client/config/mod.rs @@ -0,0 +1,27 @@ +//! Client configuration. + +use crate::tls::TlsOptions; +use http::Uri; + +pub mod in_cluster; + +/// A k8s client configuration. +/// +/// This type is designed to hold all possible variants of the configuration. +/// It also abstracts the client from the various ways to obtain the +/// configuration. +/// +/// The implementation is fairly limited, and only covers the use cases we +/// support. +#[derive(Debug, Clone)] +pub struct Config { + /// The base URL to use when constructing HTTP requests to the k8s API + /// server. + pub base: Uri, + + /// The bearer token to use at the `Authorization` header. + pub token: String, + + /// The TLS configuration parameters to use at the HTTP client. + pub tls_options: TlsOptions, +} diff --git a/src/kubernetes/client/mod.rs b/src/kubernetes/client/mod.rs new file mode 100644 index 0000000000000..a46948dfaa35c --- /dev/null +++ b/src/kubernetes/client/mod.rs @@ -0,0 +1,106 @@ +//! A Kubernetes API client built using Vector interfaces to the system +//! resources as building blocks. +//! +//! Here are a few pointers to the resources that were used as an inspiration +//! for this mod: +//! +//! - https://github.com/kubernetes/client-go/blob/master/tools/clientcmd/api/types.go +//! +//! A part of the official Kubernetes client library (in Go) that contains +//! the structure for KUBECONFIG files. Used for reference on naming things. +//! +//! - https://github.com/kubernetes/apimachinery/blob/master/pkg/watch/watch.go +//! +//! The reference design of the watchers composition and interfaces that's +//! known to work. +//! +//! - https://github.com/kubernetes/client-go/blob/master/rest/config.go +//! +//! The reference implementation on preparing the in-cluster config. +//! + +use crate::{dns::Resolver, sinks::util::http::HttpClient, tls::TlsSettings}; +use http::{ + header::{self, HeaderValue}, + uri, Request, Response, Uri, +}; +use hyper::body::Body; + +pub mod config; + +use config::Config; + +/// A client to the k8s API. +/// +/// Wraps our in-house [`HttpClient`]. +#[derive(Debug, Clone)] +pub struct Client { + inner: HttpClient, + uri_scheme: uri::Scheme, + uri_authority: uri::Authority, + auth_header: HeaderValue, +} + +impl Client { + /// Create a new [`Client`]. + /// + /// Takes the common kubernetes API cluster configuration [`Config`] and + /// a [`Resolver`] that is generally not the part of the config, but is + /// specific to our [`HttpClient`] implementation. + /// + /// Consumes the configuration to populate the internal state. + /// Retunrs an error if the configuratiton is not valid. + // TODO: add a proper error type. 
+    pub fn new(config: Config, resolver: Resolver) -> crate::Result<Self> {
+        let Config {
+            base,
+            tls_options,
+            token,
+        } = config;
+
+        let tls_settings = TlsSettings::from_options(&Some(tls_options))?;
+        let inner = HttpClient::new(resolver, tls_settings)?;
+
+        let uri::Parts {
+            scheme, authority, ..
+        } = base.into_parts();
+
+        let uri_scheme = scheme.ok_or_else(|| "no scheme")?;
+        let uri_authority = authority.ok_or_else(|| "no authority")?;
+
+        let auth_header = format!("Bearer {}", token);
+        let auth_header = HeaderValue::from_str(auth_header.as_str())?;
+
+        Ok(Self {
+            inner,
+            uri_scheme,
+            uri_authority,
+            auth_header,
+        })
+    }
+
+    /// Alters a request according to the client configuration and sends it.
+    pub async fn send<B: Into<Body>>(&mut self, req: Request<B>) -> crate::Result<Response<Body>> {
+        let req = self.prepare_request(req);
+        self.inner.send(req).await
+    }
+
+    fn prepare_request<B: Into<Body>>(&self, req: Request<B>) -> Request<Body> {
+        let (mut parts, body) = req.into_parts();
+        let body = body.into();
+
+        parts.uri = self.adjust_uri(parts.uri);
+        parts
+            .headers
+            .insert(header::AUTHORIZATION, self.auth_header.clone());
+
+        Request::from_parts(parts, body)
+    }
+
+    fn adjust_uri(&self, uri: Uri) -> Uri {
+        let mut parts = uri.into_parts();
+        parts.scheme = Some(self.uri_scheme.clone());
+        parts.authority = Some(self.uri_authority.clone());
+        Uri::from_parts(parts).unwrap()
+    }
+}
diff --git a/src/kubernetes/debounce.rs b/src/kubernetes/debounce.rs
new file mode 100644
index 0000000000000..0af7ac1cbcbf0
--- /dev/null
+++ b/src/kubernetes/debounce.rs
@@ -0,0 +1,243 @@
+//! Arbitrary signal debouncing logic.
+//!
+//! Call [`Debounce::signal`] multiple times within the debounce time window,
+//! and [`Debounce::debounced`] will resolve only once.
+
+use std::time::Duration;
+use tokio::time::{delay_until, Instant};
+
+/// Provides arbitrary signal debouncing.
+pub struct Debounce {
+    sequence_start: Option<Instant>,
+    time: Duration,
+}
+
+impl Debounce {
+    /// Create a new [`Debounce`].
+    pub fn new(time: Duration) -> Self {
+        Self {
+            sequence_start: None,
+            time,
+        }
+    }
+
+    /// Trigger a signal to debounce.
+    pub fn signal(&mut self) {
+        if self.sequence_start.is_none() {
+            self.sequence_start = Some(Instant::now() + self.time);
+        }
+    }
+
+    /// Debounced signal.
+    ///
+    /// This function resolves after the debounce timeout since the first
+    /// signal in the sequence expires.
+    /// If there hasn't been a signal, or the debounce timeout isn't yet
+    /// exhausted, the future will be in a pending state.
+    pub async fn debounced(&mut self) {
+        let sequence_start = match self.sequence_start {
+            Some(val) => val,
+            None => futures::future::pending().await,
+        };
+
+        delay_until(sequence_start).await;
+        self.sequence_start = None;
+    }
+
+    /// This function exposes the state of the debounce logic.
+    /// If this returns `false`, you shouldn't `poll` on [`debounced`], as it's
+    /// pending indefinitely.
+    pub fn is_debouncing(&self) -> bool {
+        self.sequence_start.is_some()
+    }
+}
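+
+// A minimal usage sketch (not part of the module above; `TIMEOUT` and the
+// enclosing async context are hypothetical, and a tokio 0.2 runtime is
+// assumed, since `delay_until` comes from `tokio::time`):
+//
+//     let mut debounce = Debounce::new(TIMEOUT);
+//     debounce.signal();
+//     debounce.signal(); // coalesced into the same debounce window
+//     debounce.debounced().await; // resolves once, when the window expires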
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use futures::{pin_mut, poll};
+
+    const TEST_DELAY_FRACTION: Duration = Duration::from_secs(60 * 60); // one hour
+    const TEST_DELAY: Duration = Duration::from_secs(24 * 60 * 60); // one day
+
+    #[tokio::test]
+    async fn one_signal() {
+        tokio::time::pause();
+
+        let mut debounce = Debounce::new(TEST_DELAY);
+        assert!(debounce.sequence_start.is_none());
+
+        // Issue a signal.
+        debounce.signal();
+        assert!(debounce.sequence_start.is_some());
+
+        {
+            // Request a debounced signal.
+            let fut = debounce.debounced();
+            pin_mut!(fut);
+
+            // Shouldn't be available immediately.
+            assert!(poll!(&mut fut).is_pending());
+
+            // Simulate that we waited for some time, but not long enough for
+            // the debounce to happen.
+            tokio::time::advance(TEST_DELAY_FRACTION).await;
+
+            // Still shouldn't be available.
+            assert!(poll!(&mut fut).is_pending());
+
+            // Then wait long enough for the debounce timeout to pass.
+            tokio::time::advance(TEST_DELAY * 2).await;
+
+            // Should finally be available.
+            assert!(poll!(&mut fut).is_ready());
+        }
+
+        assert!(debounce.sequence_start.is_none());
+
+        tokio::time::resume();
+    }
+
+    #[tokio::test]
+    async fn late_request() {
+        tokio::time::pause();
+
+        let mut debounce = Debounce::new(TEST_DELAY);
+        assert!(debounce.sequence_start.is_none());
+
+        // Issue a signal.
+        debounce.signal();
+        assert!(debounce.sequence_start.is_some());
+
+        // Simulate that we waited long enough.
+        tokio::time::advance(TEST_DELAY * 2).await;
+        assert!(debounce.sequence_start.is_some());
+
+        {
+            // Request a debounced signal.
+            let fut = debounce.debounced();
+            pin_mut!(fut);
+
+            // Should be available immediately.
+            assert!(poll!(&mut fut).is_ready());
+        }
+
+        assert!(debounce.sequence_start.is_none());
+
+        tokio::time::resume();
+    }
+
+    #[tokio::test]
+    async fn multiple_signals() {
+        tokio::time::pause();
+
+        let mut debounce = Debounce::new(TEST_DELAY);
+        assert!(debounce.sequence_start.is_none());
+
+        debounce.signal();
+
+        let first_signal_timestamp = debounce.sequence_start;
+        assert!(first_signal_timestamp.is_some());
+
+        debounce.signal();
+        assert_eq!(debounce.sequence_start, first_signal_timestamp);
+
+        tokio::time::advance(TEST_DELAY_FRACTION).await;
+
+        debounce.signal();
+        assert_eq!(debounce.sequence_start, first_signal_timestamp);
+
+        {
+            let fut = debounce.debounced();
+            pin_mut!(fut);
+
+            assert!(poll!(&mut fut).is_pending());
+
+            tokio::time::advance(TEST_DELAY_FRACTION).await;
+
+            assert!(poll!(&mut fut).is_pending());
+
+            tokio::time::advance(TEST_DELAY * 2).await;
+
+            assert!(poll!(&mut fut).is_ready());
+        }
+
+        assert!(debounce.sequence_start.is_none());
+
+        tokio::time::resume();
+    }
+
+    #[tokio::test]
+    async fn sequence() {
+        tokio::time::pause();
+
+        let mut debounce = Debounce::new(TEST_DELAY);
+        assert!(debounce.sequence_start.is_none());
+
+        debounce.signal();
+
+        let first_signal_timestamp = debounce.sequence_start;
+        assert!(first_signal_timestamp.is_some());
+
+        debounce.signal();
+        assert_eq!(debounce.sequence_start, first_signal_timestamp);
+
+        tokio::time::advance(TEST_DELAY_FRACTION).await;
+
+        debounce.signal();
+        assert_eq!(debounce.sequence_start, first_signal_timestamp);
+
+        {
+            let fut = debounce.debounced();
+            pin_mut!(fut);
+
+            assert!(poll!(&mut fut).is_pending());
+
+            tokio::time::advance(TEST_DELAY * 2).await;
+
+            assert!(poll!(&mut fut).is_ready());
+        }
+
+        assert!(debounce.sequence_start.is_none());
+
+        debounce.signal();
+
+        let second_signal_timestamp = debounce.sequence_start;
+        assert!(second_signal_timestamp.is_some());
+        assert_ne!(second_signal_timestamp, first_signal_timestamp);
+
+        {
+            let fut = debounce.debounced();
+            pin_mut!(fut);
+
+            assert!(poll!(&mut fut).is_pending());
+
+            tokio::time::advance(TEST_DELAY * 2).await;
+
+            assert!(poll!(&mut fut).is_ready());
+        }
+
+        assert!(debounce.sequence_start.is_none());
+
+        tokio::time::resume();
+    }
+
+    #[tokio::test]
+    async fn is_debouncing() {
+        tokio::time::pause();
+
+        let mut debounce = Debounce::new(TEST_DELAY);
+        assert_eq!(debounce.is_debouncing(), false);
+
+        debounce.signal();
+        assert_eq!(debounce.is_debouncing(), true);
+
+        tokio::time::advance(TEST_DELAY * 2).await;
+        assert_eq!(debounce.is_debouncing(), true);
+
+        debounce.debounced().await;
+        assert_eq!(debounce.is_debouncing(), false);
+
+        tokio::time::resume();
+    }
+}
diff --git a/src/kubernetes/hash_value.rs b/src/kubernetes/hash_value.rs
new file mode 100644
index 0000000000000..2270dbf35fa08
--- /dev/null
+++ b/src/kubernetes/hash_value.rs
@@ -0,0 +1,72 @@
+//! A wrapper to implement hash for k8s resource objects.
+
+use k8s_openapi::{apimachinery::pkg::apis::meta::v1::ObjectMeta, Metadata};
+use std::hash::{Hash, Hasher};
+use std::ops::Deref;
+
+/// A wrapper that provides a [`Hash`] implementation for any k8s resource
+/// object.
+/// Delegates to the object uid for hashing and equality.
+#[derive(Debug)]
+pub struct HashValue<T: Metadata<Ty = ObjectMeta>>(T);
+
+impl<T> HashValue<T>
+where
+    T: Metadata<Ty = ObjectMeta>,
+{
+    /// Create a new [`HashValue`] by wrapping a value of `T`.
+    pub fn new(value: T) -> Self {
+        Self(value)
+    }
+
+    /// Get the `uid` from the `T`'s [`Metadata`] (if any).
+    pub fn uid(&self) -> Option<&str> {
+        let ObjectMeta { ref uid, .. } = self.0.metadata()?;
+        let uid = uid.as_ref()?;
+        Some(uid.as_str())
+    }
+}
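+
+// A sketch of the intended semantics (`pod_a` and `pod_b` are hypothetical
+// `Pod` values that share the same `metadata.uid`): wrapped objects compare
+// equal and hash identically whenever their uids match, regardless of the
+// rest of the object state.
+//
+//     let a = HashValue::new(pod_a);
+//     let b = HashValue::new(pod_b);
+//     assert_eq!(a, b);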
+
+impl<T> PartialEq for HashValue<T>
+where
+    T: Metadata<Ty = ObjectMeta>,
+{
+    fn eq(&self, other: &Self) -> bool {
+        match (self.uid(), other.uid()) {
+            (Some(a), Some(b)) => a.eq(b),
+            (None, None) => true,
+            _ => false,
+        }
+    }
+}
+
+impl<T> Eq for HashValue<T> where T: Metadata<Ty = ObjectMeta> {}
+
+impl<T> Hash for HashValue<T>
+where
+    T: Metadata<Ty = ObjectMeta>,
+{
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.uid().hash(state)
+    }
+}
+
+impl<T> Deref for HashValue<T>
+where
+    T: Metadata<Ty = ObjectMeta>,
+{
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl<T> AsRef<T> for HashValue<T>
+where
+    T: Metadata<Ty = ObjectMeta>,
+{
+    fn as_ref(&self) -> &T {
+        &self.0
+    }
+}
diff --git a/src/kubernetes/instrumenting_watcher.rs b/src/kubernetes/instrumenting_watcher.rs
new file mode 100644
index 0000000000000..7cfd1cba00e29
--- /dev/null
+++ b/src/kubernetes/instrumenting_watcher.rs
@@ -0,0 +1,67 @@
+//! A watcher that adds instrumentation.
+
+use super::watcher::{self, Watcher};
+use crate::internal_events::kubernetes::instrumenting_watcher as internal_events;
+use futures::{future::BoxFuture, stream::BoxStream, FutureExt, StreamExt};
+use k8s_openapi::{WatchOptional, WatchResponse};
+
+/// A watcher that wraps another watcher with instrumentation calls.
+pub struct InstrumentingWatcher<T>
+where
+    T: Watcher,
+{
+    inner: T,
+}
+
+impl<T> InstrumentingWatcher<T>
+where
+    T: Watcher,
+{
+    /// Create a new [`InstrumentingWatcher`].
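+    ///
+    /// A minimal wrapping sketch (`inner_watcher` is a hypothetical value of
+    /// any type implementing [`Watcher`]):
+    ///
+    /// ```ignore
+    /// let watcher = InstrumentingWatcher::new(inner_watcher);
+    /// ```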
+    pub fn new(inner: T) -> Self {
+        Self { inner }
+    }
+}
+
+impl<T> Watcher for InstrumentingWatcher<T>
+where
+    T: Watcher,
+    <T as Watcher>::Stream: 'static,
+{
+    type Object = <T as Watcher>::Object;
+
+    type InvocationError = <T as Watcher>::InvocationError;
+
+    type StreamError = <T as Watcher>::StreamError;
+    type Stream = BoxStream<'static, Result<WatchResponse<Self::Object>, Self::StreamError>>;
+
+    fn watch<'a>(
+        &'a mut self,
+        watch_optional: WatchOptional<'a>,
+    ) -> BoxFuture<'a, Result<Self::Stream, watcher::invocation::Error<Self::InvocationError>>>
+    {
+        Box::pin(self.inner.watch(watch_optional).map(|result| {
+            result
+                .map(|stream| {
+                    emit!(internal_events::WatchRequestInvoked);
+                    Box::pin(stream.map(|item_result| {
+                        item_result
+                            .map(|item| {
+                                emit!(internal_events::WatchStreamItemObtained);
+                                item
+                            })
+                            .map_err(|error| {
+                                emit!(internal_events::WatchStreamErrored { error: &error });
+                                error
+                            })
+                    })) as BoxStream<'static, _>
+                })
+                .map_err(|error| {
+                    emit!(internal_events::WatchRequestInvocationFailed { error: &error });
+                    error
+                })
+        }))
+    }
+}
diff --git a/src/kubernetes/mock_watcher.rs b/src/kubernetes/mock_watcher.rs
new file mode 100644
index 0000000000000..7a55a47eeaa13
--- /dev/null
+++ b/src/kubernetes/mock_watcher.rs
@@ -0,0 +1,183 @@
+//! A mock watcher.
+
+#![cfg(test)]
+
+use super::watcher::{self, Watcher};
+use async_stream::try_stream;
+use futures::channel::mpsc::{Receiver, Sender};
+use futures::{future::BoxFuture, stream::BoxStream, SinkExt, StreamExt};
+use k8s_openapi::{Resource, WatchOptional, WatchResponse};
+use serde::de::DeserializeOwned;
+use std::fmt;
+
+/// An event that's sent to the test scenario driver.
+#[derive(Debug, PartialEq)]
+pub enum ScenarioEvent {
+    Invocation(OwnedWatchOptional),
+    Stream,
+}
+
+/// An action that's sent from the test scenario driver to specify the
+/// invocation result.
+pub enum ScenarioActionInvocation<T>
+where
+    T: DeserializeOwned + Resource,
+{
+    /// Return successfully and prepare the stream with responses from the
+    /// passed [`Receiver`].
+    Ok(Receiver<ScenarioActionStream<T>>),
+    /// Return a desync error.
+    ErrDesync,
+    /// Return an "other" (i.e. non-desync) error.
+    ErrOther,
+}
+
+/// An action that's sent from the test scenario driver to specify the
+/// stream item request result.
+pub enum ScenarioActionStream<T>
+where
+    T: DeserializeOwned + Resource,
+{
+    /// Return a watch response.
+    Ok(WatchResponse<T>),
+    /// Return an error.
+    Err,
+    /// Complete the stream (return `None`).
+    Done,
+}
+
+/// A mock watcher, useful for tests.
+pub struct MockWatcher<T>
+where
+    T: DeserializeOwned + Resource,
+{
+    events_tx: Sender<ScenarioEvent>,
+    invocation_rx: Receiver<ScenarioActionInvocation<T>>,
+}
+
+impl<T> MockWatcher<T>
+where
+    T: DeserializeOwned + Resource,
+{
+    /// Create a new [`MockWatcher`].
+    pub fn new(
+        events_tx: Sender<ScenarioEvent>,
+        invocation_rx: Receiver<ScenarioActionInvocation<T>>,
+    ) -> Self {
+        Self {
+            events_tx,
+            invocation_rx,
+        }
+    }
+}
+
+impl<T> Watcher for MockWatcher<T>
+where
+    T: DeserializeOwned + Resource + Send + Sync + Unpin + 'static,
+{
+    type Object = T;
+
+    type StreamError = StreamError;
+    type Stream = BoxStream<'static, Result<WatchResponse<Self::Object>, Self::StreamError>>;
+
+    type InvocationError = InvocationError;
+
+    fn watch<'a>(
+        &'a mut self,
+        watch_optional: WatchOptional<'a>,
+    ) -> BoxFuture<'a, Result<Self::Stream, watcher::invocation::Error<Self::InvocationError>>>
+    {
+        let mut stream_events_tx = self.events_tx.clone();
+        Box::pin(async move {
+            self.events_tx
+                .send(ScenarioEvent::Invocation(watch_optional.into()))
+                .await
+                .unwrap();
+
+            let action = self.invocation_rx.next().await.unwrap();
+            match action {
+                ScenarioActionInvocation::Ok(mut stream_rx) => {
+                    let stream = Box::pin(try_stream!
{ + loop { + stream_events_tx.send(ScenarioEvent::Stream) + .await + .unwrap(); + + let action = stream_rx.next().await.unwrap(); + match action { + ScenarioActionStream::Ok(val) => { + yield val + }, + ScenarioActionStream::Err => { + Err(StreamError)?; + break; + }, + ScenarioActionStream::Done => break, + } + } + }) + as BoxStream< + 'static, + Result, Self::StreamError>, + >; + Ok(stream) + } + ScenarioActionInvocation::ErrDesync => { + Err(watcher::invocation::Error::desync(InvocationError)) + } + ScenarioActionInvocation::ErrOther => { + Err(watcher::invocation::Error::other(InvocationError)) + } + } + }) + } +} + +/// An owned variant of [`WatchOptional`]. +/// Used to send it with [`ScenarioEvent`] to avoid the headaches with +/// lifetimes. +#[derive(Debug, PartialEq, Eq)] +pub struct OwnedWatchOptional { + pub allow_watch_bookmarks: Option, + pub field_selector: Option, + pub label_selector: Option, + pub pretty: Option, + pub resource_version: Option, + pub timeout_seconds: Option, +} + +impl<'a> From> for OwnedWatchOptional { + fn from(val: WatchOptional<'a>) -> Self { + Self { + allow_watch_bookmarks: val.allow_watch_bookmarks, + field_selector: val.field_selector.map(ToOwned::to_owned), + label_selector: val.label_selector.map(ToOwned::to_owned), + pretty: val.pretty.map(ToOwned::to_owned), + resource_version: val.resource_version.map(ToOwned::to_owned), + timeout_seconds: val.timeout_seconds, + } + } +} + +/// An error kind for the mock watcher invocation. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct InvocationError; + +/// An error kind for the mock watcher stream. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct StreamError; + +impl fmt::Display for InvocationError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self, f) + } +} + +impl fmt::Display for StreamError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self, f) + } +} + +impl std::error::Error for InvocationError {} +impl std::error::Error for StreamError {} diff --git a/src/kubernetes/mod.rs b/src/kubernetes/mod.rs new file mode 100644 index 0000000000000..f9df1a26cf21c --- /dev/null +++ b/src/kubernetes/mod.rs @@ -0,0 +1,42 @@ +//! This mod contains shared portions of the kubernetes implementations. +//! +//! Here are a few pointers to the resources that were used as an inspiration: +//! +//! - https://github.com/kubernetes/client-go/blob/master/tools/clientcmd/api/types.go +//! +//! A part of the official Kubernetes client library (in Go) that contains +//! the structure for KUBECONFIG files. Used for reference on naming things. +//! +//! - https://github.com/kubernetes/apimachinery/blob/master/pkg/watch/watch.go +//! +//! The reference design of the watchers composition and interfaces that's +//! known to work. +//! +//! - https://github.com/kubernetes/client-go/blob/master/rest/config.go +//! +//! The reference implementation on preparing the in-cluster config. +//! + +#![cfg(feature = "kubernetes")] +#![warn(missing_docs)] + +pub mod api_watcher; +pub mod client; +pub mod debounce; +pub mod hash_value; +pub mod instrumenting_watcher; +pub mod mock_watcher; +pub mod multi_response_decoder; +pub mod reflector; +pub mod resource_version; +pub mod state; +pub mod stream; +pub mod watch_request_builder; +pub mod watcher; + +// Reexports for more elegant public API. 
+pub use debounce::Debounce; +pub use hash_value::HashValue; +pub use multi_response_decoder::MultiResponseDecoder; +pub use reflector::Reflector; +pub use watch_request_builder::WatchRequestBuilder; diff --git a/src/kubernetes/multi_response_decoder.rs b/src/kubernetes/multi_response_decoder.rs new file mode 100644 index 0000000000000..034a9ba886491 --- /dev/null +++ b/src/kubernetes/multi_response_decoder.rs @@ -0,0 +1,498 @@ +//! Decode multiple [`Response`]s. + +use k8s_openapi::{http::StatusCode, Response, ResponseError}; + +/// Provides an algorithm to parse multiple [`Response`]s from multiple chunks +/// of data represented as `&[u8]`. +#[derive(Debug, Default)] +pub struct MultiResponseDecoder { + pending_data: Vec, + responses_buffer: Vec>, +} + +impl MultiResponseDecoder +where + T: Response, +{ + /// Create a new [`MultiResponseDecoder`]. + pub fn new() -> Self { + Self { + pending_data: Vec::new(), + responses_buffer: Vec::new(), + } + } + + /// Take the next chunk of data and spit out parsed `T`s. + pub fn process_next_chunk( + &mut self, + chunk: &[u8], + ) -> std::vec::Drain<'_, Result> { + self.pending_data.extend_from_slice(chunk); + loop { + match T::try_from_parts(StatusCode::OK, &self.pending_data) { + Ok((response, consumed_bytes)) => { + debug_assert!(consumed_bytes > 0, "parser must've consumed some data"); + self.pending_data.drain(..consumed_bytes); + self.responses_buffer.push(Ok(response)); + } + Err(ResponseError::NeedMoreData) => break, + Err(error) => { + error!(message = "error while decoding response", pending_data = ?self.pending_data, ?error); + self.responses_buffer.push(Err(error)); + break; + } + }; + } + self.responses_buffer.drain(..) + } + + /// Complete the parsing. + /// + /// Call this when you're not expecting any more data chunks. + /// Produces an error if there's unparsed data remaining. + pub fn finish(self) -> Result<(), Vec> { + let Self { pending_data, .. } = self; + // Kubernetes sometimes adds `\n` to the response, consider this + // a valid termination case. + if pending_data.is_empty() || pending_data == b"\n" { + return Ok(()); + } + Err(pending_data) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use k8s_openapi::{ + api::core::v1::Pod, + apimachinery::pkg::apis::meta::v1::{ObjectMeta, WatchEvent}, + WatchResponse, + }; + + /// Test object. + type TO = WatchResponse; + + // A helper function to make a test object. 
+ fn make_to(uid: &str) -> TO { + WatchResponse::Ok(WatchEvent::Added(Pod { + metadata: ObjectMeta { + uid: Some(uid.to_owned()), + ..ObjectMeta::default() + }, + ..Pod::default() + })) + } + + fn assert_test_object( + tested_test_object: Option>, + expected_uid: &str, + ) { + let actual_to = tested_test_object + .expect("expected an yielded entry, but none found") + .expect("parsing failed"); + let expected_to = make_to(expected_uid); + match (actual_to, expected_to) { + (WatchResponse::Ok(actual_event), WatchResponse::Ok(expected_event)) => { + assert_eq!(actual_event, expected_event) + } + _ => panic!("expected an event, got something else"), + } + } + + #[test] + fn test_empty() { + let dec = MultiResponseDecoder::::new(); + assert!(dec.finish().is_ok()); + } + + #[test] + fn test_incomplete() { + let mut dec = MultiResponseDecoder::::new(); + + { + let mut stream = dec.process_next_chunk(b"{"); + assert!(stream.next().is_none()); + } + + assert_eq!(dec.finish().unwrap_err(), b"{"); + } + + #[test] + fn test_rubblish() { + let mut dec = MultiResponseDecoder::::new(); + + { + let mut stream = dec.process_next_chunk(b"qwerty"); + assert!(stream.next().unwrap().is_err()); + assert!(stream.next().is_none()); + } + + assert_eq!(dec.finish().unwrap_err(), b"qwerty"); + } + + #[test] + fn test_one() { + let mut dec = MultiResponseDecoder::::new(); + + { + let mut stream = dec.process_next_chunk( + br#"{ + "type": "ADDED", + "object": { + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "uid": "uid0" + } + } + }"#, + ); + assert_test_object(stream.next(), "uid0"); + assert!(stream.next().is_none()); + } + + assert!(dec.finish().is_ok()); + } + + #[test] + fn test_chunked() { + let mut dec = MultiResponseDecoder::::new(); + + { + let mut stream = dec.process_next_chunk( + br#"{ + "type": "ADDED", + "ob"#, + ); + assert!(stream.next().is_none()); + } + + { + let mut stream = dec.process_next_chunk( + br#"ject": { + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "uid": "uid0" + } + } + }"#, + ); + assert_test_object(stream.next(), "uid0"); + assert!(stream.next().is_none()); + } + + assert!(dec.finish().is_ok()); + } + + #[test] + fn test_two() { + let mut dec = MultiResponseDecoder::::new(); + + { + let mut stream = dec.process_next_chunk( + br#"{ + "type": "ADDED", + "object": { + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "uid": "uid0" + } + } + }{ + "type": "ADDED", + "object": { + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "uid": "uid1" + } + } + }"#, + ); + assert_test_object(stream.next(), "uid0"); + assert_test_object(stream.next(), "uid1"); + assert!(stream.next().is_none()); + } + + assert!(dec.finish().is_ok()); + } + + #[test] + fn test_many_chunked_1() { + let mut dec = MultiResponseDecoder::::new(); + + { + let mut stream = dec.process_next_chunk( + br#"{ + "type": "ADDED", + "ob"#, + ); + assert!(stream.next().is_none()); + } + + { + let mut stream = dec.process_next_chunk( + br#"ject": { + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "uid": "uid0" + } + } + }{ + "type": "ADDED", + "object": { + "kind": "Pod", + "apiVe"#, + ); + assert_test_object(stream.next(), "uid0"); + assert!(stream.next().is_none()); + } + + { + let mut stream = dec.process_next_chunk( + br#"rsion": "v1", + "metadata": { + "uid": "uid1" + } + } + }"#, + ); + assert_test_object(stream.next(), "uid1"); + assert!(stream.next().is_none()); + } + + assert!(dec.finish().is_ok()); + } + + #[test] + fn test_many_chunked_2() { + let mut dec = 
MultiResponseDecoder::::new(); + + { + let mut stream = dec.process_next_chunk( + br#"{ + "type": "ADDED", + "object": { + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "uid": "uid0" + } + } + }{ + "type": "ADDED", + "ob"#, + ); + assert_test_object(stream.next(), "uid0"); + assert!(stream.next().is_none()); + } + + { + let mut stream = dec.process_next_chunk( + br#"ject": { + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "uid": "uid1" + } + } + }{ + "type": "ADDED", + "object": { + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "uid": "uid2" + } + } + }{ + "type": "ADDED", + "object": { + "kind": "Pod", + "apiVe"#, + ); + assert_test_object(stream.next(), "uid1"); + assert_test_object(stream.next(), "uid2"); + assert!(stream.next().is_none()); + } + + { + let mut stream = dec.process_next_chunk( + br#"rsion": "v1", + "metadata": { + "uid": "uid3" + } + } + }{ + "type": "ADDED", + "object": { + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "uid": "uid4" + } + } + }"#, + ); + assert_test_object(stream.next(), "uid3"); + assert_test_object(stream.next(), "uid4"); + assert!(stream.next().is_none()); + } + + assert!(dec.finish().is_ok()); + } + + #[test] + fn test_two_one_by_one() { + let mut dec = MultiResponseDecoder::::new(); + + { + let mut stream = dec.process_next_chunk( + br#"{ + "type": "ADDED", + "object": { + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "uid": "uid0" + } + } + }"#, + ); + assert_test_object(stream.next(), "uid0"); + assert!(stream.next().is_none()); + } + + { + let mut stream = dec.process_next_chunk( + br#"{ + "type": "ADDED", + "object": { + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "uid": "uid1" + } + } + }"#, + ); + assert_test_object(stream.next(), "uid1"); + assert!(stream.next().is_none()); + } + + assert!(dec.finish().is_ok()); + } + + #[test] + fn test_incomplete_after_valid_data() { + let mut dec = MultiResponseDecoder::::new(); + + { + let mut stream = dec.process_next_chunk( + br#"{ + "type": "ADDED", + "object": { + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "uid": "uid0" + } + } + }{"#, + ); + assert_test_object(stream.next(), "uid0"); + assert!(stream.next().is_none()); + } + + assert_eq!(dec.finish().unwrap_err(), b"{"); + } + + #[test] + fn test_allows_unparsed_newlines_at_finish() { + let mut dec = MultiResponseDecoder::::new(); + + { + let mut stream = dec.process_next_chunk(b"\n"); + assert!(stream.next().is_none()); + } + + assert!(dec.finish().is_ok()); + } + + #[test] + fn test_memory_usage() { + let mut dec = MultiResponseDecoder::::new(); + + let chunk = br#"{ + "type": "ADDED", + "object": { + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "uid": "uid0" + } + } + }"#; + let mut chunks = chunk.iter().cycle(); + + let max_chunks_per_iter = 15; + + // Simulate processing a huge number of items. + for _ in 0..100_000 { + // Take random amout of bytes from the chunks iter and prepare the + // next chunk. + let to_take = rand::random::() % (chunk.len() * max_chunks_per_iter); + let next_chunk = (&mut chunks).take(to_take).cloned().collect::>(); + + // Process the chunk data. + let stream = dec.process_next_chunk(next_chunk.as_ref()); + drop(stream); // consume all the emitted items + } + + // Check that `pending_data` capacity didn't grow out way of hand. + // If we had issues with memory management, it would be the one + // to blow first. 
+        assert!(dec.pending_data.capacity() <= chunk.len() * 100);
+
+        // Ensure that the response buffer never grows beyond its capacity limit.
+        // The capacity limit is set based on heuristics about `Vec` internals,
+        // and is adjusted to be as low as possible.
+        assert!(dec.responses_buffer.capacity() <= (max_chunks_per_iter + 2).next_power_of_two());
+    }
+
+    #[test]
+    fn test_practical_error_case_1() {
+        let mut dec = MultiResponseDecoder::<TO>::new();
+
+        {
+            let mut stream = dec.process_next_chunk(&[
+                123, 34, 116, 121, 112, 101, 34, 58, 34, 66, 79, 79, 75, 77, 65, 82, 75, 34, 44,
+                34, 111, 98, 106, 101, 99, 116, 34, 58, 123, 34, 107, 105, 110, 100, 34, 58, 34,
+                80, 111, 100, 34, 44, 34, 97, 112, 105, 86, 101, 114, 115, 105, 111, 110, 34, 58,
+                34, 118, 49, 34, 44, 34, 109, 101, 116, 97, 100, 97, 116, 97, 34, 58, 123, 34, 114,
+                101, 115, 111, 117, 114, 99, 101, 86, 101, 114, 115, 105, 111, 110, 34, 58, 34, 51,
+                56, 52, 53, 34, 44, 34, 99, 114, 101, 97, 116, 105, 111, 110, 84, 105, 109, 101,
+                115, 116, 97, 109, 112, 34, 58, 110, 117, 108, 108, 125, 44, 34, 115, 112, 101, 99,
+                34, 58, 123, 34, 99, 111, 110, 116, 97, 105, 110, 101, 114, 115, 34, 58, 110, 117,
+                108, 108, 125, 44, 34, 115, 116, 97, 116, 117, 115, 34, 58, 123, 125, 125, 125, 10,
+            ]);
+            let actual_to = stream
+                .next()
+                .expect("expected a yielded entry, but none found")
+                .expect("parsing failed");
+            let expected_event = WatchEvent::Bookmark {
+                resource_version: "3845".into(),
+            };
+            match actual_to {
+                WatchResponse::Ok(actual_event) => assert_eq!(actual_event, expected_event),
+                _ => panic!("expected an event, got something else"),
+            }
+        }
+
+        assert!(dec.finish().is_ok());
+    }
+}
diff --git a/src/kubernetes/reflector.rs b/src/kubernetes/reflector.rs
new file mode 100644
index 0000000000000..7b42596354032
--- /dev/null
+++ b/src/kubernetes/reflector.rs
@@ -0,0 +1,1027 @@
+//! Watch and cache the remote Kubernetes API resources.
+
+use super::{
+    resource_version, state,
+    watcher::{self, Watcher},
+};
+use crate::internal_events::kubernetes::reflector as internal_events;
+use futures::{
+    pin_mut,
+    stream::{Stream, StreamExt},
+};
+use k8s_openapi::{
+    apimachinery::pkg::apis::meta::v1::{ObjectMeta, WatchEvent},
+    Metadata, WatchOptional, WatchResponse,
+};
+use snafu::Snafu;
+use std::convert::Infallible;
+use std::time::Duration;
+use tokio::{select, time::delay_for};
+
+/// Watches remote Kubernetes resources and maintains a local representation of
+/// the remote state. "Reflects" the remote state locally.
+///
+/// Does not expose an evented API, but keeps track of the resource versions
+/// and will automatically resume on desync.
+pub struct Reflector<W, S>
+where
+    W: Watcher,
+    <W as Watcher>::Object: Metadata<Ty = ObjectMeta> + Send,
+    S: state::MaintainedWrite<Item = <W as Watcher>::Object>,
+{
+    watcher: W,
+    state_writer: S,
+    field_selector: Option<String>,
+    label_selector: Option<String>,
+    resource_version: resource_version::State,
+    pause_between_requests: Duration,
+}
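Before the implementation, the resume protocol in miniature. This is a self-contained sketch and not this module's actual `Watcher` API: the `watch` function, its error type, and the numeric versions are hypothetical stand-ins, but the cursor handling mirrors the loop implemented below.

```rust
use futures::{pin_mut, stream, StreamExt};

#[derive(Debug)]
#[allow(dead_code)]
enum WatchError {
    Desync,
    Fatal,
}

// A stand-in watch call: returns a short stream of "resource versions"
// starting right after the cursor.
async fn watch(cursor: Option<u64>) -> Result<impl futures::Stream<Item = u64>, WatchError> {
    let start = cursor.unwrap_or(0) + 1;
    Ok(stream::iter(start..start + 3))
}

#[tokio::main]
async fn main() -> Result<(), WatchError> {
    let mut cursor: Option<u64> = None;
    for _ in 0..3 {
        match watch(cursor).await {
            Ok(events) => {
                pin_mut!(events);
                while let Some(version) = events.next().await {
                    // Commit the version only after the event is processed,
                    // so an unprocessed event is redelivered after a resume.
                    cursor = Some(version);
                }
            }
            // A desync invalidates the cursor: restart from scratch.
            Err(WatchError::Desync) => cursor = None,
            Err(err) => return Err(err),
        }
    }
    println!("resumed up to version {:?}", cursor);
    Ok(())
}
```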
+
+impl<W, S> Reflector<W, S>
+where
+    W: Watcher,
+    <W as Watcher>::Object: Metadata<Ty = ObjectMeta> + Send,
+    S: state::MaintainedWrite<Item = <W as Watcher>::Object>,
+{
+    /// Create a new [`Reflector`].
+    pub fn new(
+        watcher: W,
+        state_writer: S,
+        field_selector: Option<String>,
+        label_selector: Option<String>,
+        pause_between_requests: Duration,
+    ) -> Self {
+        let resource_version = resource_version::State::new();
+        Self {
+            watcher,
+            state_writer,
+            label_selector,
+            field_selector,
+            resource_version,
+            pause_between_requests,
+        }
+    }
+}
+
+impl<W, S> Reflector<W, S>
+where
+    W: Watcher,
+    <W as Watcher>::Object: Metadata<Ty = ObjectMeta> + Send + Unpin + std::fmt::Debug,
+    <W as Watcher>::InvocationError: Unpin,
+    <W as Watcher>::StreamError: Unpin,
+    S: state::MaintainedWrite<Item = <W as Watcher>::Object>,
+{
+    /// Run the watch loop and drive the state updates via `state_writer`.
+    pub async fn run(
+        &mut self,
+    ) -> Result<Infallible, Error<<W as Watcher>::InvocationError, <W as Watcher>::StreamError>>
+    {
+        // Start the watch loop.
+        loop {
+            let invocation_result = self.issue_request().await;
+            let stream = match invocation_result {
+                Ok(val) => val,
+                Err(watcher::invocation::Error::Desync { source }) => {
+                    emit!(internal_events::DesyncReceived { error: source });
+                    // We got desynced, reset the state and retry fetching.
+                    self.resource_version.reset();
+                    self.state_writer.resync().await;
+                    continue;
+                }
+                Err(watcher::invocation::Error::Other { source }) => {
+                    // Not a desync, fail everything.
+                    error!(message = "watcher error", error = ?source);
+                    return Err(Error::Invocation { source });
+                }
+            };
+
+            pin_mut!(stream);
+            loop {
+                // Obtain a value from the watch stream.
+                // If maintenance is requested, we perform it concurrently
+                // with reading items from the watch stream.
+                let maintenance_request = self.state_writer.maintenance_request();
+                let val = select! {
+                    // If we got a maintenance request - perform the
+                    // maintenance.
+                    _ = async { maintenance_request.unwrap().await }, if maintenance_request.is_some() => {
+                        self.state_writer.perform_maintenance().await;
+                        continue;
+                    }
+                    // If we got a value from the watch stream - just pass it
+                    // outside.
+                    val = stream.next() => val,
+                };
+                trace!(message = "got an item from watch stream");
+
+                if let Some(item) = val {
+                    // A new item arrived from the watch response stream
+                    // first - process it.
+                    self.process_stream_item(item).await?;
+                } else {
+                    // The response stream has ended.
+                    // Break the watch reading loop so the flow can
+                    // continue and issue a new watch request.
+                    break;
+                }
+            }
+
+            // For the next pause duration we won't get any updates.
+            // This is better than flooding the k8s API server with requests.
+            delay_for(self.pause_between_requests).await;
+        }
+    }
+
+    /// Prepare and execute a watch request.
+    async fn issue_request(
+        &mut self,
+    ) -> Result<<W as Watcher>::Stream, watcher::invocation::Error<<W as Watcher>::InvocationError>>
+    {
+        let watch_optional = WatchOptional {
+            field_selector: self.field_selector.as_deref(),
+            label_selector: self.label_selector.as_deref(),
+            pretty: None,
+            resource_version: self.resource_version.get(),
+            timeout_seconds: Some(290), // https://github.com/kubernetes/kubernetes/issues/6513
+            allow_watch_bookmarks: Some(true),
+        };
+        let stream = self.watcher.watch(watch_optional).await?;
+        Ok(stream)
+    }
+
+    /// Process an item from the watch response stream.
+    async fn process_stream_item(
+        &mut self,
+        item: <<W as Watcher>::Stream as Stream>::Item,
+    ) -> Result<(), Error<<W as Watcher>::InvocationError, <W as Watcher>::StreamError>> {
+        // Any streaming error means the protocol is in an unexpected
+        // state. This is considered a fatal error; do not attempt
+        // to retry, just quit.
+        let response = item.map_err(|source| Error::Streaming { source })?;
+
+        // Unpack the event.
+        let event = match response {
+            WatchResponse::Ok(event) => event,
+            WatchResponse::Other(_) => {
+                // Even though we could parse the response, we didn't
+                // get the data we expected on the wire.
+                // According to the rules, we just ignore the unknown
+                // responses. This may be a newly added piece of data
+                // that our code doesn't know of.
+                // TODO: add more details on the data here if we
+                // encounter these messages in practice.
+                warn!(message = "got unexpected data in the watch response");
+                return Ok(());
+            }
+        };
+
+        // Prepare a resource version candidate so we can update (aka commit)
+        // it later.
+        let resource_version_candidate = match resource_version::Candidate::from_watch_event(&event)
+        {
+            Some(val) => val,
+            None => {
+                // This event doesn't have a resource version, which means
+                // it's not something we care about.
+                return Ok(());
+            }
+        };
+
+        // Process the event.
+        self.process_event(event).await;
+
+        // Record the resource version for this event, so that when we resume
+        // it won't be redelivered.
+        self.resource_version.update(resource_version_candidate);
+
+        Ok(())
+    }
+
+    /// Translate a received watch event into a state update.
+    async fn process_event(&mut self, event: WatchEvent<<W as Watcher>::Object>) {
+        match event {
+            WatchEvent::Added(object) => {
+                trace!(message = "got an object event", event = "added");
+                self.state_writer.add(object).await;
+            }
+            WatchEvent::Deleted(object) => {
+                trace!(message = "got an object event", event = "deleted");
+                self.state_writer.delete(object).await;
+            }
+            WatchEvent::Modified(object) => {
+                trace!(message = "got an object event", event = "modified");
+                self.state_writer.update(object).await;
+            }
+            WatchEvent::Bookmark { .. } => {
+                trace!(message = "got an object event", event = "bookmark");
+                // noop
+            }
+            _ => unreachable!("other event types should never reach this code"),
+        }
+    }
+}
+
+/// Errors that can occur while watching.
+#[derive(Debug, Snafu)]
+pub enum Error<I, S>
+where
+    I: std::error::Error + 'static,
+    S: std::error::Error + 'static,
+{
+    /// Returned when the watch invocation (HTTP request) failed.
+    #[snafu(display("watch invocation failed"))]
+    Invocation {
+        /// The underlying invocation error.
+        source: I,
+    },
+
+    /// Returned when the stream failed with an error.
+    #[snafu(display("streaming error"))]
+    Streaming {
+        /// The underlying stream error.
+        source: S,
+    },
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{Error, Reflector};
+    use crate::{
+        kubernetes::{
+            instrumenting_watcher::InstrumentingWatcher,
+            mock_watcher::{self, MockWatcher},
+            state,
+        },
+        test_util,
+    };
+    use futures::{channel::mpsc, SinkExt, StreamExt};
+    use k8s_openapi::{
+        api::core::v1::Pod,
+        apimachinery::pkg::apis::meta::v1::{ObjectMeta, WatchEvent},
+        Metadata, WatchResponse,
+    };
+    use std::time::Duration;
+
+    /// A helper function to simplify assertions on the `evmap` state.
+    fn gather_state<T>(handle: &evmap10::ReadHandle<String, state::evmap::Value<T>>) -> Vec<T>
+    where
+        T: Metadata<Ty = ObjectMeta> + Clone,
+    {
+        let mut vec: Vec<(String, T)> = handle
+            .read()
+            .expect("expected read to be ready")
+            .iter()
+            .map(|(key, values)| {
+                assert_eq!(values.len(), 1);
+                let value = values.get_one().unwrap();
+                (key.clone(), value.as_ref().as_ref().to_owned())
+            })
+            .collect();
+
+        // Sort the results by key for consistent assertions.
+        vec.sort_unstable_by(|a, b| a.0.cmp(&b.0));
+
+        // Discard keys.
+        vec.into_iter().map(|(_, value)| value).collect()
+    }
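All of the tests in this module drive their mocks through the same rendezvous protocol: the test double reports each call on an events channel, then blocks until the driver acknowledges it on an actions channel. A stripped-down, self-contained sketch of that handshake (the `"add"` event name is illustrative only, not the crate's actual event type):

```rust
use futures::{channel::mpsc, SinkExt, StreamExt};

#[tokio::main]
async fn main() {
    // Zero-capacity channels make every step a rendezvous point.
    let (mut event_tx, mut event_rx) = mpsc::channel::<&'static str>(0);
    let (mut action_tx, mut action_rx) = mpsc::channel::<()>(0);

    let double = tokio::spawn(async move {
        event_tx.send("add").await.unwrap(); // report the operation
        action_rx.next().await.unwrap(); // block until the driver responds
    });

    assert_eq!(event_rx.next().await.unwrap(), "add"); // observe the event
    action_tx.send(()).await.unwrap(); // release the double

    double.await.unwrap();
}
```

+    // A helper to build a pod object for test purposes.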
+ fn make_pod(uid: &str, resource_version: &str) -> Pod { + Pod { + metadata: ObjectMeta { + uid: Some(uid.to_owned()), + resource_version: Some(resource_version.to_owned()), + ..ObjectMeta::default() + }, + ..Pod::default() + } + } + + // A type alias to add expressiveness. + type StateSnapshot = Vec; + + // A helper enum to encode expected mock watcher invocation. + enum ExpInvRes { + Stream(Vec>), + Desync, + } + + // A simple test, to serve as a bare-bones example for adding further tests. + #[test] + fn simple_test() { + test_util::trace_init(); + test_util::block_on_std(async move { + // Prepare state. + let (state_events_tx, _state_events_rx) = mpsc::channel(0); + let (_state_actions_tx, state_actions_rx) = mpsc::channel(0); + let state_writer = state::mock::Writer::new(state_events_tx, state_actions_rx); + let state_writer = state::instrumenting::Writer::new(state_writer); + + // Prepare watcher. + let (watcher_events_tx, mut watcher_events_rx) = mpsc::channel(0); + let (mut watcher_invocations_tx, watcher_invocations_rx) = mpsc::channel(0); + let watcher = MockWatcher::::new(watcher_events_tx, watcher_invocations_rx); + let watcher = InstrumentingWatcher::new(watcher); + + // Prepare reflector. + let mut reflector = + Reflector::new(watcher, state_writer, None, None, Duration::from_secs(1)); + + // Run test logic. + let logic = tokio::spawn(async move { + // Wait for watcher to request next invocation. + assert!(matches!( + watcher_events_rx.next().await.unwrap(), + mock_watcher::ScenarioEvent::Invocation(_) + )); + + // We're done with the test, send the error to terminate the + // reflector. + watcher_invocations_tx + .send(mock_watcher::ScenarioActionInvocation::ErrOther) + .await + .unwrap(); + }); + + // Run the test and wait for an error. + let result = reflector.run().await; + + // Join on the logic first, to report logic errors with higher + // priority. + logic.await.unwrap(); + + // The only way reflector completes is with an error, but that's ok. + // In tests we make it exit with an error to complete the test. + result.unwrap_err(); + + // Explicitly drop the reflector at the very end. + drop(reflector); + }); + } + + // Test the properties of the normal execution flow. + #[test] + fn flow_test() { + test_util::trace_init(); + + let invocations = vec![ + ( + vec![], + None, + ExpInvRes::Stream(vec![ + WatchEvent::Added(make_pod("uid0", "10")), + WatchEvent::Added(make_pod("uid1", "15")), + ]), + ), + ( + vec![make_pod("uid0", "10"), make_pod("uid1", "15")], + Some("15".to_owned()), + ExpInvRes::Stream(vec![ + WatchEvent::Modified(make_pod("uid0", "20")), + WatchEvent::Added(make_pod("uid2", "25")), + ]), + ), + ( + vec![ + make_pod("uid0", "20"), + make_pod("uid1", "15"), + make_pod("uid2", "25"), + ], + Some("25".to_owned()), + ExpInvRes::Stream(vec![WatchEvent::Bookmark { + resource_version: "50".into(), + }]), + ), + ( + vec![ + make_pod("uid0", "20"), + make_pod("uid1", "15"), + make_pod("uid2", "25"), + ], + Some("50".to_owned()), + ExpInvRes::Stream(vec![ + WatchEvent::Deleted(make_pod("uid2", "55")), + WatchEvent::Modified(make_pod("uid0", "60")), + ]), + ), + ]; + let expected_resulting_state = vec![make_pod("uid0", "60"), make_pod("uid1", "15")]; + + // Use standard flow test logic. + run_flow_test(invocations, expected_resulting_state); + } + + // Test the properies of the flow with desync. 
+ #[test] + fn desync_test() { + test_util::trace_init(); + + let invocations = vec![ + ( + vec![], + None, + ExpInvRes::Stream(vec![ + WatchEvent::Added(make_pod("uid0", "10")), + WatchEvent::Added(make_pod("uid1", "15")), + ]), + ), + ( + vec![make_pod("uid0", "10"), make_pod("uid1", "15")], + Some("15".to_owned()), + ExpInvRes::Desync, + ), + ( + vec![make_pod("uid0", "10"), make_pod("uid1", "15")], + None, + ExpInvRes::Stream(vec![ + WatchEvent::Added(make_pod("uid20", "1000")), + WatchEvent::Added(make_pod("uid21", "1005")), + ]), + ), + ( + vec![make_pod("uid20", "1000"), make_pod("uid21", "1005")], + Some("1005".to_owned()), + ExpInvRes::Stream(vec![WatchEvent::Modified(make_pod("uid21", "1010"))]), + ), + ]; + let expected_resulting_state = vec![make_pod("uid20", "1000"), make_pod("uid21", "1010")]; + + // Use standard flow test logic. + run_flow_test(invocations, expected_resulting_state); + } + + /// Test that the state is properly initialized even if no events arrived. + #[test] + fn no_updates_state_test() { + test_util::trace_init(); + + let invocations = vec![]; + let expected_resulting_state = vec![]; + + // Use standard flow test logic. + run_flow_test(invocations, expected_resulting_state); + } + + // Test that [`k8s_openapi::WatchOptional`] is populated properly. + #[test] + fn arguments_test() { + test_util::trace_init(); + test_util::block_on_std(async move { + // Prepare state. + let (state_events_tx, _state_events_rx) = mpsc::channel(0); + let (_state_actions_tx, state_actions_rx) = mpsc::channel(0); + let state_writer = state::mock::Writer::new(state_events_tx, state_actions_rx); + + // Prepare watcher. + let (watcher_events_tx, mut watcher_events_rx) = mpsc::channel(0); + let (mut watcher_invocations_tx, watcher_invocations_rx) = mpsc::channel(0); + let watcher = MockWatcher::::new(watcher_events_tx, watcher_invocations_rx); + let watcher = InstrumentingWatcher::new(watcher); + + // Prepare reflector. + let mut reflector = Reflector::new( + watcher, + state_writer, + Some("fields".to_owned()), + Some("labels".to_owned()), + Duration::from_secs(1), + ); + + // Run test logic. + let logic = tokio::spawn(async move { + // Wait for watcher to request next invocation. + let invocation_event = watcher_events_rx.next().await.unwrap(); + + // Assert that we obtained an invocation event and obtain + // the passed `watch_optional`. + let watch_optional = match invocation_event { + mock_watcher::ScenarioEvent::Invocation(val) => val, + _ => panic!("unexpected event from watcher mock"), + }; + + // Assert that the arguments are passed properly. + assert_eq!( + watch_optional, + mock_watcher::OwnedWatchOptional { + allow_watch_bookmarks: Some(true), + field_selector: Some("fields".to_owned()), + label_selector: Some("labels".to_owned()), + pretty: None, + resource_version: None, + timeout_seconds: Some(290), + } + ); + + // We're done with the test, send the error to terminate the + // reflector. + watcher_invocations_tx + .send(mock_watcher::ScenarioActionInvocation::ErrOther) + .await + .unwrap(); + }); + + // Run the test and wait for an error. + let result = reflector.run().await; + + // Join on the logic first, to report logic errors with higher + // priority. + logic.await.unwrap(); + + // The only way reflector completes is with an error, but that's ok. + // In tests we make it exit with an error to complete the test. + result.unwrap_err(); + + // Explicitly drop the reflector at the very end. 
+ drop(reflector); + }) + } + + /// Test that the delayed delete works accordingly. + #[test] + fn test_delayed_deletes() { + test_util::trace_init(); + test_util::block_on_std(async move { + // Freeze time. + tokio::time::pause(); + + // Prepare state. + let (state_events_tx, mut state_events_rx) = mpsc::channel(0); + let (mut state_actions_tx, state_actions_rx) = mpsc::channel(0); + let state_writer = state::mock::Writer::new(state_events_tx, state_actions_rx); + let state_writer = state::instrumenting::Writer::new(state_writer); + let deletion_delay = Duration::from_secs(600); + let state_writer = state::delayed_delete::Writer::new(state_writer, deletion_delay); + + // Prepare watcher. + let (watcher_events_tx, mut watcher_events_rx) = mpsc::channel(0); + let (mut watcher_invocations_tx, watcher_invocations_rx) = mpsc::channel(0); + let watcher = MockWatcher::::new(watcher_events_tx, watcher_invocations_rx); + let watcher = InstrumentingWatcher::new(watcher); + + // Prepare reflector. + let mut reflector = + Reflector::new(watcher, state_writer, None, None, Duration::from_secs(1)); + + // Run test logic. + let logic = tokio::spawn(async move { + // Wait for watcher to request next invocation. + assert!(matches!( + watcher_events_rx.next().await.unwrap(), + mock_watcher::ScenarioEvent::Invocation(_) + )); + + // Provide watcher with a new stream. + let (mut watch_stream_tx, watch_stream_rx) = mpsc::channel(0); + watcher_invocations_tx + .send(mock_watcher::ScenarioActionInvocation::Ok(watch_stream_rx)) + .await + .unwrap(); + + // Wait for watcher to request next item from the stream. + assert_eq!( + watcher_events_rx.next().await.unwrap(), + mock_watcher::ScenarioEvent::Stream + ); + + // Send pod addition to a stream. + watch_stream_tx + .send(mock_watcher::ScenarioActionStream::Ok(WatchResponse::Ok( + WatchEvent::Added(make_pod("uid0", "10")), + ))) + .await + .unwrap(); + + // Let the reflector work until the pod addition propagates to + // the state. + assert_eq!( + state_events_rx.next().await.unwrap().unwrap_op(), + (make_pod("uid0", "10"), state::mock::OpKind::Add), + ); + + // Send the confirmation of the processing at the state. + state_actions_tx.send(()).await.unwrap(); + + // Let the reflector work until watcher requests next event from + // the stream. + assert_eq!( + watcher_events_rx.next().await.unwrap(), + mock_watcher::ScenarioEvent::Stream + ); + + // Send pod deletion to a stream. + watch_stream_tx + .send(mock_watcher::ScenarioActionStream::Ok(WatchResponse::Ok( + WatchEvent::Deleted(make_pod("uid0", "15")), + ))) + .await + .unwrap(); + + // Let the reflector work until watcher requests next event from + // the stream. + assert_eq!( + watcher_events_rx.next().await.unwrap(), + mock_watcher::ScenarioEvent::Stream + ); + + // Assert that the state didn't get the deletion (yet). + // State completes before the next item is requested from the + // watch stream, and since we waited for the stream item to + // be requested - we're guaranteed to have no race condition + // here. + assert!(state_events_rx.try_next().is_err()); + + // Advance the time 10 times the deletion delay. + tokio::time::advance(deletion_delay * 10).await; + + // At this point, maintenance should be performed, for both + // delayed deletion state and mock state. + + // Delayed deletes are processed first. + assert_eq!( + state_events_rx.next().await.unwrap().unwrap_op(), + (make_pod("uid0", "15"), state::mock::OpKind::Delete), + ); + + // Send the confirmation of the processing at the state. 
+ state_actions_tx.send(()).await.unwrap(); + + // Then, the maintenance event should be triggered. + // This completes the `perform_maintenance` call. + assert!(matches!( + state_events_rx.next().await.unwrap(), + state::mock::ScenarioEvent::Maintenance + )); + + // Send the confirmation of the processing at the state. + state_actions_tx.send(()).await.unwrap(); + + // We're done with the test! Shutdown the stream and force an + // invocation error to terminate the reflector. + + // Watcher is still waiting for the item on stream. + // Send done notification to the stream. + watch_stream_tx + .send(mock_watcher::ScenarioActionStream::Done) + .await + .unwrap(); + + // Wait for next invocation and send an error to terminate the + // flow. + assert!(matches!( + watcher_events_rx.next().await.unwrap(), + mock_watcher::ScenarioEvent::Invocation(_) + )); + watcher_invocations_tx + .send(mock_watcher::ScenarioActionInvocation::ErrOther) + .await + .unwrap(); + }); + + // Run the test and wait for an error. + let result = reflector.run().await; + + // Join on the logic first, to report logic errors with higher + // priority. + logic.await.unwrap(); + + // The only way reflector completes is with an error, but that's ok. + // In tests we make it exit with an error to complete the test. + result.unwrap_err(); + + // Explicitly drop the reflector at the very end. + drop(reflector); + + // Unfreeze time. + tokio::time::resume(); + }) + } + + /// Test that stream error terminates the reflector. + #[test] + fn test_stream_error() { + test_util::trace_init(); + test_util::block_on_std(async move { + // Prepare state. + let (state_events_tx, _state_events_rx) = mpsc::channel(0); + let (_state_actions_tx, state_actions_rx) = mpsc::channel(0); + let state_writer = state::mock::Writer::new(state_events_tx, state_actions_rx); + let state_writer = state::instrumenting::Writer::new(state_writer); + + // Prepare watcher. + let (watcher_events_tx, mut watcher_events_rx) = mpsc::channel(0); + let (mut watcher_invocations_tx, watcher_invocations_rx) = mpsc::channel(0); + let watcher = MockWatcher::::new(watcher_events_tx, watcher_invocations_rx); + let watcher = InstrumentingWatcher::new(watcher); + + // Prepare reflector. + let mut reflector = + Reflector::new(watcher, state_writer, None, None, Duration::from_secs(1)); + + // Run test logic. + let logic = tokio::spawn(async move { + // Wait for watcher to request next invocation. + assert!(matches!( + watcher_events_rx.next().await.unwrap(), + mock_watcher::ScenarioEvent::Invocation(_) + )); + + // Provide watcher with a new stream. + let (mut watch_stream_tx, watch_stream_rx) = mpsc::channel(0); + watcher_invocations_tx + .send(mock_watcher::ScenarioActionInvocation::Ok(watch_stream_rx)) + .await + .unwrap(); + + // Wait for watcher to request next item from the stream. + assert_eq!( + watcher_events_rx.next().await.unwrap(), + mock_watcher::ScenarioEvent::Stream + ); + + // Send an error to the stream. + watch_stream_tx + .send(mock_watcher::ScenarioActionStream::Err) + .await + .unwrap(); + }); + + // Run the test and wait for an error. + let result = reflector.run().await; + + // Join on the logic first, to report logic errors with higher + // priority. + logic.await.unwrap(); + + // Assert that the reflector properly passed the error. + assert!(matches!( + result.unwrap_err(), + Error::Streaming { + source: mock_watcher::StreamError + } + )); + + // Explicitly drop the reflector at the very end. 
+ drop(reflector); + }) + } + + /// Test that maintenance works accordingly. + #[test] + fn test_maintenance() { + test_util::trace_init(); + test_util::block_on_std(async move { + // Prepare state. + let (state_events_tx, mut state_events_rx) = mpsc::channel(0); + let (mut state_actions_tx, state_actions_rx) = mpsc::channel(0); + let (state_maintenance_request_events_tx, mut state_maintenance_request_events_rx) = + mpsc::channel(0); + let (mut state_maintenance_request_actions_tx, state_maintenance_request_actions_rx) = + mpsc::channel(0); + let state_writer = state::mock::Writer::new_with_maintenance( + state_events_tx, + state_actions_rx, + state_maintenance_request_events_tx, + state_maintenance_request_actions_rx, + ); + let state_writer = state::instrumenting::Writer::new(state_writer); + + // Prepare watcher. + let (watcher_events_tx, mut watcher_events_rx) = mpsc::channel(0); + let (mut watcher_invocations_tx, watcher_invocations_rx) = mpsc::channel(0); + let watcher = MockWatcher::::new(watcher_events_tx, watcher_invocations_rx); + let watcher = InstrumentingWatcher::new(watcher); + + // Prepare reflector. + let mut reflector = + Reflector::new(watcher, state_writer, None, None, Duration::from_secs(1)); + + // Run test logic. + let logic = tokio::spawn(async move { + // Wait for watcher to request next invocation. + assert!(matches!( + watcher_events_rx.next().await.unwrap(), + mock_watcher::ScenarioEvent::Invocation(_) + )); + + // Assert that maintenance request events didn't arrive yet. + assert!(state_maintenance_request_events_rx.try_next().is_err()); + + // Provide watcher with a new stream. + let (mut watch_stream_tx, watch_stream_rx) = mpsc::channel(0); + watcher_invocations_tx + .send(mock_watcher::ScenarioActionInvocation::Ok(watch_stream_rx)) + .await + .unwrap(); + + // Wait for reflector to request a state maintenance. + state_maintenance_request_events_rx.next().await.unwrap(); + + // Send the maintenance request action to advance to the + // maintenance. + state_maintenance_request_actions_tx.send(()).await.unwrap(); + + // Wait for a maintenance perform event arrival. + assert!(matches!( + state_events_rx.next().await.unwrap(), + state::mock::ScenarioEvent::Maintenance + )); + + // Send the confirmation of the state maintenance. + state_actions_tx.send(()).await.unwrap(); + + // Let the reflector work until watcher requests next event from + // the stream. + assert_eq!( + watcher_events_rx.next().await.unwrap(), + mock_watcher::ScenarioEvent::Stream + ); + + // We're done with the test! Shutdown the stream and force an + // invocation error to terminate the reflector. + + // Watcher is still waiting for the item on stream. + // Send done notification to the stream. + watch_stream_tx + .send(mock_watcher::ScenarioActionStream::Done) + .await + .unwrap(); + + // Wait for next invocation and send an error to terminate the + // flow. + assert!(matches!( + watcher_events_rx.next().await.unwrap(), + mock_watcher::ScenarioEvent::Invocation(_) + )); + watcher_invocations_tx + .send(mock_watcher::ScenarioActionInvocation::ErrOther) + .await + .unwrap(); + }); + + // Run the test and wait for an error. + let result = reflector.run().await; + + // Join on the logic first, to report logic errors with higher + // priority. + logic.await.unwrap(); + + // The only way reflector completes is with an error, but that's ok. + // In tests we make it exit with an error to complete the test. + result.unwrap_err(); + + // Explicitly drop the reflector at the very end. 
+ drop(reflector); + }) + } + + // A helper function to run a flow test. + // Use this to test various flows without the test code repetition. + fn run_flow_test( + invocations: Vec<(StateSnapshot, Option, ExpInvRes)>, + expected_resulting_state: StateSnapshot, + ) { + test_util::block_on_std(async move { + // Freeze time. + tokio::time::pause(); + + // Prepare state. + let (state_reader, state_writer) = evmap10::new(); + let state_writer = state::evmap::Writer::new(state_writer, None); // test without debounce to avouid complexity + let state_writer = state::instrumenting::Writer::new(state_writer); + let resulting_state_reader = state_reader.clone(); + + // Prepare watcher. + let (watcher_events_tx, mut watcher_events_rx) = mpsc::channel(0); + let (mut watcher_invocations_tx, watcher_invocations_rx) = mpsc::channel(0); + let watcher: MockWatcher = + MockWatcher::new(watcher_events_tx, watcher_invocations_rx); + let watcher = InstrumentingWatcher::new(watcher); + + // Prepare reflector. + let pause_between_requests = Duration::from_secs(60 * 60); // 1 hour + let mut reflector = + Reflector::new(watcher, state_writer, None, None, pause_between_requests); + + // Run test logic. + let logic = tokio::spawn(async move { + // Process the invocations. + for ( + expected_state_before_op, + expected_resource_version, + expected_invocation_response, + ) in invocations + { + // Wait for watcher to request next invocation. + let invocation_event = watcher_events_rx.next().await.unwrap(); + + // Assert that we obtained an invocation event. + let watch_optional = match invocation_event { + mock_watcher::ScenarioEvent::Invocation(val) => val, + _ => panic!("unexpected event from watcher mock"), + }; + + // Assert the current state while within the watcher stream + // item production code. + let state = gather_state(&state_reader); + assert_eq!(state, expected_state_before_op); + + // Assert the resource version passed with watch invocation. + assert_eq!(watch_optional.resource_version, expected_resource_version); + + // Determine the requested action from the test scenario. + let responses = match expected_invocation_response { + // Stream is requested, continue with the current flow. + ExpInvRes::Stream(responses) => responses, + // Desync is requested, complete the invocation with the desync. + ExpInvRes::Desync => { + // Send the desync action to mock watcher. + watcher_invocations_tx + .send(mock_watcher::ScenarioActionInvocation::ErrDesync) + .await + .unwrap(); + continue; + } + }; + + // Prepare channels for use in stream of the watch mock. + let (mut watch_stream_tx, watch_stream_rx) = mpsc::channel(0); + + // Send the stream action to the watch invocation. + watcher_invocations_tx + .send(mock_watcher::ScenarioActionInvocation::Ok(watch_stream_rx)) + .await + .unwrap(); + + for response in responses { + // Wait for watcher to request next item from the stream. + assert_eq!( + watcher_events_rx.next().await.unwrap(), + mock_watcher::ScenarioEvent::Stream + ); + + // Send the requested action to the stream. + watch_stream_tx + .send(mock_watcher::ScenarioActionStream::Ok(WatchResponse::Ok( + response, + ))) + .await + .unwrap(); + } + + // Wait for watcher to request next item from the stream. + assert_eq!( + watcher_events_rx.next().await.unwrap(), + mock_watcher::ScenarioEvent::Stream + ); + + // Send the notification that the stream is over. 
+                watch_stream_tx
+                    .send(mock_watcher::ScenarioActionStream::Done)
+                    .await
+                    .unwrap();
+
+                // Advance the time to scroll past the delay till the next
+                // invocation.
+                tokio::time::advance(pause_between_requests * 2).await;
+            }
+
+            // We're done with the test! Shutdown the stream and force an
+            // invocation error to terminate the reflector.
+
+            // Wait for the next invocation and send an error to terminate the
+            // flow.
+            assert!(matches!(
+                watcher_events_rx.next().await.unwrap(),
+                mock_watcher::ScenarioEvent::Invocation(_)
+            ));
+            watcher_invocations_tx
+                .send(mock_watcher::ScenarioActionInvocation::ErrOther)
+                .await
+                .unwrap();
+        });
+
+        // Run the test and wait for an error.
+        let result = reflector.run().await;
+
+        // Join on the logic first, to report logic errors with higher
+        // priority.
+        logic.await.unwrap();
+
+        // The only way reflector completes is with an error, but that's ok.
+        // In tests we make it exit with an error to complete the test.
+        result.unwrap_err();
+
+        // Assert the state after the reflector exit.
+        let resulting_state = gather_state(&resulting_state_reader);
+        assert_eq!(resulting_state, expected_resulting_state);
+
+        // Explicitly drop the reflector at the very end.
+        // The internal evmap is dropped with the reflector, so readers won't
+        // work after the drop.
+        drop(reflector);
+
+        // Unfreeze time.
+        tokio::time::resume();
+    })
+}
+}
diff --git a/src/kubernetes/resource_version.rs b/src/kubernetes/resource_version.rs
new file mode 100644
index 0000000000000..d611381255b51
--- /dev/null
+++ b/src/kubernetes/resource_version.rs
@@ -0,0 +1,74 @@
+//! Resource version types that ensure the proper usage protocol.
+
+use k8s_openapi::apimachinery::pkg::apis::meta::v1::{ObjectMeta, WatchEvent};
+use k8s_openapi::Metadata;
+
+/// Resource version state in the context of a chain of watch requests.
+#[derive(Debug, Clone, Default)]
+pub struct State(Option<String>);
+
+impl State {
+    /// Create a new resource version [`State`].
+    pub fn new() -> Self {
+        Self(None)
+    }
+
+    /// Update the resource version from a candidate obtained earlier.
+    ///
+    /// Returns the previous state.
+    pub fn update(&mut self, candidate: Candidate) -> Option<String> {
+        self.0.replace(candidate.0)
+    }
+
+    /// Reset the resource version. Use in case of a desync.
+    ///
+    /// Returns the previous state.
+    pub fn reset(&mut self) -> Option<String> {
+        self.0.take()
+    }
+
+    /// Get the current resource version value.
+    pub fn get(&self) -> Option<&str> {
+        Some(self.0.as_ref()?.as_str())
+    }
+}
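In miniature, the intended call protocol is: extract a candidate, process the event, and only then commit the candidate, so that an event whose processing failed is redelivered after a resume; a desync throws the stored version away. A self-contained sketch (the version strings are arbitrary):

```rust
#[derive(Default)]
struct State(Option<String>);
struct Candidate(String);

impl State {
    fn update(&mut self, candidate: Candidate) -> Option<String> {
        self.0.replace(candidate.0)
    }
    fn reset(&mut self) -> Option<String> {
        self.0.take()
    }
    fn get(&self) -> Option<&str> {
        self.0.as_deref()
    }
}

fn main() {
    let mut state = State::default();
    assert_eq!(state.get(), None); // the first watch call sends no version

    // ... the event is processed here, then the candidate is committed ...
    state.update(Candidate("3845".to_owned()));
    assert_eq!(state.get(), Some("3845")); // resume point for the next call

    state.reset(); // a desync invalidates the stored version
    assert_eq!(state.get(), None);
}
```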
+
+/// A resource version candidate; can be used to update the resource version.
+pub struct Candidate(String);
+
+impl Candidate {
+    /// Obtain a resource version [`Candidate`] from a [`WatchEvent`].
+    pub fn from_watch_event<T>(event: &WatchEvent<T>) -> Option<Self>
+    where
+        T: Metadata<Ty = ObjectMeta>,
+    {
+        let object = match event {
+            WatchEvent::Added(object)
+            | WatchEvent::Modified(object)
+            | WatchEvent::Deleted(object) => object,
+            WatchEvent::Bookmark { resource_version } => {
+                return Some(Self(resource_version.clone()))
+            }
+            WatchEvent::ErrorStatus(_) | WatchEvent::ErrorOther(_) => return None,
+        };
+        Self::from_object(object)
+    }
+
+    /// Obtain a resource version [`Candidate`] from an object of type `T`.
+    pub fn from_object<T>(object: &T) -> Option<Self>
+    where
+        T: Metadata<Ty = ObjectMeta>,
+    {
+        let metadata = object.metadata();
+
+        let new_resource_version = match metadata.resource_version {
+            Some(ref val) => val,
+            None => {
+                warn!(message = "Got empty resource version at object metadata");
+                return None;
+            }
+        };
+
+        Some(Self(new_resource_version.clone()))
+    }
+}
diff --git a/src/kubernetes/state/delayed_delete.rs b/src/kubernetes/state/delayed_delete.rs
new file mode 100644
index 0000000000000..7dad291f6112a
--- /dev/null
+++ b/src/kubernetes/state/delayed_delete.rs
@@ -0,0 +1,296 @@
+//! A state wrapper that delays deletes.
+
+use async_trait::async_trait;
+use futures::{future::BoxFuture, FutureExt};
+use std::{collections::VecDeque, time::Duration};
+use tokio::time::{delay_until, Instant};
+
+/// A [`super::Write`] implementation that wraps another [`super::Write`] and
+/// delays the delete calls.
+/// Implements the logic for delaying the deletion of items from the storage.
+pub struct Writer<T>
+where
+    T: super::Write + Send,
+    <T as super::Write>::Item: Send + Sync,
+{
+    inner: T,
+    queue: VecDeque<(<T as super::Write>::Item, Instant)>,
+    delay_for: Duration,
+}
+
+impl<T> Writer<T>
+where
+    T: super::Write + Send,
+    <T as super::Write>::Item: Send + Sync,
+{
+    /// Take a [`super::Write`] and return it wrapped with [`Self`].
+    pub fn new(inner: T, delay_for: Duration) -> Self {
+        let queue = VecDeque::new();
+        Self {
+            inner,
+            queue,
+            delay_for,
+        }
+    }
+}
+
+impl<T> Writer<T>
+where
+    T: super::Write + Send,
+    <T as super::Write>::Item: Send + Sync,
+{
+    /// Schedule the item for delayed deletion in the future.
+    pub fn schedule_delete(&mut self, item: <T as super::Write>::Item) {
+        let deadline = Instant::now() + self.delay_for;
+        self.queue.push_back((item, deadline));
+    }
+
+    /// Clear the delayed deletion requests.
+    pub fn clear(&mut self) {
+        self.queue.clear();
+    }
+
+    /// Perform the queued deletions.
+    pub async fn perform(&mut self) {
+        let now = Instant::now();
+        while let Some((_, deadline)) = self.queue.front() {
+            let deadline = *deadline;
+            trace!(message = "got delayed deletion deadline", ?deadline, ?now);
+            if deadline > now {
+                break;
+            }
+            trace!(
+                message = "processing delayed deletion for deadline",
+                ?deadline,
+                ?now
+            );
+            let (item, _) = self.queue.pop_front().unwrap();
+            self.inner.delete(item).await;
+        }
+    }
+
+    /// Obtain the next deadline.
+    pub fn next_deadline(&self) -> Option<Instant> {
+        self.queue.front().map(|(_, instant)| *instant)
+    }
+}
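The queue logic above in a self-contained miniature, with strings standing in for Kubernetes objects; `apply` is a hypothetical callback in place of the wrapped writer's `delete`:

```rust
use std::collections::VecDeque;
use std::time::{Duration, Instant};

// Deletes are queued with a deadline instead of being applied immediately;
// `perform` pops everything whose deadline has already passed.
struct DelayedDeletes {
    queue: VecDeque<(String, Instant)>,
    delay_for: Duration,
}

impl DelayedDeletes {
    fn schedule_delete(&mut self, item: String) {
        self.queue.push_back((item, Instant::now() + self.delay_for));
    }

    fn perform(&mut self, mut apply: impl FnMut(String)) {
        let now = Instant::now();
        while let Some((_, deadline)) = self.queue.front() {
            if *deadline > now {
                break; // the queue is deadline-ordered, so we can stop early
            }
            let (item, _) = self.queue.pop_front().unwrap();
            apply(item);
        }
    }
}

fn main() {
    let mut deletes = DelayedDeletes {
        queue: VecDeque::new(),
        delay_for: Duration::from_millis(10),
    };
    deletes.schedule_delete("pod_uid".to_owned());
    deletes.perform(|_| unreachable!("too early to delete"));
    std::thread::sleep(Duration::from_millis(20));
    deletes.perform(|item| println!("deleting {}", item));
}
```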
+
+#[async_trait]
+impl<T> super::Write for Writer<T>
+where
+    T: super::Write + Send,
+    <T as super::Write>::Item: Send + Sync,
+{
+    type Item = <T as super::Write>::Item;
+
+    async fn add(&mut self, item: Self::Item) {
+        self.inner.add(item).await
+    }
+
+    async fn update(&mut self, item: Self::Item) {
+        self.inner.update(item).await
+    }
+
+    async fn delete(&mut self, item: Self::Item) {
+        let deadline = Instant::now() + self.delay_for;
+        self.queue.push_back((item, deadline));
+    }
+
+    async fn resync(&mut self) {
+        self.queue.clear();
+        self.inner.resync().await
+    }
+}
+
+#[async_trait]
+impl<T> super::MaintainedWrite for Writer<T>
+where
+    T: super::MaintainedWrite + Send,
+    <T as super::Write>::Item: Send + Sync,
+{
+    fn maintenance_request(&mut self) -> Option<BoxFuture<'_, ()>> {
+        let delayed_delete_deadline = self.next_deadline().map(delay_until);
+        let downstream = self.inner.maintenance_request();
+
+        match (downstream, delayed_delete_deadline) {
+            (Some(downstream), Some(delayed_delete_deadline)) => {
+                let fut = futures::future::select(downstream, delayed_delete_deadline)
+                    .map(|either| either.factor_first().0);
+                Some(Box::pin(fut))
+            }
+            (None, Some(delayed_delete_deadline)) => Some(Box::pin(delayed_delete_deadline)),
+            (Some(downstream), None) => Some(downstream),
+            (None, None) => None,
+        }
+    }
+
+    async fn perform_maintenance(&mut self) {
+        // Perform the delayed deletes.
+        self.perform().await;
+
+        // Do the downstream maintenance.
+        self.inner.perform_maintenance().await;
+    }
+}
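The `maintenance_request` implementation above merges two optional signals, its own deadline and the downstream request, into one future that resolves when the earlier of the two fires. The core trick in isolation, as a runnable sketch assuming the tokio 0.2 / futures 0.3 APIs this PR already uses:

```rust
use futures::future::{self, FutureExt};
use std::time::Duration;

#[tokio::main]
async fn main() {
    // Two stand-in maintenance signals with different deadlines.
    let own_deadline = Box::pin(tokio::time::delay_for(Duration::from_millis(10)));
    let downstream = Box::pin(tokio::time::delay_for(Duration::from_millis(50)));

    // Whichever signal fires first resolves the merged future; both outputs
    // are `()`, so `factor_first` collapses the `Either` into a single value.
    let merged = future::select(own_deadline, downstream).map(|either| either.factor_first().0);

    merged.await;
    println!("maintenance required");
}
```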
+
+#[cfg(test)]
+mod tests {
+    use super::super::{mock, MaintainedWrite, Write};
+    use super::*;
+    use crate::test_util;
+    use futures::{channel::mpsc, SinkExt, StreamExt};
+    use k8s_openapi::{api::core::v1::Pod, apimachinery::pkg::apis::meta::v1::ObjectMeta};
+
+    const DELAY_FOR: Duration = Duration::from_secs(3600);
+
+    fn prepare_test() -> (
+        Writer<mock::Writer<Pod>>,
+        mpsc::Receiver<mock::ScenarioEvent<Pod>>,
+        mpsc::Sender<()>,
+    ) {
+        let (events_tx, events_rx) = mpsc::channel(0);
+        let (actions_tx, actions_rx) = mpsc::channel(0);
+        let writer = mock::Writer::new(events_tx, actions_rx);
+        let writer = Writer::new(writer, DELAY_FOR);
+        (writer, events_rx, actions_tx)
+    }
+
+    fn make_pod() -> Pod {
+        Pod {
+            metadata: ObjectMeta {
+                name: Some("pod_name".to_owned()),
+                uid: Some("pod_uid".to_owned()),
+                ..ObjectMeta::default()
+            },
+            ..Pod::default()
+        }
+    }
+
+    fn no_maintenance_test_flow<FT, FA>(ft: FT, fa: FA)
+    where
+        FT: for<'a> FnOnce(&'a mut (dyn Write<Item = Pod> + Send)) -> BoxFuture<'a, ()>
+            + Send
+            + 'static,
+        FA: FnOnce(mock::ScenarioEvent<Pod>) + Send + 'static,
+    {
+        test_util::trace_init();
+        test_util::block_on_std(async move {
+            tokio::time::pause();
+            let (mut writer, mut events_rx, mut actions_tx) = prepare_test();
+
+            // Ensure that right after construction maintenance is not required.
+            assert!(writer.maintenance_request().is_none());
+
+            let join = {
+                tokio::spawn(async move {
+                    let event = events_rx.next().await.unwrap();
+                    fa(event);
+                    actions_tx.send(()).await.unwrap();
+                })
+            };
+
+            // Ensure that before the operation maintenance is not required.
+            assert!(writer.maintenance_request().is_none());
+
+            {
+                let fut = ft(&mut writer);
+                fut.await;
+            }
+
+            // Ensure that after the operation maintenance is not required.
+            assert!(writer.maintenance_request().is_none());
+
+            join.await.unwrap();
+            tokio::time::resume();
+
+            // Ensure that, finally, maintenance is not required.
+            assert!(writer.maintenance_request().is_none());
+        })
+    }
+
+    #[test]
+    fn add() {
+        let pod = make_pod();
+        let assert_pod = pod.clone();
+        no_maintenance_test_flow(
+            |writer| Box::pin(writer.add(pod)),
+            |event| assert_eq!(event.unwrap_op(), (assert_pod, mock::OpKind::Add)),
+        )
+    }
+
+    #[test]
+    fn update() {
+        let pod = make_pod();
+        let assert_pod = pod.clone();
+        no_maintenance_test_flow(
+            |writer| Box::pin(writer.update(pod)),
+            |event| assert_eq!(event.unwrap_op(), (assert_pod, mock::OpKind::Update)),
+        )
+    }
+
+    #[test]
+    fn delete() {
+        test_util::trace_init();
+        test_util::block_on_std(async {
+            // Freeze time.
+            tokio::time::pause();
+
+            // Prepare the test parameters.
+            let (mut writer, mut events_rx, mut actions_tx) = prepare_test();
+
+            // Ensure that right after construction maintenance is not required.
+            assert!(writer.maintenance_request().is_none());
+
+            // Prepare a mock pod.
+            let pod = make_pod();
+
+            writer.delete(pod.clone()).await;
+
+            // Ensure the deletion event didn't trigger the actual deletion immediately.
+            assert!(events_rx.try_next().is_err());
+
+            // Ensure the maintenance request is now present.
+            let maintenance_request = writer
+                .maintenance_request()
+                .expect("maintenance request should be present");
+
+            // Advance time.
+            tokio::time::advance(DELAY_FOR * 2).await;
+
+            // At this point, the maintenance request should be ready.
+            maintenance_request.await;
+
+            // Run the assertion on the delete operation to ensure maintenance
+            // actually causes a delete.
+            let join = tokio::spawn(async move {
+                // Control for the deletion action.
+                let event = events_rx.next().await.unwrap();
+                assert_eq!(event.unwrap_op(), (pod, mock::OpKind::Delete));
+                actions_tx.send(()).await.unwrap();
+
+                // Control for the mock perform maintenance call (downstream
+                // maintenance).
+                let event = events_rx.next().await.unwrap();
+                assert!(matches!(event, mock::ScenarioEvent::Maintenance));
+                actions_tx.send(()).await.unwrap();
+            });
+
+            // Perform the maintenance.
+            writer.perform_maintenance().await;
+
+            // Join on the assertion to guarantee panic propagation.
+            join.await.unwrap();
+
+            // Unfreeze time.
+            tokio::time::resume();
+        })
+    }
+
+    #[test]
+    fn resync() {
+        no_maintenance_test_flow(
+            |writer| Box::pin(writer.resync()),
+            |event| assert!(matches!(event, mock::ScenarioEvent::Resync)),
+        )
+    }
+}
diff --git a/src/kubernetes/state/evmap.rs b/src/kubernetes/state/evmap.rs
new file mode 100644
index 0000000000000..fa881341799aa
--- /dev/null
+++ b/src/kubernetes/state/evmap.rs
@@ -0,0 +1,203 @@
+//! A state implementation backed by [`evmap10`].
+
+use crate::kubernetes::{debounce::Debounce, hash_value::HashValue};
+use async_trait::async_trait;
+use evmap10::WriteHandle;
+use futures::future::BoxFuture;
+use k8s_openapi::{apimachinery::pkg::apis::meta::v1::ObjectMeta, Metadata};
+use std::time::Duration;
+
+/// A [`WriteHandle`] wrapper that implements [`super::Write`].
+/// For use as a state writer implementation for
+/// [`crate::kubernetes::Reflector`].
+pub struct Writer<T>
+where
+    T: Metadata<Ty = ObjectMeta> + Send,
+{
+    inner: WriteHandle<String, Value<T>>,
+    debounced_flush: Option<Debounce>,
+}
+
+impl<T> Writer<T>
+where
+    T: Metadata<Ty = ObjectMeta> + Send,
+{
+    /// Take a [`WriteHandle`], initialize it and return it wrapped with
+    /// [`Self`].
+    pub fn new(
+        mut inner: WriteHandle<String, Value<T>>,
+        flush_debounce_timeout: Option<Duration>,
+    ) -> Self {
+        // Prepare inner.
+        inner.purge();
+        inner.refresh();
+
+        // Prepare the flush debounce.
+        let debounced_flush = flush_debounce_timeout.map(Debounce::new);
+
+        Self {
+            inner,
+            debounced_flush,
+        }
+    }
+
+    /// Debounced `flush`.
+    /// When a number of flush events arrive in a row, we buffer them such that
+    /// only the last one in the chain is propagated.
+    /// This is intended to improve the state behavior at resync - by delaying
+    /// the `flush` propagation, we maximize the time `evmap` remains
+    /// populated, ideally allowing a single transition from the non-populated
+    /// to the populated state.
+    fn debounced_flush(&mut self) {
+        if let Some(ref mut debounced_flush) = self.debounced_flush {
+            debounced_flush.signal();
+        } else {
+            self.inner.flush();
+        }
+    }
+}
+
+#[async_trait]
+impl<T> super::Write for Writer<T>
+where
+    T: Metadata<Ty = ObjectMeta> + Send,
+{
+    type Item = T;
+
+    async fn add(&mut self, item: Self::Item) {
+        if let Some((key, value)) = kv(item) {
+            self.inner.insert(key, value);
+            self.debounced_flush();
+        }
+    }
+
+    async fn update(&mut self, item: Self::Item) {
+        if let Some((key, value)) = kv(item) {
+            self.inner.update(key, value);
+            self.debounced_flush();
+        }
+    }
+
+    async fn delete(&mut self, item: Self::Item) {
+        if let Some((key, _value)) = kv(item) {
+            self.inner.empty(key);
+            self.debounced_flush();
+        }
+    }
+
+    async fn resync(&mut self) {
+        // By omitting the flush here, we cache the results from the
+        // previous run until a flush is issued when the new events
+        // begin arriving, reducing the time during which the state
+        // has no data.
+        self.inner.purge();
+    }
+}
+
+#[async_trait]
+impl<T> super::MaintainedWrite for Writer<T>
+where
+    T: Metadata<Ty = ObjectMeta> + Send,
+{
+    fn maintenance_request(&mut self) -> Option<BoxFuture<'_, ()>> {
+        if let Some(ref mut debounced_flush) = self.debounced_flush {
+            if debounced_flush.is_debouncing() {
+                return Some(Box::pin(debounced_flush.debounced()));
+            }
+        }
+        None
+    }
+
+    async fn perform_maintenance(&mut self) {
+        if self.debounced_flush.is_some() {
+            self.inner.flush();
+        }
+    }
+}
+
+/// An alias for the value type used in [`evmap`].
+pub type Value<T> = Box<HashValue<T>>;
+
+/// Build a key-value pair for use in [`evmap`].
+fn kv<T: Metadata<Ty = ObjectMeta>>(object: T) -> Option<(String, Value<T>)> {
+    let value = Box::new(HashValue::new(object));
+    let key = value.uid()?.to_owned();
+    Some((key, value))
+}
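A synchronous miniature of the debouncing idea used above. The real `Debounce` in this PR is asynchronous and keeps its own semantics; this sketch only shows how a burst of signals collapses into a single flush:

```rust
use std::time::{Duration, Instant};

// `signal` arms a deadline; the flush is only performed once the deadline
// passes, however many signals arrived in the meantime.
struct Debounce {
    deadline: Option<Instant>,
    timeout: Duration,
}

impl Debounce {
    fn signal(&mut self) {
        self.deadline.get_or_insert_with(|| Instant::now() + self.timeout);
    }

    fn poll_flush(&mut self) -> bool {
        match self.deadline {
            Some(deadline) if deadline <= Instant::now() => {
                self.deadline = None;
                true // time to flush
            }
            _ => false,
        }
    }
}

fn main() {
    let mut debounce = Debounce {
        deadline: None,
        timeout: Duration::from_millis(10),
    };
    debounce.signal();
    debounce.signal(); // the burst collapses into a single pending flush
    assert!(!debounce.poll_flush());
    std::thread::sleep(Duration::from_millis(20));
    assert!(debounce.poll_flush());
}
```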
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::kubernetes::state::{MaintainedWrite, Write};
+    use k8s_openapi::api::core::v1::Pod;
+
+    fn make_pod(uid: &str) -> Pod {
+        Pod {
+            metadata: ObjectMeta {
+                uid: Some(uid.to_owned()),
+                ..ObjectMeta::default()
+            },
+            ..Pod::default()
+        }
+    }
+
+    #[test]
+    fn test_kv() {
+        let pod = make_pod("uid");
+        let (key, val) = kv(pod.clone()).unwrap();
+        assert_eq!(key, "uid");
+        assert_eq!(val, Box::new(HashValue::new(pod)));
+    }
+
+    #[tokio::test]
+    async fn test_without_debounce() {
+        let (state_reader, state_writer) = evmap10::new();
+        let mut state_writer = Writer::new(state_writer, None);
+
+        assert_eq!(state_reader.is_empty(), true);
+        assert!(state_writer.maintenance_request().is_none());
+
+        state_writer.add(make_pod("uid0")).await;
+
+        assert_eq!(state_reader.is_empty(), false);
+        assert!(state_writer.maintenance_request().is_none());
+
+        drop(state_writer);
+    }
+
+    #[tokio::test]
+    async fn test_with_debounce() {
+        // Due to https://github.com/tokio-rs/tokio/issues/2090 we're not
+        // pausing the time.
+
+        let (state_reader, state_writer) = evmap10::new();
+        let flush_debounce_timeout = Duration::from_millis(100);
+        let mut state_writer = Writer::new(state_writer, Some(flush_debounce_timeout));
+
+        assert_eq!(state_reader.is_empty(), true);
+        assert!(state_writer.maintenance_request().is_none());
+
+        state_writer.add(make_pod("uid0")).await;
+        state_writer.add(make_pod("uid1")).await;
+
+        assert_eq!(state_reader.is_empty(), true);
+        assert!(state_writer.maintenance_request().is_some());
+
+        let join = tokio::spawn(async move {
+            let mut state_writer = state_writer;
+            state_writer.maintenance_request().unwrap().await;
+            state_writer.perform_maintenance().await;
+            state_writer
+        });
+
+        assert_eq!(state_reader.is_empty(), true);
+
+        tokio::time::delay_for(flush_debounce_timeout * 2).await;
+        let mut state_writer = join.await.unwrap();
+
+        assert_eq!(state_reader.is_empty(), false);
+        assert!(state_writer.maintenance_request().is_none());
+
+        drop(state_writer);
+    }
+}
diff --git a/src/kubernetes/state/instrumenting.rs b/src/kubernetes/state/instrumenting.rs
new file mode 100644
index 0000000000000..979d2794dd415
--- /dev/null
+++ b/src/kubernetes/state/instrumenting.rs
@@ -0,0 +1,355 @@
+//! An instrumenting state wrapper.
+
+use crate::internal_events::kubernetes::instrumenting_state as internal_events;
+use async_trait::async_trait;
+use futures::future::BoxFuture;
+
+/// A [`super::Write`] implementation that wraps another [`super::Write`] and
+/// adds instrumentation.
+pub struct Writer<T> {
+    inner: T,
+}
+
+impl<T> Writer<T> {
+    /// Take a [`super::Write`] and return it wrapped with [`Self`].
+    pub fn new(inner: T) -> Self {
+        Self { inner }
+    }
+}
+
+#[async_trait]
+impl<T> super::Write for Writer<T>
+where
+    T: super::Write + Send,
+{
+    type Item = <T as super::Write>::Item;
+
+    async fn add(&mut self, item: Self::Item) {
+        emit!(internal_events::StateItemAdded);
+        self.inner.add(item).await
+    }
+
+    async fn update(&mut self, item: Self::Item) {
+        emit!(internal_events::StateItemUpdated);
+        self.inner.update(item).await
+    }
+
+    async fn delete(&mut self, item: Self::Item) {
+        emit!(internal_events::StateItemDeleted);
+        self.inner.delete(item).await
+    }
+
+    async fn resync(&mut self) {
+        emit!(internal_events::StateResynced);
+        self.inner.resync().await
+    }
+}
+
+#[async_trait]
+impl<T> super::MaintainedWrite for Writer<T>
+where
+    T: super::MaintainedWrite + Send,
+{
+    fn maintenance_request(&mut self) -> Option<BoxFuture<'_, ()>> {
+        self.inner.maintenance_request().map(|future| {
+            emit!(internal_events::StateMaintenanceRequested);
+            future
+        })
+    }
+
+    async fn perform_maintenance(&mut self) {
+        emit!(internal_events::StateMaintenancePerformed);
+        self.inner.perform_maintenance().await
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::super::{mock, MaintainedWrite, Write};
+    use super::*;
+    use crate::test_util;
+    use futures::{channel::mpsc, SinkExt, StreamExt};
+    use k8s_openapi::{api::core::v1::Pod, apimachinery::pkg::apis::meta::v1::ObjectMeta};
+    use once_cell::sync::OnceCell;
+    use std::sync::{Mutex, MutexGuard};
+
+    fn prepare_test() -> (
+        Writer<mock::Writer<Pod>>,
+        mpsc::Receiver<mock::ScenarioEvent<Pod>>,
+        mpsc::Sender<()>,
+    ) {
+        let (events_tx, events_rx) = mpsc::channel(0);
+        let (actions_tx, actions_rx) = mpsc::channel(0);
+        let writer = mock::Writer::new(events_tx, actions_rx);
+        let writer = Writer::new(writer);
+        (writer, events_rx, actions_tx)
+    }
+
+    fn make_pod() -> Pod {
+        Pod {
+            metadata: ObjectMeta {
+                name: Some("pod_name".to_owned()),
+                uid: Some("pod_uid".to_owned()),
+                ..ObjectMeta::default()
+            },
+            ..Pod::default()
+        }
+    }
+
+    fn get_metric_value(op_kind: &'static str) -> Option<metrics_runtime::Measurement> {
+        let controller = crate::metrics::CONTROLLER.get().unwrap_or_else(|| {
+            crate::metrics::init().unwrap();
+            crate::metrics::CONTROLLER
+                .get()
+                .expect("failed to init metric container")
+        });
+
+        let key = metrics_core::Key::from_name_and_labels(
+            "k8s_state_ops",
+            vec![metrics_core::Label::new("op_kind", op_kind)],
+        );
+        controller
+            .snapshot()
+            .into_measurements()
+            .into_iter()
+            .find_map(|(candidate_key, measurement)| {
+                if candidate_key == key {
+                    Some(measurement)
+                } else {
+                    None
+                }
+            })
+    }
+
+    fn assert_counter_changed(
+        before: Option<metrics_runtime::Measurement>,
+        after: Option<metrics_runtime::Measurement>,
+        expected_difference: u64,
+    ) {
+        let before = before.unwrap_or_else(|| metrics_runtime::Measurement::Counter(0));
+        let after = after.unwrap_or_else(|| metrics_runtime::Measurement::Counter(0));
+
+        let (before, after) = match (before, after) {
+            (
+                metrics_runtime::Measurement::Counter(before),
+                metrics_runtime::Measurement::Counter(after),
+            ) => (before, after),
+            _ => panic!("metrics kind mismatch"),
+        };
+
+        let difference = after - before;
+
+        assert_eq!(difference, expected_difference);
+    }
+
+    /// Guarantees only one test will run at a time.
+    /// This is required because we assert on a global state, and we don't
+    /// want interference.
+    fn tests_lock() -> MutexGuard<'static, ()> {
+        static INSTANCE: OnceCell<Mutex<()>> = OnceCell::new();
+        INSTANCE.get_or_init(|| Mutex::new(())).lock().unwrap()
+    }
+
+    // TODO: tests here are ignored because they cause interference with
+    // the metrics tests.
+    // There is no way to assert individual emits, and asserting metrics
+    // directly causes issues:
+    // - these tests break the internal tests at the metrics implementation
+    //   itself, since we end up initializing the metrics controller twice;
+    // - testing metrics introduces unintended coupling between subsystems;
+    //   ideally we only need to assert that we emit, but avoid assumptions on
+    //   what the results of that emit are.
+    // Unignore them and/or properly reimplement once the issues above are
+    // resolved.
+
+    #[ignore]
+    #[test]
+    fn add() {
+        let _guard = tests_lock();
+        test_util::trace_init();
+        test_util::block_on_std(async {
+            let (mut writer, mut events_rx, mut actions_tx) = prepare_test();
+
+            let pod = make_pod();
+
+            let join = {
+                let pod = pod.clone();
+                let before = get_metric_value("item_added");
+                tokio::spawn(async move {
+                    assert_eq!(
+                        events_rx.next().await.unwrap().unwrap_op(),
+                        (pod, mock::OpKind::Add)
+                    );
+
+                    // By now metrics should've updated.
+                    let after = get_metric_value("item_added");
+                    assert_counter_changed(before, after, 1);
+
+                    actions_tx.send(()).await.unwrap();
+                })
+            };
+
+            writer.add(pod).await;
+            join.await.unwrap();
+        })
+    }
+
+    #[ignore]
+    #[test]
+    fn update() {
+        let _guard = tests_lock();
+        test_util::trace_init();
+        test_util::block_on_std(async {
+            let (mut writer, mut events_rx, mut actions_tx) = prepare_test();
+
+            let pod = make_pod();
+
+            let join = {
+                let pod = pod.clone();
+                let before = get_metric_value("item_updated");
+                tokio::spawn(async move {
+                    assert_eq!(
+                        events_rx.next().await.unwrap().unwrap_op(),
+                        (pod, mock::OpKind::Update)
+                    );
+
+                    // By now metrics should've updated.
+ let after = get_metric_value("item_updated"); + assert_counter_changed(before, after, 1); + + actions_tx.send(()).await.unwrap(); + }) + }; + + writer.update(pod).await; + join.await.unwrap(); + }) + } + + #[ignore] + #[test] + fn delete() { + let _guard = tests_lock(); + test_util::trace_init(); + test_util::block_on_std(async { + let (mut writer, mut events_rx, mut actions_tx) = prepare_test(); + + let pod = make_pod(); + + let join = { + let pod = pod.clone(); + let before = get_metric_value("item_deleted"); + tokio::spawn(async move { + assert_eq!( + events_rx.next().await.unwrap().unwrap_op(), + (pod, mock::OpKind::Delete) + ); + + // By now metrics should've updated. + let after = get_metric_value("item_deleted"); + assert_counter_changed(before, after, 1); + + actions_tx.send(()).await.unwrap(); + }) + }; + + writer.delete(pod).await; + join.await.unwrap(); + }) + } + + #[ignore] + #[test] + fn resync() { + let _guard = tests_lock(); + test_util::trace_init(); + test_util::block_on_std(async { + let (mut writer, mut events_rx, mut actions_tx) = prepare_test(); + + let join = { + let before = get_metric_value("resynced"); + tokio::spawn(async move { + assert!(matches!( + events_rx.next().await.unwrap(), + mock::ScenarioEvent::Resync + )); + + let after = get_metric_value("resynced"); + assert_counter_changed(before, after, 1); + + actions_tx.send(()).await.unwrap(); + }) + }; + + writer.resync().await; + join.await.unwrap(); + }) + } + + #[ignore] + #[test] + fn request_maintenance_without_maintenance() { + let _guard = tests_lock(); + test_util::trace_init(); + test_util::block_on_std(async { + let (mut writer, _events_rx, _actions_tx) = prepare_test(); + let before = get_metric_value("maintenace_requested"); + let _ = writer.maintenance_request(); + let after = get_metric_value("maintenace_requested"); + assert_counter_changed(before, after, 0); + }) + } + + #[ignore] + #[test] + fn request_maintenance_with_maintenance() { + let _guard = tests_lock(); + test_util::trace_init(); + test_util::block_on_std(async { + let (events_tx, _events_rx) = mpsc::channel(0); + let (_actions_tx, actions_rx) = mpsc::channel(0); + let (maintenance_request_events_tx, _maintenance_request_events_rx) = mpsc::channel(0); + let (_maintenance_request_actions_tx, maintenance_request_actions_rx) = + mpsc::channel(0); + let writer = mock::Writer::::new_with_maintenance( + events_tx, + actions_rx, + maintenance_request_events_tx, + maintenance_request_actions_rx, + ); + let mut writer = Writer::new(writer); + let before = get_metric_value("maintenace_requested"); + let _ = writer.maintenance_request(); + let after = get_metric_value("maintenace_requested"); + assert_counter_changed(before, after, 1); + }) + } + + #[ignore] + #[test] + fn perform_maintenance() { + let _guard = tests_lock(); + test_util::trace_init(); + test_util::block_on_std(async { + let (mut writer, mut events_rx, mut actions_tx) = prepare_test(); + + let join = { + let before = get_metric_value("maintenace_performed"); + tokio::spawn(async move { + assert!(matches!( + events_rx.next().await.unwrap(), + mock::ScenarioEvent::Maintenance + )); + + let after = get_metric_value("maintenace_performed"); + assert_counter_changed(before, after, 1); + + actions_tx.send(()).await.unwrap(); + }) + }; + + writer.perform_maintenance().await; + join.await.unwrap(); + }) + } +} diff --git a/src/kubernetes/state/mock.rs b/src/kubernetes/state/mock.rs new file mode 100644 index 0000000000000..684df8a9e4ca1 --- /dev/null +++ b/src/kubernetes/state/mock.rs 
@@ -0,0 +1,172 @@
+//! A mock state.
+
+#![cfg(test)]
+
+use async_trait::async_trait;
+use futures::{
+    channel::mpsc::{Receiver, Sender},
+    future::BoxFuture,
+    SinkExt, StreamExt,
+};
+use k8s_openapi::{apimachinery::pkg::apis::meta::v1::ObjectMeta, Metadata};
+
+/// The kind of item-scoped operation.
+#[derive(Debug, PartialEq, Eq)]
+pub enum OpKind {
+    /// Item added.
+    Add,
+    /// Item updated.
+    Update,
+    /// Item deleted.
+    Delete,
+}
+
+/// An event that's sent to the test scenario driver to describe the
+/// operations flow.
+pub enum ScenarioEvent<T>
+where
+    T: Metadata<Ty = ObjectMeta> + Send,
+{
+    /// An item-scoped operation.
+    Item(T, OpKind),
+    /// A resync operation.
+    Resync,
+    /// Maintenance is performed.
+    Maintenance,
+}
+
+impl<T> ScenarioEvent<T>
+where
+    T: Metadata<Ty = ObjectMeta> + Send,
+{
+    pub fn unwrap_op(self) -> (T, OpKind) {
+        match self {
+            ScenarioEvent::Item(val, op) => (val, op),
+            _ => panic!("unwrap_op on a non-item op"),
+        }
+    }
+}
+
+/// Mock writer.
+///
+/// Uses channels to communicate with the test scenario driver.
+///
+/// When a call is made on the mock, it sends an event to `events_tx` and
+/// waits for an action to conduct in response to that event on `actions_rx`.
+///
+/// Note: the only action available via the [`super::Write`] interface is to
+/// just continue and return.
+pub struct Writer<T>
+where
+    T: Metadata<Ty = ObjectMeta> + Send,
+{
+    events_tx: Sender<ScenarioEvent<T>>,
+    actions_rx: Receiver<()>,
+    maintenance_request: Option<(Sender<()>, Receiver<()>)>,
+}
+
+impl<T> Writer<T>
+where
+    T: Metadata<Ty = ObjectMeta> + Send,
+{
+    /// Create a new mock writer.
+    /// Takes:
+    /// - `events_tx` - a message is sent here at the beginning of the
+    ///   operation.
+    /// - `actions_rx` - a message is read from here before the operation
+    ///   returns.
+    pub fn new(events_tx: Sender<ScenarioEvent<T>>, actions_rx: Receiver<()>) -> Self {
+        Self {
+            events_tx,
+            actions_rx,
+            maintenance_request: None,
+        }
+    }
+    pub fn new_with_maintenance(
+        events_tx: Sender<ScenarioEvent<T>>,
+        actions_rx: Receiver<()>,
+        maintenance_request_events_tx: Sender<()>,
+        maintenance_request_actions_rx: Receiver<()>,
+    ) -> Self {
+        Self {
+            events_tx,
+            actions_rx,
+            maintenance_request: Some((
+                maintenance_request_events_tx,
+                maintenance_request_actions_rx,
+            )),
+        }
+    }
+}
+
+#[async_trait]
+impl<T> super::Write for Writer<T>
+where
+    T: Metadata<Ty = ObjectMeta> + Send,
+{
+    type Item = T;
+
+    async fn add(&mut self, item: Self::Item) {
+        self.events_tx
+            .send(ScenarioEvent::Item(item, OpKind::Add))
+            .await
+            .unwrap();
+        self.actions_rx.next().await.unwrap();
+    }
+
+    async fn update(&mut self, item: Self::Item) {
+        self.events_tx
+            .send(ScenarioEvent::Item(item, OpKind::Update))
+            .await
+            .unwrap();
+        self.actions_rx.next().await.unwrap();
+    }
+
+    async fn delete(&mut self, item: Self::Item) {
+        self.events_tx
+            .send(ScenarioEvent::Item(item, OpKind::Delete))
+            .await
+            .unwrap();
+        self.actions_rx.next().await.unwrap();
+    }
+
+    async fn resync(&mut self) {
+        self.events_tx.send(ScenarioEvent::Resync).await.unwrap();
+        self.actions_rx.next().await.unwrap();
+    }
+}
+
+#[async_trait]
+impl<T> super::MaintainedWrite for Writer<T>
+where
+    T: Metadata<Ty = ObjectMeta> + Send,
+{
+    fn maintenance_request(&mut self) -> Option<BoxFuture<'_, ()>> {
+        if let Some((ref mut events_tx, ref mut actions_rx)) = self.maintenance_request {
+            Some(Box::pin(async move {
+                events_tx.send(()).await.unwrap();
+                actions_rx.next().await.unwrap();
+            }))
+        } else {
+            None
+        }
+    }
+
+    async fn perform_maintenance(&mut self) {
+        self.events_tx
+            .send(ScenarioEvent::Maintenance)
+            .await
+            .unwrap();
+        self.actions_rx.next().await.unwrap();
+    }
+}
diff --git a/src/kubernetes/state/mod.rs b/src/kubernetes/state/mod.rs
new file mode 100644
index 0000000000000..e5dc91096f768
--- /dev/null
+++ b/src/kubernetes/state/mod.rs
@@ -0,0 +1,64 @@
+//! Local representation of the Kubernetes API resources state.
+
+use async_trait::async_trait;
+use futures::future::BoxFuture;
+use k8s_openapi::{apimachinery::pkg::apis::meta::v1::ObjectMeta, Metadata};
+
+pub mod delayed_delete;
+pub mod evmap;
+pub mod instrumenting;
+pub mod mock;
+
+/// Provides the interface for write access to the cached state.
+/// Used by [`super::reflector::Reflector`].
+///
+/// This abstraction allows easily stacking storage behaviour logic, without
+/// exploding the complexity at the [`super::reflector::Reflector`] level.
+#[async_trait]
+pub trait Write {
+    /// The type of the k8s resource the state operates on.
+    type Item: Metadata<Ty = ObjectMeta> + Send;
+
+    /// Add an object to the state.
+    async fn add(&mut self, item: Self::Item);
+
+    /// Update an object in the state.
+    async fn update(&mut self, item: Self::Item);
+
+    /// Delete an object from the state.
+    async fn delete(&mut self, item: Self::Item);
+
+    /// Notify the state that resync is in progress.
+    async fn resync(&mut self);
+}
+
+/// An extension of the [`Write`] type that adds maintenance support.
+#[async_trait]
+pub trait MaintainedWrite: Write {
+    /// A future that resolves when maintenance is required.
+    ///
+    /// Does not perform the maintenance itself; users must call
+    /// [`perform_maintenance`] to actually perform the maintenance.
+    ///
+    /// Returns `None` if the state doesn't require maintenance, in which case
+    /// [`perform_maintenance`] shouldn't be called.
+    /// [`futures::future::FusedFuture`] should've been used here, but it's
+    /// not trivially implementable with `async/await` syntax, so an
+    /// [`Option`] wrapper is used instead for the same purpose.
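+    ///
+    /// A sketch of how a driver could consume this API (illustrative only;
+    /// `state` stands for any value implementing [`MaintainedWrite`]):
+    ///
+    /// ```ignore
+    /// loop {
+    ///     match state.maintenance_request() {
+    ///         // Maintenance is currently required: wait until the request
+    ///         // future resolves, then actually perform it.
+    ///         Some(request) => {
+    ///             request.await;
+    ///             state.perform_maintenance().await;
+    ///         }
+    ///         // No maintenance is required at this time.
+    ///         None => break,
+    ///     }
+    /// }
+    /// ```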
+    ///
+    /// Circumstances of whether maintenance is required or not can change at
+    /// runtime. For instance, whether the maintenance is required can
+    /// depend on whether the state is empty or not. Ultimately it's up to the
+    /// state implementation to decide whether maintenance is needed or not.
+    fn maintenance_request(&mut self) -> Option<BoxFuture<'_, ()>>;
+
+    /// Perform the maintenance.
+    ///
+    /// If this function is called when no maintenance is required, this
+    /// function should just return.
+    ///
+    /// Wrapper [`MaintainedWrite`]s should always call the
+    /// `perform_maintenance` of the wrapped state when `perform_maintenance`
+    /// is called.
+    async fn perform_maintenance(&mut self);
+}
diff --git a/src/kubernetes/stream.rs b/src/kubernetes/stream.rs
new file mode 100644
index 0000000000000..8b28007370526
--- /dev/null
+++ b/src/kubernetes/stream.rs
@@ -0,0 +1,169 @@
+//! Work with HTTP bodies as streams of Kubernetes resources.
+
+use super::multi_response_decoder::MultiResponseDecoder;
+use crate::internal_events::kubernetes::stream as internal_events;
+use async_stream::try_stream;
+use bytes05::Buf;
+use futures::pin_mut;
+use futures::stream::Stream;
+use hyper::body::HttpBody as Body;
+use k8s_openapi::{Response, ResponseError};
+use snafu::{ResultExt, Snafu};
+
+/// Converts the HTTP response [`Body`] to a stream of parsed Kubernetes
+/// [`Response`]s.
+pub fn body<B, T>(body: B) -> impl Stream<Item = Result<T, Error<<B as Body>::Error>>>
+where
+    T: Response + Unpin + 'static,
+    B: Body,
+    <B as Body>::Error: std::error::Error + 'static + Unpin,
+{
+    try_stream! {
+        let mut decoder: MultiResponseDecoder<T> = MultiResponseDecoder::new();
+
+        debug!(message = "streaming the HTTP body");
+
+        pin_mut!(body);
+        while let Some(buf) = body.data().await {
+            let mut buf = buf.context(Reading)?;
+            let chunk = buf.to_bytes();
+            let responses = decoder.process_next_chunk(chunk.as_ref());
+            emit!(internal_events::ChunkProcessed { byte_size: chunk.len() });
+            for response in responses {
+                let response = response.context(Parsing)?;
+                yield response;
+            }
+        }
+        decoder.finish().map_err(|data| Error::UnparsedDataUponCompletion { data })?;
+    }
+}
+
+/// Errors that can occur in the stream.
+#[derive(Debug, Snafu)]
+pub enum Error<ReadError>
+where
+    ReadError: std::error::Error + 'static,
+{
+    /// An error occurred while reading the response body.
+    #[snafu(display("reading the data chunk failed"))]
+    Reading {
+        /// The error we got while reading.
+        source: ReadError,
+    },
+
+    /// An error occurred while parsing the response body.
+    #[snafu(display("data parsing failed"))]
+    Parsing {
+        /// Response parsing error.
+        source: ResponseError,
+    },
+
+    /// An incomplete response remains in the buffer, but we don't expect
+    /// any more data.
+    #[snafu(display("unparsed data remaining upon completion"))]
+    UnparsedDataUponCompletion {
+        /// The unparsed data.
+        data: Vec<u8>,
+    },
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::test_util;
+    use futures::StreamExt;
+    use k8s_openapi::{api::core::v1::Pod, WatchResponse};
+
+    fn hyper_body_from_chunks(
+        chunks: Vec<Result<&'static str, std::io::Error>>,
+    ) -> hyper::body::Body {
+        let in_stream = futures::stream::iter(chunks);
+        hyper::body::Body::wrap_stream(in_stream)
+    }
+
+    #[test]
+    fn test_body() {
+        test_util::trace_init();
+        test_util::block_on_std(async move {
+            let data = r#"{
+                "type": "ADDED",
+                "object": {
+                    "kind": "Pod",
+                    "apiVersion": "v1",
+                    "metadata": {
+                        "uid": "uid0"
+                    }
+                }
+            }"#;
+            let chunks: Vec<Result<_, std::io::Error>> = vec![Ok(data)];
+            let sample_body = hyper_body_from_chunks(chunks);
+
+            let out_stream = body::<_, WatchResponse<Pod>>(sample_body);
+            pin_mut!(out_stream);
+
+            assert!(out_stream.next().await.unwrap().is_ok());
+            assert!(out_stream.next().await.is_none());
+        })
+    }
+
+    #[test]
+    fn test_body_passes_reading_error() {
+        test_util::trace_init();
+        test_util::block_on_std(async move {
+            let err = std::io::Error::new(std::io::ErrorKind::Other, "test error");
+            let chunks: Vec<Result<&'static str, std::io::Error>> = vec![Err(err)];
+            let sample_body = hyper_body_from_chunks(chunks);
+
+            let out_stream = body::<_, WatchResponse<Pod>>(sample_body);
+            pin_mut!(out_stream);
+
+            {
+                let err = out_stream.next().await.unwrap().unwrap_err();
+                assert!(matches!(err, Error::Reading { source: hyper::Error { .. } }));
+            }
+
+            assert!(out_stream.next().await.is_none());
+        })
+    }
+
+    #[test]
+    fn test_body_passes_parsing_error() {
+        test_util::trace_init();
+        test_util::block_on_std(async move {
+            let chunks: Vec<Result<_, std::io::Error>> = vec![Ok("qwerty")];
+            let sample_body = hyper_body_from_chunks(chunks);
+
+            let out_stream = body::<_, WatchResponse<Pod>>(sample_body);
+            pin_mut!(out_stream);
+
+            {
+                let err = out_stream.next().await.unwrap().unwrap_err();
+                assert!(matches!(err, Error::Parsing { source: ResponseError::Json(_) }));
+            }
+
+            assert!(out_stream.next().await.is_none());
+        })
+    }
+
+    #[test]
+    fn test_body_uses_finish() {
+        test_util::trace_init();
+        test_util::block_on_std(async move {
+            let chunks: Vec<Result<_, std::io::Error>> = vec![Ok("{")];
+            let sample_body = hyper_body_from_chunks(chunks);
+
+            let out_stream = body::<_, WatchResponse<Pod>>(sample_body);
+            pin_mut!(out_stream);
+
+            {
+                let err = out_stream.next().await.unwrap().unwrap_err();
+                assert!(matches!(
+                    err,
+                    Error::UnparsedDataUponCompletion { data } if data == vec![b'{']
+                ));
+            }
+
+            assert!(out_stream.next().await.is_none());
+        })
+    }
+}
diff --git a/src/kubernetes/watch_request_builder.rs b/src/kubernetes/watch_request_builder.rs
new file mode 100644
index 0000000000000..6ba31f439ea48
--- /dev/null
+++ b/src/kubernetes/watch_request_builder.rs
@@ -0,0 +1,102 @@
+//! Build watch request for k8s API and adapters for library types.
+//!
+//! # Examples
+//!
+//! ## Non-namespaced and cluster-global
+//!
+//! ```rust
+//! # use vector::kubernetes::watch_request_builder::WatchRequestBuilder;
+//! use k8s_openapi::api::core::v1::Pod;
+//!
+//! let _val: Box<dyn WatchRequestBuilder<Object = Pod>> =
+//!     Box::new(Pod::watch_pod_for_all_namespaces);
+//! ```
+//!
+//! ## Namespaced
+//!
+//! ```rust
+//! # use vector::kubernetes::watch_request_builder::{WatchRequestBuilder, Namespaced};
+//! use k8s_openapi::api::core::v1::Pod;
+//!
+//! let _val: Box<dyn WatchRequestBuilder<Object = Pod>> =
+//!     Box::new(Namespaced("default", Pod::watch_namespaced_pod));
+//! ```
+//!
+
+use k8s_openapi::{
+    apimachinery::pkg::apis::meta::v1::ObjectMeta,
+    http::{Request, StatusCode},
+    Metadata, RequestError, ResponseBody, WatchOptional, WatchResponse,
+};
+use serde::de::DeserializeOwned;
+
+/// Build a watch request for the k8s API.
+///
+/// See module documentation.
+pub trait WatchRequestBuilder {
+    /// The object type that's being watched.
+    type Object: Metadata<Ty = ObjectMeta> + DeserializeOwned;
+
+    /// Build a watch request.
+    fn build<'a>(
+        &self,
+        watch_optional: WatchOptional<'a>,
+    ) -> Result<Request<Vec<u8>>, RequestError>;
+}
+
+impl<T, F> WatchRequestBuilder for F
+where
+    T: Metadata<Ty = ObjectMeta> + DeserializeOwned,
+    F: for<'w> Fn(
+        WatchOptional<'w>,
+    ) -> Result<
+        (
+            Request<Vec<u8>>,
+            fn(StatusCode) -> ResponseBody<WatchResponse<T>>,
+        ),
+        RequestError,
+    >,
+{
+    type Object = T;
+
+    fn build<'a>(
+        &self,
+        watch_optional: WatchOptional<'a>,
+    ) -> Result<Request<Vec<u8>>, RequestError> {
+        let (request, _) = (self)(watch_optional)?;
+        Ok(request)
+    }
+}
+
+/// Wrapper for a namespaced API.
+///
+/// Specify the namespace and an API request building function.
+///
+/// See module documentation for an example.
+pub struct Namespaced<N, F>(pub N, pub F);
+
+impl<N, F, T> WatchRequestBuilder for Namespaced<N, F>
+where
+    N: AsRef<str>,
+    T: Metadata<Ty = ObjectMeta> + DeserializeOwned,
+    F: for<'w> Fn(
+        &'w str,
+        WatchOptional<'w>,
+    ) -> Result<
+        (
+            Request<Vec<u8>>,
+            fn(StatusCode) -> ResponseBody<WatchResponse<T>>,
+        ),
+        RequestError,
+    >,
+{
+    type Object = T;
+
+    fn build<'a>(
+        &self,
+        watch_optional: WatchOptional<'a>,
+    ) -> Result<Request<Vec<u8>>, RequestError> {
+        let (request, _) = (self.1)(self.0.as_ref(), watch_optional)?;
+        Ok(request)
+    }
+}
diff --git a/src/kubernetes/watcher.rs b/src/kubernetes/watcher.rs
new file mode 100644
index 0000000000000..997f7d977ad31
--- /dev/null
+++ b/src/kubernetes/watcher.rs
@@ -0,0 +1,73 @@
+//! Watcher abstraction.
+
+use futures::{future::BoxFuture, stream::Stream};
+use k8s_openapi::{Resource, WatchOptional, WatchResponse};
+use serde::de::DeserializeOwned;
+use snafu::Snafu;
+
+/// Watch over the changes for a k8s resource.
+pub trait Watcher {
+    /// The type of the watched object.
+    type Object: DeserializeOwned + Resource;
+
+    /// The error type the watcher invocation implementation uses internally.
+    type InvocationError: std::error::Error + Send + 'static;
+
+    /// The error type the watcher stream implementation uses internally.
+    type StreamError: std::error::Error + Send + 'static;
+
+    /// The stream type produced by the watch request.
+    type Stream: Stream<Item = Result<WatchResponse<Self::Object>, Self::StreamError>> + Send;
+
+    /// Issues a single watch request and returns a stream of results.
+    fn watch<'a>(
+        &'a mut self,
+        watch_optional: WatchOptional<'a>,
+    ) -> BoxFuture<'a, Result<Self::Stream, invocation::Error<Self::InvocationError>>>;
+}
+
+pub mod invocation {
+    //! Invocation errors.
+    use super::*;
+
+    /// A semantic wrapper around invocation errors, binding meaningful and
+    /// actionable common error semantics to the arbitrary underlying errors.
+    #[derive(Debug, Snafu)]
+    #[snafu(visibility(pub))]
+    pub enum Error<T>
+    where
+        T: std::error::Error + Send + 'static,
+    {
+        /// Desync error signals that the server went out of sync and the
+        /// resource version specified in the call can no longer be used.
+        Desync {
+            /// The underlying error.
+            source: T,
+        },
+
+        /// Any other error that may have meaning for the downstream, but
+        /// doesn't have semantics attached to it at the [`Watcher`] trait
+        /// level.
+        Other {
+            /// The underlying error.
+            source: T,
+        },
+    }
+
+    impl<T> Error<T>
+    where
+        T: std::error::Error + Send + 'static,
+    {
+        /// Create an `Error::Desync`.
+        #[inline]
+        pub fn desync(source: T) -> Self {
+            Self::Desync { source }
+        }
+
+        /// Create an `Error::Other`.
+        #[inline]
+        pub fn other(source: T) -> Self {
+            Self::Other { source }
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index cfc80f4adfe54..870bc6a995c1c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,4 @@
+#![recursion_limit = "256"] // for async-stream
 #![allow(clippy::approx_constant)]
 #![allow(clippy::float_cmp)]
 #![allow(clippy::match_wild_err_arm)]
@@ -33,6 +34,7 @@ pub mod async_read;
 pub mod hyper;
 #[cfg(feature = "rdkafka")]
 pub mod kafka;
+pub mod kubernetes;
 pub mod list;
 pub mod metrics;
 pub mod region;
diff --git a/src/sources/kubernetes_logs/k8s_paths_provider.rs b/src/sources/kubernetes_logs/k8s_paths_provider.rs
new file mode 100644
index 0000000000000..fecbc487f056b
--- /dev/null
+++ b/src/sources/kubernetes_logs/k8s_paths_provider.rs
@@ -0,0 +1,229 @@
+//! A paths provider for k8s logs.
+
+#![deny(missing_docs)]
+
+use super::path_helpers::build_pod_logs_directory;
+use crate::kubernetes as k8s;
+use evmap10::ReadHandle;
+use file_source::paths_provider::PathsProvider;
+use k8s_openapi::api::core::v1::Pod;
+use std::path::PathBuf;
+
+/// A paths provider implementation that uses the state obtained from
+/// the k8s API.
+pub struct K8sPathsProvider {
+    pods_state_reader: ReadHandle<String, k8s::state::evmap::Value<Pod>>,
+}
+
+impl K8sPathsProvider {
+    /// Create a new [`K8sPathsProvider`].
+    pub fn new(pods_state_reader: ReadHandle<String, k8s::state::evmap::Value<Pod>>) -> Self {
+        Self { pods_state_reader }
+    }
+}
+
+impl PathsProvider for K8sPathsProvider {
+    type IntoIter = Vec<PathBuf>;
+
+    fn paths(&self) -> Vec<PathBuf> {
+        let read_ref = match self.pods_state_reader.read() {
+            Some(v) => v,
+            None => {
+                // The state is not initialized or gone, fall back to using an
+                // empty array.
+                // TODO: consider `panic`ing here instead - a fail-fast
+                // approach is always better if possible, but it's not clear
+                // if it's a sane strategy here.
+                warn!(message = "unable to read the state of the pods");
+                return Vec::new();
+            }
+        };
+
+        read_ref
+            .into_iter()
+            .flat_map(|(uid, values)| {
+                let pod = values
+                    .get_one()
+                    .expect("we are supposed to be working with single-item values only");
+                trace!(message = "providing log paths for pod", ?uid);
+                list_pod_log_paths(real_glob, pod)
+            })
+            .collect()
+    }
+}
+
+fn extract_pod_logs_directory(pod: &Pod) -> Option<PathBuf> {
+    let metadata = &pod.metadata;
+    let namespace = metadata.namespace.as_ref()?;
+    let name = metadata.name.as_ref()?;
+    let uid = metadata.uid.as_ref()?;
+    Some(build_pod_logs_directory(&namespace, &name, &uid))
+}
+
+fn list_pod_log_paths<'a, G, GI>(mut glob_impl: G, pod: &Pod) -> impl Iterator<Item = PathBuf> + 'a
+where
+    G: FnMut(&str) -> GI + 'a,
+    GI: Iterator<Item = PathBuf> + 'a,
+{
+    extract_pod_logs_directory(pod)
+        .into_iter()
+        .flat_map(move |dir| {
+            glob_impl(
+                // We seek to match the paths like
+                // `<pod_logs_dir>/<container_name>/<n>.log` - paths managed by
+                // the `kubelet` as part of the Kubernetes core logging
+                // architecture.
+                // In some setups, there will also be paths like
+                // `<pod_logs_dir>/<file>.log` - those we want to skip.
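+                //
+                // For example (the names here are illustrative, not taken
+                // from a real cluster): with the pattern built below,
+                // `/var/log/pods/ns_pod_uid/container/0.log` is picked up,
+                // while `/var/log/pods/ns_pod_uid/extra.log` does not match
+                // the `*/*.log` suffix and is skipped.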
+ &[ + dir.to_str() + .expect("non-utf8 path to pod logs dir is not supported"), + "*/*.log", + ] + .join("/"), + ) + }) +} + +fn real_glob(pattern: &str) -> impl Iterator { + glob::glob(pattern) + .expect("the pattern is supposed to always be correct") + .flat_map(|paths| paths.into_iter()) +} + +#[cfg(test)] +mod tests { + use super::{extract_pod_logs_directory, list_pod_log_paths}; + use k8s_openapi::{api::core::v1::Pod, apimachinery::pkg::apis::meta::v1::ObjectMeta}; + use std::path::PathBuf; + + #[test] + fn test_extract_pod_logs_directory() { + let cases = vec![ + (Pod::default(), None), + ( + Pod { + metadata: ObjectMeta { + namespace: Some("sandbox0-ns".to_owned()), + name: Some("sandbox0-name".to_owned()), + uid: Some("sandbox0-uid".to_owned()), + ..ObjectMeta::default() + }, + ..Pod::default() + }, + Some("/var/log/pods/sandbox0-ns_sandbox0-name_sandbox0-uid"), + ), + ( + Pod { + metadata: ObjectMeta { + namespace: Some("sandbox0-ns".to_owned()), + name: Some("sandbox0-name".to_owned()), + ..ObjectMeta::default() + }, + ..Pod::default() + }, + None, + ), + ( + Pod { + metadata: ObjectMeta { + namespace: Some("sandbox0-ns".to_owned()), + uid: Some("sandbox0-uid".to_owned()), + ..ObjectMeta::default() + }, + ..Pod::default() + }, + None, + ), + ( + Pod { + metadata: ObjectMeta { + name: Some("sandbox0-name".to_owned()), + uid: Some("sandbox0-uid".to_owned()), + ..ObjectMeta::default() + }, + ..Pod::default() + }, + None, + ), + ]; + + for (pod, expected) in cases { + assert_eq!( + extract_pod_logs_directory(&pod), + expected.map(PathBuf::from) + ); + } + } + + #[test] + fn test_list_pod_log_paths() { + let cases = vec![ + // Pod exists and has some containers that write logs. + ( + Pod { + metadata: ObjectMeta { + namespace: Some("sandbox0-ns".to_owned()), + name: Some("sandbox0-name".to_owned()), + uid: Some("sandbox0-uid".to_owned()), + ..ObjectMeta::default() + }, + ..Pod::default() + }, + // Calls to the glob mock. + vec![( + // The pattern to expect at the mock. + "/var/log/pods/sandbox0-ns_sandbox0-name_sandbox0-uid/*/*.log", + // The paths to return from the mock. + vec![ + "/var/log/pods/sandbox0-ns_sandbox0-name_sandbox0-uid/container1/qwe.log", + "/var/log/pods/sandbox0-ns_sandbox0-name_sandbox0-uid/container2/qwe.log", + "/var/log/pods/sandbox0-ns_sandbox0-name_sandbox0-uid/container3/qwe.log", + ], + )], + // Expected result. + vec![ + "/var/log/pods/sandbox0-ns_sandbox0-name_sandbox0-uid/container1/qwe.log", + "/var/log/pods/sandbox0-ns_sandbox0-name_sandbox0-uid/container2/qwe.log", + "/var/log/pods/sandbox0-ns_sandbox0-name_sandbox0-uid/container3/qwe.log", + ], + ), + // Pod doesn't have the metadata set. + (Pod::default(), vec![], vec![]), + // Pod has proper metadata, but doesn't have log files. + ( + Pod { + metadata: ObjectMeta { + namespace: Some("sandbox0-ns".to_owned()), + name: Some("sandbox0-name".to_owned()), + uid: Some("sandbox0-uid".to_owned()), + ..ObjectMeta::default() + }, + ..Pod::default() + }, + vec![( + "/var/log/pods/sandbox0-ns_sandbox0-name_sandbox0-uid/*/*.log", + vec![], + )], + vec![], + ), + ]; + + for (pod, expected_calls, expected_paths) in cases { + // Prepare the mock fn. 
+            let mut expected_calls = expected_calls.into_iter();
+            let mock_glob = move |pattern: &str| {
+                let (expected_pattern, paths_to_return) = expected_calls
+                    .next()
+                    .expect("implementation did a call that wasn't expected");
+
+                assert_eq!(pattern, expected_pattern);
+                paths_to_return.into_iter().map(PathBuf::from)
+            };
+
+            let actual_paths: Vec<_> = list_pod_log_paths(mock_glob, &pod).collect();
+            let expected_paths: Vec<_> = expected_paths.into_iter().map(PathBuf::from).collect();
+            assert_eq!(actual_paths, expected_paths)
+        }
+    }
+}
diff --git a/src/sources/kubernetes_logs/lifecycle.rs b/src/sources/kubernetes_logs/lifecycle.rs
new file mode 100644
index 0000000000000..ea0c75aed392e
--- /dev/null
+++ b/src/sources/kubernetes_logs/lifecycle.rs
@@ -0,0 +1,134 @@
+use crate::shutdown::{ShutdownSignal, ShutdownSignalToken};
+use futures::channel::oneshot;
+use futures::future::{select, BoxFuture, Either};
+use futures::StreamExt;
+use futures::{compat::Compat01As03, pin_mut, ready, stream::FuturesUnordered};
+use std::{
+    future::Future,
+    pin::Pin,
+    task::{Context, Poll},
+};
+
+/// Lifecycle encapsulates logic for managing the lifecycle of multiple
+/// futures that are bound together by a shared shutdown condition.
+///
+/// If any of the futures completes, or a global shutdown is requested, all of
+/// the managed futures are requested to shut down. They can do so gracefully
+/// after completing their work.
+#[derive(Debug)]
+pub struct Lifecycle<'bound> {
+    futs: FuturesUnordered<BoxFuture<'bound, ()>>,
+    fut_shutdowns: Vec<oneshot::Sender<()>>,
+}
+
+/// Holds a "global" shutdown signal or shutdown signal token.
+/// Effectively used to hold the token or signal such that it can be dropped
+/// after the shutdown is complete.
+#[derive(Debug)]
+pub enum GlobalShutdownToken {
+    /// The global shutdown signal was consumed, and we have a raw
+    /// [`ShutdownSignalToken`] now.
+    Token(ShutdownSignalToken),
+    /// The [`ShutdownSignal`] wasn't consumed, and still holds on to the
+    /// [`ShutdownSignalToken`]. Keep it around.
+    Unused(ShutdownSignal),
+}
+
+impl<'bound> Lifecycle<'bound> {
+    /// Create a new [`Lifecycle`].
+    pub fn new() -> Self {
+        Self {
+            futs: FuturesUnordered::new(),
+            fut_shutdowns: Vec::new(),
+        }
+    }
+
+    /// Add a new future to be managed by the [`Lifecycle`].
+    ///
+    /// Returns a [`Slot`] to be bound with the `Future`, and
+    /// a [`ShutdownHandle`] that is to be used by the bound future to wait
+    /// for shutdown.
+    pub fn add(&mut self) -> (Slot<'bound, '_>, ShutdownHandle) {
+        let (tx, rx) = oneshot::channel();
+        let slot = Slot {
+            lifecycle: self,
+            shutdown_trigger: tx,
+        };
+        let shutdown_handle = ShutdownHandle(rx);
+        (slot, shutdown_handle)
+    }
+
+    /// Run the managed futures and keep track of the shutdown process.
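+    ///
+    /// A usage sketch (illustrative only; `my_task` stands for any future
+    /// that honors the shutdown handle it is given):
+    ///
+    /// ```ignore
+    /// let mut lifecycle = Lifecycle::new();
+    /// {
+    ///     let (slot, shutdown) = lifecycle.add();
+    ///     // The bound future must start shutting down once `shutdown`
+    ///     // resolves.
+    ///     slot.bind(Box::pin(my_task(shutdown)));
+    /// }
+    /// let token = lifecycle.run(global_shutdown).await;
+    /// ```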
+    pub async fn run(mut self, mut global_shutdown: ShutdownSignal) -> GlobalShutdownToken {
+        let first_task_fut = self.futs.next();
+        pin_mut!(first_task_fut);
+
+        let global_shutdown_fut = Compat01As03::new(&mut global_shutdown);
+        let token = match select(first_task_fut, global_shutdown_fut).await {
+            Either::Left((None, _)) => {
+                trace!(message = "lifecycle had no tasks upon run, we're done");
+                GlobalShutdownToken::Unused(global_shutdown)
+            }
+            Either::Left((Some(()), _)) => {
+                trace!(message = "lifecycle had the first task completed");
+                GlobalShutdownToken::Unused(global_shutdown)
+            }
+            Either::Right((shutdown_signal_token_result, _)) => {
+                let shutdown_signal_token = shutdown_signal_token_result.unwrap();
+                trace!(message = "lifecycle got a global shutdown request");
+                GlobalShutdownToken::Token(shutdown_signal_token)
+            }
+        };
+
+        // Send the shutdowns to all managed futures.
+        for fut_shutdown in self.fut_shutdowns {
+            if fut_shutdown.send(()).is_err() {
+                trace!(
+                    message = "error while sending a future shutdown, \
+                               the receiver is already dropped; \
+                               this is not a problem"
+                );
+            }
+        }
+
+        // Wait for all the futures to complete.
+        while let Some(()) = self.futs.next().await {
+            trace!(message = "a lifecycle-managed future completed after shutdown was requested");
+        }
+
+        // Return the global shutdown token so that the caller can perform its
+        // cleanup.
+        token
+    }
+}
+
+/// Represents an unbound slot in the lifecycle.
+#[derive(Debug)]
+pub struct Slot<'bound, 'lc> {
+    lifecycle: &'lc mut Lifecycle<'bound>,
+    shutdown_trigger: oneshot::Sender<()>,
+}
+
+impl<'bound, 'lc> Slot<'bound, 'lc> {
+    /// Bind the lifecycle slot to a concrete future.
+    /// The passed future MUST start its shutdown process when requested to
+    /// shut down via the signal passed from the corresponding
+    /// [`ShutdownHandle`].
+    pub fn bind(self, future: BoxFuture<'bound, ()>) {
+        self.lifecycle.futs.push(future);
+        self.lifecycle.fut_shutdowns.push(self.shutdown_trigger);
+    }
+}
+
+/// A handle that allows waiting for the lifecycle-issued shutdown.
+#[derive(Debug)]
+pub struct ShutdownHandle(oneshot::Receiver<()>);
+
+impl Future for ShutdownHandle {
+    type Output = ();
+
+    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+        let _ = ready!(Pin::new(&mut self.0).poll(cx));
+        Poll::Ready(())
+    }
+}
diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs
new file mode 100644
index 0000000000000..729c649becc1d
--- /dev/null
+++ b/src/sources/kubernetes_logs/mod.rs
@@ -0,0 +1,302 @@
+//! This module implements the `kubernetes_logs` source.
+//! The scope of this source is to consume the log files that `kubelet` keeps
+//! at `/var/log/pods` on the host of the k8s node when `vector` itself is
+//! running inside the cluster as a `DaemonSet`.
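+//!
+//! A minimal configuration sketch (the field names come from [`Config`]
+//! below; the values shown are illustrative):
+//!
+//! ```toml
+//! [sources.my_kubernetes_logs]
+//! type = "kubernetes_logs"
+//! self_node_name = "${VECTOR_SELF_NODE_NAME}"
+//! auto_partial_merge = true
+//! ```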
+
+#![deny(missing_docs)]
+
+use crate::event::{self, Event};
+use crate::internal_events::{KubernetesLogsEventAnnotationFailed, KubernetesLogsEventReceived};
+use crate::kubernetes as k8s;
+use crate::{
+    dns::Resolver,
+    shutdown::ShutdownSignal,
+    sources,
+    topology::config::{DataType, GlobalOptions, SourceConfig, SourceDescription},
+    transforms::Transform,
+};
+use bytes05::Bytes;
+use evmap10::{self as evmap};
+use file_source::{FileServer, FileServerShutdown, Fingerprinter};
+use futures::{future::FutureExt, sink::Sink, stream::StreamExt};
+use futures01::sync::mpsc;
+use k8s_openapi::api::core::v1::Pod;
+use serde::{Deserialize, Serialize};
+use std::path::PathBuf;
+use std::time::Duration;
+
+mod k8s_paths_provider;
+mod lifecycle;
+mod parser;
+mod partial_events_merger;
+mod path_helpers;
+mod pod_metadata_annotator;
+mod transform_utils;
+mod util;
+
+use k8s_paths_provider::K8sPathsProvider;
+use lifecycle::Lifecycle;
+use pod_metadata_annotator::PodMetadataAnnotator;
+
+/// The key we use for the `file` field.
+const FILE_KEY: &str = "file";
+
+/// The `self_node_name` value env var key.
+const SELF_NODE_NAME_ENV_KEY: &str = "VECTOR_SELF_NODE_NAME";
+
+/// Configuration for the `kubernetes_logs` source.
+#[derive(Deserialize, Serialize, Debug, Clone, Default)]
+#[serde(deny_unknown_fields, default)]
+pub struct Config {
+    /// The `name` of the Kubernetes `Node` that Vector runs on.
+    /// Required to filter the `Pod`s to only include the ones with the log
+    /// files accessible locally.
+    #[serde(default = "default_self_node_name_env_template")]
+    self_node_name: String,
+
+    /// Automatically merge partial events.
+    #[serde(default = "crate::serde::default_true")]
+    auto_partial_merge: bool,
+
+    /// Specifies the field names for metadata annotation.
+    annotation_fields: pod_metadata_annotator::FieldsSpec,
+}
+
+inventory::submit! {
+    SourceDescription::new_without_default::<Config>(COMPONENT_NAME)
+}
+
+const COMPONENT_NAME: &str = "kubernetes_logs";
+
+#[typetag::serde(name = "kubernetes_logs")]
+impl SourceConfig for Config {
+    fn build(
+        &self,
+        name: &str,
+        globals: &GlobalOptions,
+        shutdown: ShutdownSignal,
+        out: mpsc::Sender<Event>,
+    ) -> crate::Result<sources::Source> {
+        let source = Source::new(self, Resolver, globals, name)?;
+
+        // TODO: this is a workaround for the legacy futures 0.1.
+        // When the core is updated to futures 0.3 this should be simplified
+        // significantly.
+        let out = futures::compat::Compat01As03Sink::new(out);
+        let fut = source.run(out, shutdown);
+        let fut = fut.map(|result| {
+            result.map_err(|error| {
+                error!(message = "source future failed", ?error);
+            })
+        });
+        let fut = Box::pin(fut);
+        let fut = futures::compat::Compat::new(fut);
+        let fut: sources::Source = Box::new(fut);
+        Ok(fut)
+    }
+
+    fn output_type(&self) -> DataType {
+        DataType::Log
+    }
+
+    fn source_type(&self) -> &'static str {
+        COMPONENT_NAME
+    }
+}
+
+#[derive(Clone)]
+struct Source {
+    client: k8s::client::Client,
+    self_node_name: String,
+    data_dir: PathBuf,
+    auto_partial_merge: bool,
+    fields_spec: pod_metadata_annotator::FieldsSpec,
+}
+
+impl Source {
+    fn new(
+        config: &Config,
+        resolver: Resolver,
+        globals: &GlobalOptions,
+        name: &str,
+    ) -> crate::Result<Self> {
+        let self_node_name = if config.self_node_name.is_empty()
+            || config.self_node_name == default_self_node_name_env_template()
+        {
+            std::env::var(SELF_NODE_NAME_ENV_KEY).map_err(|_| {
+                format!(
+                    "self_node_name config value or {} env var is not set",
+                    SELF_NODE_NAME_ENV_KEY
+                )
+            })?
+ } else { + config.self_node_name.clone() + }; + info!( + message = "obtained Kubernetes Node name to collect logs for (self)", + ?self_node_name + ); + + let k8s_config = k8s::client::config::Config::in_cluster()?; + let client = k8s::client::Client::new(k8s_config, resolver)?; + + let data_dir = globals.resolve_and_make_data_subdir(None, name)?; + + Ok(Self { + client, + self_node_name, + data_dir, + auto_partial_merge: config.auto_partial_merge, + fields_spec: config.annotation_fields.clone(), + }) + } + + async fn run(self, out: O, global_shutdown: ShutdownSignal) -> crate::Result<()> + where + O: Sink + Send + 'static, + >::Error: std::error::Error, + { + let Self { + client, + self_node_name, + data_dir, + auto_partial_merge, + fields_spec, + } = self; + + let field_selector = format!("spec.nodeName={}", self_node_name); + let label_selector = "vector.dev/exclude!=true".to_owned(); + + let watcher = k8s::api_watcher::ApiWatcher::new(client, Pod::watch_pod_for_all_namespaces); + let watcher = k8s::instrumenting_watcher::InstrumentingWatcher::new(watcher); + let (state_reader, state_writer) = evmap::new(); + let state_writer = + k8s::state::evmap::Writer::new(state_writer, Some(Duration::from_millis(10))); + let state_writer = k8s::state::instrumenting::Writer::new(state_writer); + let state_writer = + k8s::state::delayed_delete::Writer::new(state_writer, Duration::from_secs(60)); + + let mut reflector = k8s::reflector::Reflector::new( + watcher, + state_writer, + Some(field_selector), + Some(label_selector), + Duration::from_secs(1), + ); + let reflector_process = reflector.run(); + + let paths_provider = K8sPathsProvider::new(state_reader.clone()); + let annotator = PodMetadataAnnotator::new(state_reader, fields_spec); + + // TODO: maybe some of the parameters have to be configurable. 
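+        // Note on the state writer stack above: the layers wrap each other as
+        // `delayed_delete::Writer(instrumenting::Writer(evmap::Writer))`, so
+        // every write first passes through the delayed-delete layer, then is
+        // counted by the instrumentation, and finally lands in the `evmap`
+        // that the paths provider and the annotator read from.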
+ let max_line_bytes = 32 * 1024; // 32 KiB + let file_server = FileServer { + paths_provider, + max_read_bytes: 2048, + start_at_beginning: true, + ignore_before: None, + max_line_bytes, + data_dir, + glob_minimum_cooldown: Duration::from_secs(10), + fingerprinter: Fingerprinter::FirstLineChecksum { + max_line_length: max_line_bytes, + }, + oldest_first: false, + remove_after: None, + }; + + let (file_source_tx, file_source_rx) = + futures::channel::mpsc::channel::<(Bytes, String)>(100); + + let mut parser = parser::build(); + let mut partial_events_merger = partial_events_merger::build(auto_partial_merge); + + let events = file_source_rx.map(move |(bytes, file)| { + emit!(KubernetesLogsEventReceived { + file: &file, + byte_size: bytes.len(), + }); + let mut event = create_event(bytes, &file); + if annotator.annotate(&mut event, &file).is_none() { + emit!(KubernetesLogsEventAnnotationFailed { event: &event }); + } + event + }); + let events = events + .filter_map(move |event| futures::future::ready(parser.transform(event))) + .filter_map(move |event| { + futures::future::ready(partial_events_merger.transform(event)) + }); + + let event_processing_loop = events.map(Ok).forward(out); + + let mut lifecycle = Lifecycle::new(); + { + let (slot, shutdown) = lifecycle.add(); + let fut = + util::cancel_on_signal(reflector_process, shutdown).map(|result| match result { + Ok(()) => info!(message = "reflector process completed gracefully"), + Err(error) => { + error!(message = "reflector process exited with an error", ?error) + } + }); + slot.bind(Box::pin(fut)); + } + { + let (slot, shutdown) = lifecycle.add(); + let fut = util::run_file_server(file_server, file_source_tx, shutdown).map(|result| { + match result { + Ok(FileServerShutdown) => info!(message = "file server completed gracefully"), + Err(error) => error!(message = "file server exited with an error", ?error), + } + }); + slot.bind(Box::pin(fut)); + } + { + let (slot, shutdown) = lifecycle.add(); + let fut = util::complete_with_deadline_on_signal( + event_processing_loop, + shutdown, + Duration::from_secs(30), // more than enough time to propagate + ) + .map(|result| { + match result { + Ok(Ok(())) => info!(message = "event processing loop completed gracefully"), + Ok(Err(error)) => error!( + message = "event processing loop exited with an error", + ?error + ), + Err(error) => error!( + message = "event processing loop timed out during the shutdown", + ?error + ), + }; + }); + slot.bind(Box::pin(fut)); + } + + lifecycle.run(global_shutdown).await; + info!(message = "done"); + Ok(()) + } +} + +fn create_event(line: Bytes, file: &str) -> Event { + let mut event = Event::from(line); + + // Add source type. + event + .as_mut_log() + .insert(event::log_schema().source_type_key(), COMPONENT_NAME); + + // Add file. + event.as_mut_log().insert(FILE_KEY, file); + + event +} + +/// This function returns the default value for `self_node_name` variable +/// as it should be at the generated config file. 
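+/// With the current env var key, this evaluates to the literal string
+/// `${VECTOR_SELF_NODE_NAME}`, which is then resolved from the
+/// `VECTOR_SELF_NODE_NAME` environment variable when the source is built
+/// (see [`Source::new`]).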
+fn default_self_node_name_env_template() -> String {
+    format!("${{{}}}", SELF_NODE_NAME_ENV_KEY)
+}
diff --git a/src/sources/kubernetes_logs/parser/cri.rs b/src/sources/kubernetes_logs/parser/cri.rs
new file mode 100644
index 0000000000000..3d1c402ac049b
--- /dev/null
+++ b/src/sources/kubernetes_logs/parser/cri.rs
@@ -0,0 +1,156 @@
+use crate::{
+    event::{self, Event, LogEvent, Value},
+    transforms::{
+        regex_parser::{RegexParser, RegexParserConfig},
+        Transform,
+    },
+};
+use lazy_static::lazy_static;
+use snafu::{OptionExt, Snafu};
+use string_cache::DefaultAtom as Atom;
+
+lazy_static! {
+    pub static ref MULTILINE_TAG: Atom = Atom::from("multiline_tag");
+}
+
+/// Parser for the CRI log format.
+///
+/// Expects logs to arrive in the CRI log format.
+///
+/// CRI log format ([documentation][cri_log_format]) is a simple
+/// newline-separated text format. We rely on regular expressions to parse it.
+///
+/// Normalizes parsed data for consistency.
+///
+/// [cri_log_format]: https://github.com/kubernetes/community/blob/ee2abbf9dbfa4523b414f99a04ddc97bd38c74b2/contributors/design-proposals/node/kubelet-cri-logging.md
+pub struct Cri {
+    // TODO: patch `RegexParser` to expose the concrete type on build.
+    regex_parser: Box<dyn Transform>,
+}
+
+impl Cri {
+    /// Create a new [`Cri`] parser.
+    pub fn new() -> Self {
+        let regex_parser = {
+            let mut rp_config = RegexParserConfig::default();
+
+            let pattern = r"^(?P<timestamp>.*) (?P<stream>(stdout|stderr)) (?P<multiline_tag>(P|F)) (?P<message>.*)$";
+            rp_config.patterns = vec![pattern.to_owned()];
+
+            rp_config.types.insert(
+                event::log_schema().timestamp_key().clone(),
+                "timestamp|%+".to_owned(),
+            );
+
+            RegexParser::build(&rp_config).expect("regexp patterns are static, should never fail")
+        };
+
+        Self { regex_parser }
+    }
+}
+
+impl Transform for Cri {
+    fn transform(&mut self, event: Event) -> Option<Event> {
+        let mut event = self.regex_parser.transform(event)?;
+        normalize_event(event.as_mut_log()).ok()?;
+        Some(event)
+    }
+}
+
+fn normalize_event(log: &mut LogEvent) -> Result<(), NormalizationError> {
+    // Detect if this is a partial event.
+    let multiline_tag = log
+        .remove(&MULTILINE_TAG)
+        .context(MultilineTagFieldMissing)?;
+    let multiline_tag = match multiline_tag {
+        Value::Bytes(val) => val,
+        _ => return Err(NormalizationError::MultilineTagValueUnexpectedType),
+    };
+
+    let is_partial = multiline_tag[0] == b'P';
+
+    // For partial messages, add a partial event indicator.
+    if is_partial {
+        log.insert(event::PARTIAL_STR, true);
+    }
+
+    Ok(())
+}
+
+#[derive(Debug, Snafu)]
+enum NormalizationError {
+    MultilineTagFieldMissing,
+    MultilineTagValueUnexpectedType,
+}
+
+#[cfg(test)]
+pub mod tests {
+    use super::super::test_util;
+    use super::Cri;
+    use crate::event::LogEvent;
+
+    fn make_long_string(base: &str, len: usize) -> String {
+        base.chars().cycle().take(len).collect()
+    }
+
+    /// Shared test cases.
+ pub fn cases() -> Vec<(String, LogEvent)> { + vec![ + ( + "2016-10-06T00:17:09.669794202Z stdout F The content of the log entry 1".into(), + test_util::make_log_event( + "The content of the log entry 1", + "2016-10-06T00:17:09.669794202Z", + "stdout", + false, + ), + ), + ( + "2016-10-06T00:17:09.669794202Z stdout P First line of log entry 2".into(), + test_util::make_log_event( + "First line of log entry 2", + "2016-10-06T00:17:09.669794202Z", + "stdout", + true, + ), + ), + ( + "2016-10-06T00:17:09.669794202Z stdout P Second line of the log entry 2".into(), + test_util::make_log_event( + "Second line of the log entry 2", + "2016-10-06T00:17:09.669794202Z", + "stdout", + true, + ), + ), + ( + "2016-10-06T00:17:10.113242941Z stderr F Last line of the log entry 2".into(), + test_util::make_log_event( + "Last line of the log entry 2", + "2016-10-06T00:17:10.113242941Z", + "stderr", + false, + ), + ), + // A part of the partial message with a realistic length. + ( + [ + r#"2016-10-06T00:17:10.113242941Z stdout P "#, + make_long_string("very long message ", 16 * 1024).as_str(), + ] + .join(""), + test_util::make_log_event( + make_long_string("very long message ", 16 * 1024).as_str(), + "2016-10-06T00:17:10.113242941Z", + "stdout", + true, + ), + ), + ] + } + + #[test] + fn test_parsing() { + test_util::test_parser(Cri::new, cases()); + } +} diff --git a/src/sources/kubernetes_logs/parser/docker.rs b/src/sources/kubernetes_logs/parser/docker.rs new file mode 100644 index 0000000000000..0a79fb1ba02f7 --- /dev/null +++ b/src/sources/kubernetes_logs/parser/docker.rs @@ -0,0 +1,181 @@ +use crate::{ + event::{self, Event, LogEvent, Value}, + transforms::{ + json_parser::{JsonParser, JsonParserConfig}, + Transform, + }, +}; +use chrono::{DateTime, Utc}; +use lazy_static::lazy_static; +use snafu::{OptionExt, ResultExt, Snafu}; +use string_cache::DefaultAtom as Atom; + +lazy_static! { + pub static ref TIME: Atom = Atom::from("time"); + pub static ref LOG: Atom = Atom::from("log"); +} + +/// Parser for the docker log format. +/// +/// Expects logs to arrive in a JSONLines format with the fields names and +/// contents specific to the implementation of the Docker `json` log driver. +/// +/// Normalizes parsed data for consistency. +#[derive(Debug)] +pub struct Docker { + json_parser: JsonParser, +} + +impl Docker { + /// Create a new [`Docker`] parser. + pub fn new() -> Self { + let json_parser = { + let mut config = JsonParserConfig::default(); + config.drop_field = true; + + // Drop so that it's possible to detect if message is in json format. + config.drop_invalid = true; + + config.into() + }; + + Self { json_parser } + } +} + +impl Transform for Docker { + fn transform(&mut self, event: Event) -> Option { + let mut event = self.json_parser.transform(event)?; + normalize_event(event.as_mut_log()).ok()?; + Some(event) + } +} + +const DOCKER_MESSAGE_SPLIT_THRESHOLD: usize = 16 * 1024; // 16 Kib + +fn normalize_event(log: &mut LogEvent) -> Result<(), NormalizationError> { + // Parse and rename timestamp. + let time = log.remove(&TIME).context(TimeFieldMissing)?; + let time = match time { + Value::Bytes(val) => val, + _ => return Err(NormalizationError::TimeValueUnexpectedType), + }; + let time = DateTime::parse_from_rfc3339(String::from_utf8_lossy(time.as_ref()).as_ref()) + .context(TimeParsing)?; + log.insert( + event::log_schema().timestamp_key(), + time.with_timezone(&Utc), + ); + + // Parse message, remove trailing newline and detect if it's partial. 
+    let message = log.remove(&LOG).context(LogFieldMissing)?;
+    let mut message = match message {
+        Value::Bytes(val) => val,
+        _ => return Err(NormalizationError::LogValueUnexpectedType),
+    };
+    // Here we apply our heuristics to detect if the message is partial.
+    // Partial messages are only split in docker at the maximum message length
+    // (`DOCKER_MESSAGE_SPLIT_THRESHOLD`).
+    // Thus, for a message to be partial it also has to have exactly that
+    // length.
+    // Now, whether that message will or won't actually be partial if it has
+    // exactly the max length is unknown. We consider all messages with the
+    // exact length of `DOCKER_MESSAGE_SPLIT_THRESHOLD` bytes partial
+    // by default, and then, if they end with a newline - consider that
+    // an exception and make them non-partial.
+    // This is still not ideal, and can potentially be improved.
+    let mut is_partial = message.len() == DOCKER_MESSAGE_SPLIT_THRESHOLD;
+    if message.last().map(|&b| b as char == '\n').unwrap_or(false) {
+        message.truncate(message.len() - 1);
+        is_partial = false;
+    };
+    log.insert(event::log_schema().message_key(), message);
+
+    // For partial messages, add a partial event indicator.
+    if is_partial {
+        log.insert(event::PARTIAL_STR, true);
+    }
+
+    Ok(())
+}
+
+#[derive(Debug, Snafu)]
+enum NormalizationError {
+    TimeFieldMissing,
+    TimeValueUnexpectedType,
+    TimeParsing { source: chrono::ParseError },
+    LogFieldMissing,
+    LogValueUnexpectedType,
+}
+
+#[cfg(test)]
+pub mod tests {
+    use super::super::test_util;
+    use super::Docker;
+    use crate::event::LogEvent;
+
+    fn make_long_string(base: &str, len: usize) -> String {
+        base.chars().cycle().take(len).collect()
+    }
+
+    /// Shared test cases.
+    pub fn cases() -> Vec<(String, LogEvent)> {
+        vec![
+            (
+                r#"{"log": "The actual log line\n", "stream": "stderr", "time": "2016-10-05T00:00:30.082640485Z"}"#.into(),
+                test_util::make_log_event(
+                    "The actual log line",
+                    "2016-10-05T00:00:30.082640485Z",
+                    "stderr",
+                    false,
+                ),
+            ),
+            (
+                r#"{"log": "A line without a newline char at the end", "stream": "stdout", "time": "2016-10-05T00:00:30.082640485Z"}"#.into(),
+                test_util::make_log_event(
+                    "A line without a newline char at the end",
+                    "2016-10-05T00:00:30.082640485Z",
+                    "stdout",
+                    false,
+                ),
+            ),
+            // Partial message due to message length.
+            (
+                [
+                    r#"{"log": ""#,
+                    make_long_string("partial ", 16 * 1024).as_str(),
+                    r#"", "stream": "stdout", "time": "2016-10-05T00:00:30.082640485Z"}"#,
+                ]
+                .join(""),
+                test_util::make_log_event(
+                    make_long_string("partial ", 16 * 1024).as_str(),
+                    "2016-10-05T00:00:30.082640485Z",
+                    "stdout",
+                    true,
+                ),
+            ),
+            // Non-partial message, because the message length matches but
+            // the message also ends with a newline.
+            (
+                [
+                    r#"{"log": ""#,
+                    make_long_string("non-partial ", 16 * 1024 - 1).as_str(),
+                    r"\n",
+                    r#"", "stream": "stdout", "time": "2016-10-05T00:00:30.082640485Z"}"#,
+                ]
+                .join(""),
+                test_util::make_log_event(
+                    make_long_string("non-partial ", 16 * 1024 - 1).as_str(),
+                    "2016-10-05T00:00:30.082640485Z",
+                    "stdout",
+                    false,
+                ),
+            ),
+        ]
+    }
+
+    #[test]
+    fn test_parsing() {
+        test_util::test_parser(Docker::new, cases());
+    }
+}
diff --git a/src/sources/kubernetes_logs/parser/mod.rs b/src/sources/kubernetes_logs/parser/mod.rs
new file mode 100644
index 0000000000000..fbb7befdd7a24
--- /dev/null
+++ b/src/sources/kubernetes_logs/parser/mod.rs
@@ -0,0 +1,12 @@
+mod cri;
+mod docker;
+mod picker;
+mod test_util;
+
+/// Parser for any log format supported by `kubelet`.
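+///
+/// Currently this resolves to [`picker::Picker`], which sniffs the first
+/// event: if the message starts with a `{`, it is assumed to be in the
+/// Docker JSON format; otherwise it is handed to the CRI parser.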
+pub type Parser = picker::Picker; + +/// Build a parser for any log format supported by `kubelet`. +pub fn build() -> Parser { + picker::Picker::new() +} diff --git a/src/sources/kubernetes_logs/parser/picker.rs b/src/sources/kubernetes_logs/parser/picker.rs new file mode 100644 index 0000000000000..52ce4bdee2c4d --- /dev/null +++ b/src/sources/kubernetes_logs/parser/picker.rs @@ -0,0 +1,75 @@ +use super::{cri::Cri, docker::Docker}; +use crate::{ + event::{self, Event, Value}, + transforms::Transform, +}; + +pub enum Picker { + Init, + Docker(Docker), + Cri(Cri), +} + +impl Picker { + pub fn new() -> Self { + Picker::Init + } +} + +impl Transform for Picker { + fn transform(&mut self, event: Event) -> Option { + match self { + Picker::Init => { + let message = event + .as_log() + .get(event::log_schema().message_key()) + .expect("message key must be present"); + let bytes = if let Value::Bytes(bytes) = message { + bytes + } else { + panic!("message value must be Bytes"); + }; + if bytes.len() > 1 && bytes[0] == b'{' { + *self = Picker::Docker(Docker::new()) + } else { + *self = Picker::Cri(Cri::new()) + } + self.transform(event) + } + Picker::Docker(t) => t.transform(event), + Picker::Cri(t) => t.transform(event), + } + } +} + +#[cfg(test)] +mod tests { + use super::super::{cri, docker, test_util}; + use super::{Picker, Transform}; + use crate::{event::LogEvent, Event}; + + /// Picker has to work for all test cases for underlying parsers. + fn cases() -> Vec<(String, LogEvent)> { + let mut cases = vec![]; + cases.extend(docker::tests::cases()); + cases.extend(cri::tests::cases()); + cases + } + + #[test] + fn test_parsing() { + test_util::test_parser(Picker::new, cases()); + } + + #[test] + fn test_parsing_invalid() { + let cases = vec!["", "qwe", "{"]; + + for message in cases { + let input = Event::from(message); + let mut picker = Picker::new(); + let output = picker.transform(input); + assert!(output.is_none()); + } + } +} diff --git a/src/sources/kubernetes_logs/parser/test_util.rs b/src/sources/kubernetes_logs/parser/test_util.rs new file mode 100644 index 0000000000000..44149c0ce23b6 --- /dev/null +++ b/src/sources/kubernetes_logs/parser/test_util.rs @@ -0,0 +1,47 @@ +#![cfg(test)] + +use crate::{ + event::{Event, LogEvent}, + transforms::Transform, +}; +use chrono::{DateTime, Utc}; + +/// Build a log event for test purposes. +/// +/// The implementation is shared, and therefore consistent across all +/// the parsers. +pub fn make_log_event(message: &str, timestamp: &str, stream: &str, is_partial: bool) -> LogEvent { + let mut log = LogEvent::default(); + + log.insert("message", message); + + let timestamp = DateTime::parse_from_rfc3339(timestamp) + .expect("invalid test case") + .with_timezone(&Utc); + log.insert("timestamp", timestamp); + + log.insert("stream", stream); + + if is_partial { + log.insert("_partial", true); + } + log +} + +/// Shared logic for testing parsers. +/// +/// Takes a parser builder and a list of test cases. 
+pub fn test_parser<B, T>(builder: B, cases: Vec<(String, LogEvent)>)
+where
+    B: Fn() -> T,
+    T: Transform,
+{
+    for (message, expected) in cases {
+        let input = Event::from(message);
+        let mut parser = (builder)();
+        let output = parser
+            .transform(input)
+            .expect("parser failed to parse the event");
+        assert_eq!(Event::Log(expected), output, "expected left, actual right");
+    }
+}
diff --git a/src/sources/kubernetes_logs/partial_events_merger.rs b/src/sources/kubernetes_logs/partial_events_merger.rs
new file mode 100644
index 0000000000000..27e90bef39e98
--- /dev/null
+++ b/src/sources/kubernetes_logs/partial_events_merger.rs
@@ -0,0 +1,25 @@
+#![deny(missing_docs)]
+
+use super::transform_utils::optional::Optional;
+use super::FILE_KEY;
+use crate::event;
+use crate::transforms::merge::{Merge, MergeConfig};
+use string_cache::Atom;
+
+/// Partial event merger.
+pub type PartialEventsMerger = Optional<Merge>;
+
+pub fn build(enabled: bool) -> PartialEventsMerger {
+    Optional(if enabled {
+        Some(
+            MergeConfig {
+                partial_event_marker_field: event::PARTIAL.clone(),
+                merge_fields: vec![event::log_schema().message_key().clone()],
+                stream_discriminant_fields: vec![Atom::from(FILE_KEY)],
+            }
+            .into(),
+        )
+    } else {
+        None
+    })
+}
diff --git a/src/sources/kubernetes_logs/path_helpers.rs b/src/sources/kubernetes_logs/path_helpers.rs
new file mode 100644
index 0000000000000..2035d26a0b4f3
--- /dev/null
+++ b/src/sources/kubernetes_logs/path_helpers.rs
@@ -0,0 +1,113 @@
+//! Simple helpers for building and parsing k8s paths.
+//!
+//! Loosely based on
+//! https://github.com/kubernetes/kubernetes/blob/31305966789525fca49ec26c289e565467d1f1c4/pkg/kubelet/kuberuntime/helpers.go
+
+#![deny(missing_docs)]
+
+use std::path::PathBuf;
+
+/// The root directory for pod logs.
+const K8S_LOGS_DIR: &str = "/var/log/pods";
+
+/// The delimiter used in the log path.
+const LOG_PATH_DELIMITER: &str = "_";
+
+/// Builds the absolute log directory path for a pod sandbox.
+///
+/// Based on
+/// https://github.com/kubernetes/kubernetes/blob/31305966789525fca49ec26c289e565467d1f1c4/pkg/kubelet/kuberuntime/helpers.go#L178
+pub(super) fn build_pod_logs_directory(
+    pod_namespace: &str,
+    pod_name: &str,
+    pod_uid: &str,
+) -> PathBuf {
+    [
+        K8S_LOGS_DIR,
+        &[pod_namespace, pod_name, pod_uid].join(LOG_PATH_DELIMITER),
+    ]
+    .join("/")
+    .into()
+}
+
+/// Parses the pod log file path and returns the log file info.
+///
+/// Assumes the input is a valid pod log file name.
+///
+/// Inspired by
+/// https://github.com/kubernetes/kubernetes/blob/31305966789525fca49ec26c289e565467d1f1c4/pkg/kubelet/kuberuntime/helpers.go#L186
+pub(super) fn parse_log_file_path(path: &str) -> Option<LogFileInfo<'_>> {
+    let mut components = path.rsplit('/');
+
+    let _log_file_name = components.next()?;
+    let container_name = components.next()?;
+    let pod_dir = components.next()?;
+
+    let mut pod_dir_components = pod_dir.rsplit(LOG_PATH_DELIMITER);
+
+    let pod_uid = pod_dir_components.next()?;
+    let pod_name = pod_dir_components.next()?;
+    let pod_namespace = pod_dir_components.next()?;
+
+    Some(LogFileInfo {
+        pod_namespace,
+        pod_name,
+        pod_uid,
+        container_name,
+    })
+}
+
+/// Contains the information extracted from the pod log file path.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct LogFileInfo<'a> {
+    pub pod_namespace: &'a str,
+    pub pod_name: &'a str,
+    pub pod_uid: &'a str,
+    pub container_name: &'a str,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_build_pod_logs_directory() {
+        let cases = vec![
+            // Valid inputs.
+ ( + ("sandbox0-ns", "sandbox0-name", "sandbox0-uid"), + "/var/log/pods/sandbox0-ns_sandbox0-name_sandbox0-uid", + ), + // Invalid inputs. + (("", "", ""), "/var/log/pods/__"), + ]; + + for ((in_namespace, in_name, in_uid), expected) in cases.into_iter() { + assert_eq!( + build_pod_logs_directory(in_namespace, in_name, in_uid), + PathBuf::from(expected) + ); + } + } + + #[test] + fn test_parse_log_file_path() { + let cases = vec![ + // Valid inputs. + ( + "/var/log/pods/sandbox0-ns_sandbox0-name_sandbox0-uid/sandbox0-container0-name/1.log", + Some(LogFileInfo { + pod_namespace: "sandbox0-ns", + pod_name: "sandbox0-name", + pod_uid: "sandbox0-uid", + container_name: "sandbox0-container0-name", + }), + ), + // Invalid inputs. + ("/var/log/pods/other", None), + ("qwe", None), + ("", None), + ]; + + for (input, expected) in cases.into_iter() { + assert_eq!(parse_log_file_path(input), expected); + } + } +} diff --git a/src/sources/kubernetes_logs/pod_metadata_annotator.rs b/src/sources/kubernetes_logs/pod_metadata_annotator.rs new file mode 100644 index 0000000000000..b1e919e24dea6 --- /dev/null +++ b/src/sources/kubernetes_logs/pod_metadata_annotator.rs @@ -0,0 +1,162 @@ +//! Annotates events with pod metadata. + +#![deny(missing_docs)] + +use super::path_helpers::parse_log_file_path; +use crate::{event::LogEvent, kubernetes as k8s, Event}; +use evmap10::ReadHandle; +use k8s_openapi::{api::core::v1::Pod, apimachinery::pkg::apis::meta::v1::ObjectMeta}; +use serde::{Deserialize, Serialize}; + +#[derive(Deserialize, Serialize, Debug, Clone)] +#[serde(deny_unknown_fields, default)] +pub struct FieldsSpec { + pub pod_name: String, + pub pod_namespace: String, + pub pod_uid: String, + pub pod_labels: String, +} + +impl Default for FieldsSpec { + fn default() -> Self { + Self { + pod_name: "kubernetes.pod_name".to_owned(), + pod_namespace: "kubernetes.pod_namespace".to_owned(), + pod_uid: "kubernetes.pod_uid".to_owned(), + pod_labels: "kubernetes.pod_labels".to_owned(), + } + } +} + +/// Annotate the event with pod metadata. +pub struct PodMetadataAnnotator { + pods_state_reader: ReadHandle>, + fields_spec: FieldsSpec, +} + +impl PodMetadataAnnotator { + /// Create a new [`PodMetadataAnnotator`]. + pub fn new( + pods_state_reader: ReadHandle>, + fields_spec: FieldsSpec, + ) -> Self { + Self { + pods_state_reader, + fields_spec, + } + } +} + +impl PodMetadataAnnotator { + /// Annotates an event with the information from the [`Pod::metadata`]. + /// The event has to be obtained from kubernetes log file, and have a + /// [`FILE_KEY`] field set with a file that the line came from. 
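+    ///
+    /// For example (the path is illustrative), for an event read from
+    /// `/var/log/pods/some-ns_some-pod_some-uid/container/0.log`, the pod UID
+    /// is extracted from the path, the pod is looked up in the state, and the
+    /// fields named by the [`FieldsSpec`] (such as `kubernetes.pod_name`) are
+    /// inserted into the event.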
+ pub fn annotate(&self, event: &mut Event, file: &str) -> Option<()> { + let log = event.as_mut_log(); + let uid = parse_log_file_path(file)?.pod_uid; + let guard = self.pods_state_reader.get(uid)?; + let entry = guard.get_one()?; + let pod: &Pod = entry.as_ref(); + annotate_from_metadata(log, &self.fields_spec, &pod.metadata); + Some(()) + } +} + +fn annotate_from_metadata(log: &mut LogEvent, fields_spec: &FieldsSpec, metadata: &ObjectMeta) { + for (ref key, ref val) in [ + (&fields_spec.pod_name, &metadata.name), + (&fields_spec.pod_namespace, &metadata.namespace), + (&fields_spec.pod_uid, &metadata.uid), + ] + .iter() + { + if let Some(val) = val { + log.insert(key, val); + } + } + + if let Some(labels) = &metadata.labels { + for (key, val) in labels.iter() { + log.insert(format!("{}.{}", fields_spec.pod_labels, key), val); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_annotate_from_metadata() { + let cases = vec![ + ( + FieldsSpec::default(), + ObjectMeta::default(), + LogEvent::default(), + ), + ( + FieldsSpec::default(), + ObjectMeta { + name: Some("sandbox0-name".to_owned()), + namespace: Some("sandbox0-ns".to_owned()), + uid: Some("sandbox0-uid".to_owned()), + labels: Some( + vec![ + ("sandbox0-label0".to_owned(), "val0".to_owned()), + ("sandbox0-label1".to_owned(), "val1".to_owned()), + ] + .into_iter() + .collect(), + ), + ..ObjectMeta::default() + }, + { + let mut log = LogEvent::default(); + log.insert("kubernetes.pod_name", "sandbox0-name"); + log.insert("kubernetes.pod_namespace", "sandbox0-ns"); + log.insert("kubernetes.pod_uid", "sandbox0-uid"); + log.insert("kubernetes.pod_labels.sandbox0-label0", "val0"); + log.insert("kubernetes.pod_labels.sandbox0-label1", "val1"); + log + }, + ), + ( + FieldsSpec { + pod_name: "name".to_owned(), + pod_namespace: "ns".to_owned(), + pod_uid: "uid".to_owned(), + pod_labels: "labels".to_owned(), + }, + ObjectMeta { + name: Some("sandbox0-name".to_owned()), + namespace: Some("sandbox0-ns".to_owned()), + uid: Some("sandbox0-uid".to_owned()), + labels: Some( + vec![ + ("sandbox0-label0".to_owned(), "val0".to_owned()), + ("sandbox0-label1".to_owned(), "val1".to_owned()), + ] + .into_iter() + .collect(), + ), + ..ObjectMeta::default() + }, + { + let mut log = LogEvent::default(); + log.insert("name", "sandbox0-name"); + log.insert("ns", "sandbox0-ns"); + log.insert("uid", "sandbox0-uid"); + log.insert("labels.sandbox0-label0", "val0"); + log.insert("labels.sandbox0-label1", "val1"); + log + }, + ), + ]; + + for (fields_spec, metadata, expected) in cases.into_iter() { + let mut log = LogEvent::default(); + annotate_from_metadata(&mut log, &fields_spec, &metadata); + assert_eq!(log, expected); + } + } +} diff --git a/src/sources/kubernetes_logs/transform_utils/mod.rs b/src/sources/kubernetes_logs/transform_utils/mod.rs new file mode 100644 index 0000000000000..4b24267e84d53 --- /dev/null +++ b/src/sources/kubernetes_logs/transform_utils/mod.rs @@ -0,0 +1 @@ +pub mod optional; diff --git a/src/sources/kubernetes_logs/transform_utils/optional.rs b/src/sources/kubernetes_logs/transform_utils/optional.rs new file mode 100644 index 0000000000000..df470c71b2e9e --- /dev/null +++ b/src/sources/kubernetes_logs/transform_utils/optional.rs @@ -0,0 +1,20 @@ +//! Optional transform. + +#![deny(missing_docs)] + +use crate::{event::Event, transforms::Transform}; + +/// Optional transform. +/// Passes events through the specified transform is any, otherwise passes them, +/// as-is. +/// Useful to avoid boxing the transforms. 
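+///
+/// A usage sketch (illustrative only; `Merge` here stands for any concrete
+/// [`Transform`]):
+///
+/// ```ignore
+/// // Enabled: events go through the wrapped transform.
+/// let enabled = Optional(Some(merge_transform));
+/// // Disabled: events are passed through as-is.
+/// let disabled = Optional::<Merge>(None);
+/// ```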
+pub struct Optional<T>(pub Option<T>);
+
+impl<T: Transform> Transform for Optional<T> {
+    fn transform(&mut self, event: Event) -> Option<Event> {
+        match self.0 {
+            Some(ref mut val) => val.transform(event),
+            None => Some(event),
+        }
+    }
+}
diff --git a/src/sources/kubernetes_logs/util.rs b/src/sources/kubernetes_logs/util.rs
new file mode 100644
index 0000000000000..acf5f18ce056c
--- /dev/null
+++ b/src/sources/kubernetes_logs/util.rs
@@ -0,0 +1,70 @@
+use bytes05::Bytes;
+use file_source::{paths_provider::PathsProvider, FileServer, FileServerShutdown};
+use futures::future::{select, Either};
+use futures::{pin_mut, Sink};
+use std::convert::Infallible;
+use std::error::Error;
+use std::{future::Future, time::Duration};
+use tokio::task::spawn_blocking;
+
+/// A tiny wrapper around a [`FileServer`] that runs it as a [`spawn_blocking`]
+/// task.
+pub async fn run_file_server<PP, C, S>(
+    file_server: FileServer<PP>,
+    chans: C,
+    shutdown: S,
+) -> Result<FileServerShutdown, tokio::task::JoinError>
+where
+    PP: PathsProvider + Send + 'static,
+    C: Sink<(Bytes, String)> + Unpin + Send + 'static,
+    <C as Sink<(Bytes, String)>>::Error: Error + Send,
+    S: Future + Unpin + Send + 'static,
+{
+    let span = info_span!("file_server");
+    let join_handle = spawn_blocking(move || {
+        let _enter = span.enter();
+        let result = file_server.run(chans, shutdown);
+        result.expect("file server exited with an error")
+    });
+    join_handle.await
+}
+
+/// Takes a `future` returning a result with an [`Infallible`] Ok-value and
+/// a `signal`, and returns a future that completes when the `future` errors or
+/// the `signal` completes.
+/// If `signal` is sent or cancelled, the `future` is dropped (and not polled
+/// anymore).
+pub async fn cancel_on_signal<E, F, S>(future: F, signal: S) -> Result<(), E>
+where
+    F: Future<Output = Result<Infallible, E>>,
+    S: Future<Output = ()>,
+{
+    pin_mut!(future);
+    pin_mut!(signal);
+    match select(future, signal).await {
+        Either::Left((future_result, _)) => match future_result {
+            Ok(_infallible) => unreachable!("ok value is infallible, thus impossible to reach"),
+            Err(err) => Err(err),
+        },
+        Either::Right(((), _)) => Ok(()),
+    }
+}
+
+pub async fn complete_with_deadline_on_signal<F, S>(
+    future: F,
+    signal: S,
+    deadline: Duration,
+) -> Result<<F as Future>::Output, tokio::time::Elapsed>
+where
+    F: Future,
+    S: Future<Output = ()>,
+{
+    pin_mut!(future);
+    pin_mut!(signal);
+    let future = match select(future, signal).await {
+        Either::Left((future_output, _)) => return Ok(future_output),
+        Either::Right(((), future)) => future,
+    };
+    pin_mut!(future);
+    tokio::time::timeout(deadline, future).await
+}
diff --git a/src/sources/mod.rs b/src/sources/mod.rs
index f6f597caf44aa..a2d5ee4e2dac9 100644
--- a/src/sources/mod.rs
+++ b/src/sources/mod.rs
@@ -15,6 +15,8 @@ pub mod internal_metrics;
 pub mod journald;
 #[cfg(all(feature = "sources-kafka", feature = "rdkafka"))]
 pub mod kafka;
+#[cfg(feature = "sources-kubernetes-logs")]
+pub mod kubernetes_logs;
 #[cfg(feature = "sources-logplex")]
 pub mod logplex;
 #[cfg(feature = "sources-prometheus")]
diff --git a/src/test_util.rs b/src/test_util.rs
index ed1bda3466376..9eed3a486559b 100644
--- a/src/test_util.rs
+++ b/src/test_util.rs
@@ -239,6 +239,16 @@ where
     rt.block_on(future)
 }
 
+pub fn block_on_std<F>(future: F) -> F::Output
+where
+    F: std::future::Future + Send + 'static,
+    F::Output: Send + 'static,
+{
+    let mut rt = runtime();
+
+    rt.block_on_std(future)
+}
+
 pub fn runtime() -> Runtime {
     Runtime::single_threaded().unwrap()
 }
diff --git a/src/transforms/mod.rs b/src/transforms/mod.rs
index dd8af5e4b5da5..6e1498754cd95 100644
--- a/src/transforms/mod.rs
+++ b/src/transforms/mod.rs
@@ -1,7 +1,7 @@
 use crate::Event;
use snafu::Snafu; -mod util; +pub mod util; #[cfg(feature = "transforms-add_fields")] pub mod add_fields;