diff --git a/.github/ISSUE_TEMPLATE/minor-release.md b/.github/ISSUE_TEMPLATE/minor-release.md index 40f950e1f6982..0ea25a30c0432 100644 --- a/.github/ISSUE_TEMPLATE/minor-release.md +++ b/.github/ISSUE_TEMPLATE/minor-release.md @@ -11,6 +11,8 @@ The week before the release: - `git fetch && git checkout origin/master && git checkout -b v0. && git push -u` - [ ] Create a new release preparation branch from `master` - `git checkout -b prepare-v0. && git push -u` +- [ ] Check if there is a newer version of Alpine or Debian available to update the release images + in `distribution/docker/`. Update if so. - [ ] Run `cargo vdev build release-cue` to generate a new cue file for the release - [ ] Add `changelog` key to generated cue file - [ ] `git log --no-merges --cherry-pick --right-only ...` diff --git a/.github/actions/spelling/allow.txt b/.github/actions/spelling/allow.txt index 7b0334fa0e905..d9b13fcf742c0 100644 --- a/.github/actions/spelling/allow.txt +++ b/.github/actions/spelling/allow.txt @@ -52,6 +52,7 @@ Enot Evercoss Explay FAQs +FQDNs Fabro Figma Flipboard @@ -410,6 +411,7 @@ timespan timestamped tzdata ubuntu +upstreaminfo useragents usergroups userguide diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index a21bb9d750a9a..ed5c241d38b0f 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -4,37 +4,26 @@ abcdefghijklmnopqrstuvwxyzand abced abortable acb -Acho ack'ing acking Acq -addrof -addtnl AEAD agentpayload aimd akx allowerased -alphanum -AMF amka -ampersat amping amqps amz amzn -anaana anchore androideabi andy -annanas ansicpg -ansix anumber anycondition -anymap anypb -ANZ apievent apipodspec apk @@ -45,16 +34,12 @@ aqf architecting archivable ARNOTAREALIDD -Arrowz arshiyasolei asdf asdfasdf -asis ASMS -aspiegel assertverify Asterix -Astring asynk atag atx @@ -77,20 +62,14 @@ autospawning autotools avro awscli -awseb awsec awslabs -aww axum Aziz azureresourceid babim -Baca -Bada badunit bak -baq -barbaz barfoo barieom baseof @@ -104,7 +83,6 @@ bcdea benchgen benchmarker benefritz -Berita bev bfb bgwriter @@ -113,7 +91,6 @@ Bincode bindgen bizbaz bla -blabla blabop blafoo blem @@ -121,19 +98,14 @@ blkio Bloblang Blockfi blockmanager -bloob blpop blt -bmobile -bononos bonzai -bools boop booper booperbot bopbla boringssl -boto bottlenecked bpower brackethighlighter @@ -203,7 +175,6 @@ cloudwatchlogs cmark CMK cnf -Coc CODEOWNERS colddb coldline @@ -211,13 +182,11 @@ commonmark comms compactarr compactmap -compiter componenterror componenteventsdropped componenteventsreceived componenteventssent composability -compvalue concating concats condrestart @@ -225,7 +194,6 @@ configkey configmap confl confy -Conkeror consigliere CONTEUDO cooldown @@ -248,28 +216,21 @@ cryptsoft csb cstats csvlog -cta Cthink -cubot customise customizability customtype cwl Dailywarehousing -damerau -darp daschl dashmap dataflows datafuselabs -Datanyze datasources datastream Datelike -dateref datid datname -davidhuie dbkind dbreader DBserver @@ -296,23 +257,19 @@ defattr defaultauthdb deff defghijklmnopqrstuvwxyzand -definit delaycompress deliverystream demoing -derp descriptorpb deser deserializations desync -DEUC developerguide devno DHAVE dhclient diffing diffs -DIGNO disintermediate distrib dld @@ -321,19 +278,16 @@ DNi dnsmsg docsearch docsrs -doesnotcrash dogcats Dogsketch dogsketches dogstatsd domhandler Doop -doot downcasted -DQUOTE -droid droptest dsl +dsmith dstat dstport dtype @@ -348,13 +302,11 @@ eabihf eay ebfcee 
edenhill -Edg edns eeyun efg efgh Elhage -Eluga emca EMON Emph @@ -362,7 +314,6 @@ emptypb emt Enableable encodable -Encryptor endef endler enduml @@ -372,7 +323,6 @@ enumdecl enumflags ENVARS envsubst -EOI EOIG EOL'ed Erfxl @@ -380,13 +330,11 @@ Err'ing errorf Errorsfor esb -ESPN esque etheus etl ETNUV etsy -EUA eur eventarray evented @@ -396,12 +344,10 @@ eventstoredb eventstreamsink evictingpages evmap -evntslog -EVO evt -EWENE ewma examplegroup +EXAMPLEn exitcodes exprhere extendedstatus @@ -412,20 +358,17 @@ extrepo Failable fakedata falco -fals fanatid fanouts fastcc fastmod fbarbar fbaro -FBAV fbf fcharset fcontext feeney festeburg -FFar ffebda ffeef fffffffff @@ -436,7 +379,6 @@ filecontents filterlist finalizable fingerprinter -firetv fizzaxbbuzz fizzbuzz fkn @@ -446,11 +388,7 @@ Flatbuffers flate flatmaps flattenings -fleep flork -florp -FME -fnclosure fng fnil Fomichev @@ -462,7 +400,6 @@ FOOBARy foobaz foobazbar foobla -foofoo foometric fooo foos @@ -470,6 +407,7 @@ footag footgunning Forcepoint formatdoc +FPa framestream FRecord freerunning @@ -481,20 +419,17 @@ fuchsnj fullhuman futs fuzzcheck -fuzzer fwfiles -FXPDEUC +GAmw GAPI gaugehistogram gaugor GC'ing gcra -gdx geh genericify genproto genrsa -Genx geoffleyland geolite getelementptr @@ -539,7 +474,6 @@ hashring hashset hashsum hba -Hbb hdrhistogram headchunk hec @@ -549,7 +483,6 @@ heka hereregexp herestring hexdump -Hfoo highlighters histo hname @@ -576,7 +509,6 @@ iamanapikey iamasplunkhectoken iana iarna -Ideos idhack IDML ified @@ -590,7 +522,6 @@ incrementalize indexmap indicatif indoc -Inettv influxdata ingesters ingestor @@ -599,19 +530,15 @@ initech Instrumentable interpolatedstring interpretervm -intex invalidauth invokefunction invtrapezium -inzone -IOA ioapiset ionicon iostat iouring iowait IPORHOST -iru isainfo isdbgrid isp @@ -639,7 +566,6 @@ jstype jsvs jszwedko jtype -JUC juchiast karppila kartar @@ -649,7 +575,6 @@ keybase keyclock keyid keypair -keystream keyxxxxx khvzak kib @@ -658,20 +583,20 @@ killproc kinesisfirehose kinit klog +Knx ktff kvlist kvs +Kxs labelmap lalrpop Lamport landingpad -lastitem lastword ldd leebenson leveldb lfd -lfoo libclang LIBGNUTLS liblogging @@ -682,7 +607,6 @@ linting listenfd litstr llen -Lme lnt lntable lntd @@ -692,7 +616,6 @@ logbar logdna logevents logfmt -logid lognamespace lognamespacing logplex @@ -702,20 +625,11 @@ logsense logsev logseverity logtypes -loguid -lookaround -lookupbufs losslessly lpop lpush -LPW -LQuery -LRule -LSQRBRACKET -lstr Luc luciofranco -luckystar lucperkins lukesteensen macports @@ -727,11 +641,9 @@ markability markdownify markdownlintrc marketo -matchall maxdepth maxed maxes -maxint maxs maxwritten maybeanothertest @@ -762,14 +674,11 @@ minioadmin miniodat minwindef mio -mirall misordering -Miui mkcert mkto mlua mmdb -mmdd Mmm moby mockwatchlogs @@ -777,13 +686,10 @@ modulesloaddir mooper moosh Mooshing -morefield moretags -morevalue mortems motivatingly MOZGIII -mpms mre msgpack mskv @@ -793,8 +699,6 @@ msv multiarch multievents multitenant -multiterm -multitermlookahead munging musleabihf muslueabihf @@ -812,12 +716,10 @@ mymachine mypod mytable myvalue -MZX nacked nacks Namazu namespacefoo -nananaman nananana nanosecs nats @@ -827,19 +729,14 @@ nbase ndarray ndjson nearline -nestedkey -NETTV neuronull newcerts newrelix nextest -nfield nfox ngx nightlies nindent -ning -nink nkey nmeta noack @@ -853,7 +750,6 @@ nomac NONALPHANUM nonbare noncycles -nonk nonsending nonstring noog @@ -864,11 +760,8 @@ nopqrstuvwxyz norc norecurse noreplace 
-norg -norgle norknoog norknork -no_run nosync notext notls @@ -883,40 +776,26 @@ nowin npipe NQTP nresamples -nullandnull nullishness numbackends -numericstart oap -Obar -Obigo OKD omfwd omitempty -ond -Onefootball oneline oneof onezone -onik -onk onlyfields onlyone ooba oobar ook -oopsie opcounters openstring opinsights oplog -opples -OPR optimizable -Optimus -organisations orgid -originsicname ostype otel otelcol @@ -926,10 +805,8 @@ otlp otlphttp ouicompat outputspatterns -outzone overaligned overalignment -overwritable owo oyaml pablosichert @@ -939,12 +816,9 @@ parallelizable pareto partitionable passthrough -patchelf pathbuf pathgen -peci peekable -peeker PEMS pgmajfault PII @@ -956,7 +830,6 @@ plork pnh podspec Ponge -ponk portpicker POSINT postinst @@ -972,9 +845,7 @@ prerot presetdir pretrunc prettydiff -prettytable primaryfont -printstd probot processname procid @@ -987,23 +858,15 @@ protoc protofbuf protosizer Prt -PRTG psv -PTST publickey purgecss pyld -QFn -QGIS -qmobile +Pzb qqq -qstr -queryroot -QUESTIONMARK quickcheck quix quuux -quuz qux quz qwe @@ -1012,7 +875,6 @@ rande RANDFILE rawconfig rawstring -rbaz rdkafka rdparty rdr @@ -1025,18 +887,15 @@ referenceable regexes regexset reinstantiate -Rekonq reloadable remapper remotehost reorganisation reparse -replacen replacepkgs replicaset replset reqwest -rer rereleased reserialize resharding @@ -1044,7 +903,6 @@ resourcemanager respawn restorecon retryable -rhv rkyv rmem rmi @@ -1053,18 +911,10 @@ rmpv rndc rngs rolledback -rootquery -roxmltree rpd rpush -rquery -RRRRRRRLLLLLLLLLLLLLLLLLLLLLLLL -RRule -rsplitn -RSQRBRACKET rstrings RTTs -rulenum runc runhcs rusoto @@ -1077,13 +927,11 @@ Rustinomicon rustls RUSTSEC rustup -rustyline rxi rxmsg rxs ryangjchandler ryu -saanich sadf samehost samenet @@ -1091,12 +939,10 @@ samerole sameuser sandboxed sandboxing -sby sccache schemaless schemars schoen -schucks scl sda sdata @@ -1104,16 +950,13 @@ SDID seahash secfrac Seedable -segmentbuf semanage sematext SEO -sequencenum serie serverlogs serviceaccount servicebus -Seznam sfixed sfrag sghall @@ -1121,8 +964,6 @@ shane sharedstatedir Shenzhen shiplift -shning -shnoog shortcode shortstat should've @@ -1140,7 +981,6 @@ sinknetworkbytessent sizecache Sizefor skinparam -SKIPDATA skywalking slashin slf @@ -1170,18 +1010,14 @@ spencergilbert splitn SPOF spog -spork springframework srcport SREs -SResult sret SRPMS ssekms ssn sspi -SSSZ -SSSZZ sstrings stabilises stackdrive @@ -1199,14 +1035,12 @@ strat strconv streamsink strng -strp structfield subchunks suberr subfolders subfooter sublimelinter -sublocation subsec substrategies subtagline @@ -1219,7 +1053,6 @@ supertrait suser sustainability svalue -Swiftfox Sya sysfs sysinit @@ -1229,7 +1062,6 @@ systemid Syu Szwedko tablesmap -tac tagfoo tagline tagset @@ -1239,18 +1071,11 @@ Takeaways targetgroup tarpit tcmalloc -tdkind -Techvision -tecno -Teleca -Telechips telecom -Telesys templatable templateable templating terabytes -termcolor terraform tes testevent @@ -1265,11 +1090,8 @@ thaweddb thicc Thimphu thinkies -Thisais thiserror thisisastring -thonk -thot threatmanager throughputs thrpt @@ -1288,7 +1110,6 @@ tobz tocbot todos tokio -Tolino Tomola tonydanza toolbars @@ -1296,41 +1117,30 @@ toolchains TOOLSDIRECTORY toolset toor -topbuzz topdir topojson toproto -torvec -Toughpad Toxiproxy Tpng Trauring Treemap Trello -Treo trialled triggerable tripwires Trivago trivy Troutwine -tru TRUSTSTORE TSDB Tsvg turbofish -Twitterbot twox txmsg txs -Tygron typechecked typetag -tzs uap -uaparser -uas 
-UCWEB udm UIDs uieao @@ -1340,9 +1150,7 @@ unacked undertagline underutilized underutilizing -Unescaping unevictable -ungrokkable unioning unitdir unmark @@ -1351,7 +1159,6 @@ unnests unstructuredlogentries unsync untuple -uol upgradable urql usecase @@ -1359,7 +1166,6 @@ userinfo userlands usermod userpass -ustr uucp UVY uwtable @@ -1367,7 +1173,6 @@ valfoo validpaths Varda vdev -Vdroid VECTORCFG vectordir vectordotdev @@ -1375,19 +1180,15 @@ vectorized vendored veryyyyyyy viceversa -VIERA viewkind visualising -VLC VMs VNQ volumeconfig vrl -VRR vts vvo vvv -VVVVVVVVRRRRRRRRRRRRRRRRR VYa wahhh waitsforfullbatch @@ -1398,16 +1199,11 @@ waninfo wasmtime watchexec watchlogs -Waterfox wayfor webgraphviz webservers websites -webviews weee -weekyear -Wellco -Weltbild wemustcontinuetodiscard wensite whoopsie @@ -1416,7 +1212,6 @@ willreturn winioctl wiredtiger wiremock -with'quote wix wixobj WIXUI @@ -1433,20 +1228,15 @@ writablelib writeback wrongpass wrongsecret -wronly wtcache wtime wtimeouts wtr wurstmeister wwang -xaablabla xact -xbar xcatsy Xcg -XENENE -Xiao xlarge xpack xscale @@ -1455,9 +1245,6 @@ XUtil xvf XVXv xxs -xxxxxxx -xxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxxxxxxxxx xzy YAMLs YBv @@ -1466,20 +1253,18 @@ Yellowdog yippeee yolo YRjhx +Ystd ytt -zalgo zam -Zania ZDfz zerk zibble zieme -Zii zirp +Zkwcmo zoob zoobub zoog -zook zoop zork zorp diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 0097bd40aa4f0..8b1f14d85576a 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -9,7 +9,17 @@ updates: - "domain: deps" commit-message: prefix: "chore(deps)" - open-pull-requests-limit: 10 + open-pull-requests-limit: 100 + - package-ecosystem: "docker" + directory: "/distribution/docker/" + schedule: + interval: "daily" + time: "04:00" # UTC + labels: + - "domain: deps" + commit-message: + prefix: "chore(deps)" + open-pull-requests-limit: 100 - package-ecosystem: "github-actions" directory: "/" schedule: diff --git a/.github/labeler.yml b/.github/labeler.yml index 4ef2df868cff8..4f8b8a336c965 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -23,6 +23,9 @@ "domain: ci": - scripts/**/* +"domain: vdev": +- vdev/**/* + "domain: releasing": - distribution/**/* - scripts/package-* diff --git a/.github/workflows/changes.yml.upstream b/.github/workflows/changes.yml.upstream new file mode 100644 index 0000000000000..db6c4225073e0 --- /dev/null +++ b/.github/workflows/changes.yml.upstream @@ -0,0 +1,227 @@ +# This workflow identifies changes between the base and the head ref, for use in +# other workflows to decide if they should be executed. + +name: Identify Changes + +on: + workflow_call: + # These inputs allow the filter action to be able to access the correct refs for + # comparison in changes detection, it is required as this is called from the + # merge_group context. 
+ inputs: + base_ref: + required: true + type: string + head_ref: + required: true + type: string + int_tests: + required: false + type: boolean + default: false + source: + required: false + type: boolean + default: true + outputs: + source: + value: ${{ jobs.source.outputs.source }} + dependencies: + value: ${{ jobs.source.outputs.dependencies }} + internal_events: + value: ${{ jobs.source.outputs.internal_events }} + cue: + value: ${{ jobs.source.outputs.cue }} + component_docs: + value: ${{ jobs.source.outputs.component_docs }} + markdown: + value: ${{ jobs.source.outputs.markdown }} + install: + value: ${{ jobs.source.outputs.install }} + k8s: + value: ${{ jobs.source.outputs.k8s }} + all-int: + value: ${{ jobs.int_tests.outputs.all-int }} + amqp: + value: ${{ jobs.int_tests.outputs.amqp }} + appsignal: + value: ${{ jobs.int_tests.outputs.appsignal }} + aws: + value: ${{ jobs.int_tests.outputs.aws }} + axiom: + value: ${{ jobs.int_tests.outputs.axiom }} + azure: + value: ${{ jobs.int_tests.outputs.azure }} + clickhouse: + value: ${{ jobs.int_tests.outputs.clickhouse }} + databend: + value: ${{ jobs.int_tests.outputs.databend }} + datadog: + value: ${{ jobs.int_tests.outputs.datadog }} + dnstap: + value: ${{ jobs.int_tests.outputs.dnstap }} + docker-logs: + value: ${{ jobs.int_tests.outputs.docker-logs }} + elasticsearch: + value: ${{ jobs.int_tests.outputs.elasticsearch }} + eventstoredb: + value: ${{ jobs.int_tests.outputs.eventstoredb }} + fluent: + value: ${{ jobs.int_tests.outputs.fluent }} + gcp: + value: ${{ jobs.int_tests.outputs.gcp }} + humio: + value: ${{ jobs.int_tests.outputs.humio }} + http-client: + value: ${{ jobs.int_tests.outputs.http-client }} + influxdb: + value: ${{ jobs.int_tests.outputs.influxdb }} + kafka: + value: ${{ jobs.int_tests.outputs.kafka }} + logstash: + value: ${{ jobs.int_tests.outputs.logstash }} + loki: + value: ${{ jobs.int_tests.outputs.loki }} + mongodb: + value: ${{ jobs.int_tests.outputs.mongodb }} + nats: + value: ${{ jobs.int_tests.outputs.nats }} + nginx: + value: ${{ jobs.int_tests.outputs.nginx }} + opentelemetry: + value: ${{ jobs.int_tests.outputs.opentelemetry }} + postgres: + value: ${{ jobs.int_tests.outputs.postgres }} + prometheus: + value: ${{ jobs.int_tests.outputs.prometheus }} + pulsar: + value: ${{ jobs.int_tests.outputs.pulsar }} + redis: + value: ${{ jobs.int_tests.outputs.redis }} + splunk: + value: ${{ jobs.int_tests.outputs.splunk }} + webhdfs: + value: ${{ jobs.int_tests.outputs.webhdfs }} + +jobs: + # Detects changes that are not specific to integration tests + source: + runs-on: ubuntu-20.04 + if: ${{ inputs.source }} + outputs: + source: ${{ steps.filter.outputs.source }} + dependencies: ${{ steps.filter.outputs.dependencies }} + internal_events: ${{ steps.filter.outputs.internal_events }} + cue: ${{ steps.filter.outputs.cue }} + component_docs: ${{ steps.filter.outputs.component_docs }} + markdown: ${{ steps.filter.outputs.markdown }} + install: ${{ steps.filter.outputs.install }} + k8s: ${{ steps.filter.outputs.k8s }} + steps: + - uses: actions/checkout@v3 + + - uses: dorny/paths-filter@v2 + id: filter + with: + base: ${{ inputs.base_ref }} + ref: ${{ inputs.head_ref }} + filters: | + source: + - ".github/workflows/test.yml" + - ".cargo/**" + - "benches/**" + - "lib/**" + - "proto/**" + - "scripts/**" + - "src/**" + - "tests/**" + - "build.rs" + - "Cargo.lock" + - "Cargo.toml" + - "Makefile" + - "rust-toolchain.toml" + - "vdev/**" + deny: + - 'deny.toml' + - "vdev/**" + dependencies: + - ".cargo/**" + - 'Cargo.toml' 
+ - 'Cargo.lock' + - 'rust-toolchain.toml' + - '.github/workflows/pr.yml' + - 'Makefile' + - 'scripts/cross/**' + - "vdev/**" + cue: + - 'website/cue/**' + - "vdev" + component_docs: + - 'scripts/generate-component-docs.rb' + - "vdev/**" + markdown: + - '**/**.md' + - "vdev/**" + internal_events: + - 'src/internal_events/**' + - "vdev/**" + docker: + - 'distribution/docker/**' + - "vdev/**" + install: + - ".github/workflows/install-sh.yml" + - "distribution/install.sh" + k8s: + - "src/sources/kubernetes_logs/**" + + # Detects changes that are specific to integration tests + int_tests: + runs-on: ubuntu-latest + if: ${{ inputs.int_tests }} + outputs: + all-int: ${{ steps.filter.outputs.all-int}} + amqp: ${{ steps.filter.outputs.amqp }} + appsignal: ${{ steps.filter.outputs.appsignal}} + aws: ${{ steps.filter.outputs.aws }} + axiom: ${{ steps.filter.outputs.axiom }} + azure: ${{ steps.filter.outputs.azure }} + clickhouse: ${{ steps.filter.outputs.clickhouse }} + databend: ${{ steps.filter.outputs.databend }} + datadog: ${{ steps.filter.outputs.datadog }} + dnstap: ${{ steps.filter.outputs.dnstap }} + docker-logs: ${{ steps.filter.outputs.docker-logs }} + elasticsearch: ${{ steps.filter.outputs.elasticsearch }} + eventstoredb: ${{ steps.filter.outputs.eventstoredb }} + fluent: ${{ steps.filter.outputs.fluent }} + gcp: ${{ steps.filter.outputs.gcp }} + humio: ${{ steps.filter.outputs.humio }} + http-client: ${{ steps.filter.outputs.http-client }} + influxdb: ${{ steps.filter.outputs.influxdb }} + kafka: ${{ steps.filter.outputs.kafka }} + logstash: ${{ steps.filter.outputs.logstash }} + loki: ${{ steps.filter.outputs.loki }} + mongodb: ${{ steps.filter.outputs.mongodb }} + nats: ${{ steps.filter.outputs.nats }} + nginx: ${{ steps.filter.outputs.nginx }} + opentelemetry: ${{ steps.filter.outputs.opentelemetry }} + postgres: ${{ steps.filter.outputs.postgres }} + prometheus: ${{ steps.filter.outputs.prometheus }} + pulsar: ${{ steps.filter.outputs.pulsar }} + redis: ${{ steps.filter.outputs.redis }} + splunk: ${{ steps.filter.outputs.splunk }} + webhdfs: ${{ steps.filter.outputs.webhdfs }} + steps: + - uses: actions/checkout@v3 + + # creates a yaml file that contains the filters for each integration, + # extracted from the output of the `vdev int ci-paths` command, which + # sources the paths from the scripts/integration/.../test.yaml files + - name: Create filter rules for integrations + run: cargo vdev int ci-paths > int_test_filters.yaml + + - uses: dorny/paths-filter@v2 + id: filter + with: + base: ${{ inputs.base_ref }} + ref: ${{ inputs.head_ref }} + filters: int_test_filters.yaml diff --git a/.github/workflows/cli.yml.upstream b/.github/workflows/cli.yml.upstream new file mode 100644 index 0000000000000..9d22fda8527bb --- /dev/null +++ b/.github/workflows/cli.yml.upstream @@ -0,0 +1,63 @@ +name: CLI - Linux + +on: + workflow_call: + +jobs: + test-cli: + runs-on: ubuntu-latest + env: + CARGO_INCREMENTAL: 0 + steps: + - name: (PR comment) Get PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: xt0rted/pull-request-comment-branch@v2 + id: comment-branch + + - name: (PR comment) Set latest commit status as pending + if: ${{ github.event_name == 'issue_comment' }} + uses: myrotvorets/set-commit-status-action@v1.1.7 + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + context: CLI - Linux + status: pending + + - name: (PR comment) Checkout PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: actions/checkout@v3 + with: 
+          ref: ${{ steps.comment-branch.outputs.head_ref }}
+
+      - name: Checkout branch
+        if: ${{ github.event_name != 'issue_comment' }}
+        uses: actions/checkout@v3
+
+      - name: Cache Cargo registry + index
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.cargo/bin/
+            ~/.cargo/registry/index/
+            ~/.cargo/registry/cache/
+            ~/.cargo/git/db/
+          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-cargo-
+
+      - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh
+      - run: bash scripts/environment/prepare.sh
+      - run: echo "::add-matcher::.github/matchers/rust.json"
+      - run: make test-cli
+      - name: Upload test results
+        run: scripts/upload-test-results.sh
+        if: always()
+
+      - name: (PR comment) Set latest commit status as ${{ job.status }}
+        uses: myrotvorets/set-commit-status-action@v1.1.7
+        if: always() && github.event_name == 'issue_comment'
+        with:
+          sha: ${{ steps.comment-branch.outputs.head_sha }}
+          token: ${{ secrets.GITHUB_TOKEN }}
+          context: CLI - Linux
+          status: ${{ job.status }}
diff --git a/.github/workflows/comment-trigger.yml.upstream b/.github/workflows/comment-trigger.yml.upstream
new file mode 100644
index 0000000000000..84cabd825ccf3
--- /dev/null
+++ b/.github/workflows/comment-trigger.yml.upstream
@@ -0,0 +1,118 @@
+# Comment Trigger
+#
+# This workflow is a central point for triggering workflow runs that normally run only as part of the merge queue,
+# on demand by a comment. The exception is the integration tests, which have their own workflow file for
+# comment triggers, as the logic is a bit more complex.
+#
+# The available triggers are:
+#
+# /ci-run-all : runs all of the below
+# /ci-run-cli : runs CLI - Linux
+# /ci-run-misc : runs Miscellaneous - Linux
+# /ci-run-component-features : runs Component Features - Linux
+# /ci-run-cross : runs Cross
+# /ci-run-unit-mac : runs Unit - Mac
+# /ci-run-unit-windows : runs Unit - Windows
+# /ci-run-environment : runs Environment Suite
+# /ci-run-regression : runs Regression Detection Suite
+
+name: Comment Trigger
+
+on:
+  issue_comment:
+    types: [created]
+
+env:
+  DD_ENV: "ci"
+  RUST_BACKTRACE: full
+  TEST_LOG: vector=debug
+  VERBOSE: true
+  CI: true
+  PROFILE: debug
+  # observing issues fetching boringssl via HTTPS in the OSX build, seeing if this helps
+  # can be removed when we switch back to the upstream openssl-sys crate
+  CARGO_NET_GIT_FETCH_WITH_CLI: true
+
+# The below concurrency group settings would let us cancel in-progress runs that were triggered with the
+# same comment on a given PR, which could save on time-consuming runs.
+# But GH does not currently support the github.event.comment.body as part of the concurrency name; this
+# appears to be due to its potential length.
+#concurrency: +# group: ${{ github.workflow }}-${{ github.event.issue.id }}-${{ github.event.comment.body }} +# cancel-in-progress: true + +jobs: + validate: + name: Validate comment + runs-on: ubuntu-latest + if: | + github.event.issue.pull_request && ( contains(github.event.comment.body, '/ci-run-all') + || contains(github.event.comment.body, '/ci-run-cli') + || contains(github.event.comment.body, '/ci-run-misc') + || contains(github.event.comment.body, '/ci-run-component-features') + || contains(github.event.comment.body, '/ci-run-cross') + || contains(github.event.comment.body, '/ci-run-unit-mac') + || contains(github.event.comment.body, '/ci-run-unit-windows') + || contains(github.event.comment.body, '/ci-run-environment') + || contains(github.event.comment.body, '/ci-run-regression') + ) + steps: + - name: Get PR comment author + id: comment + uses: tspascoal/get-user-teams-membership@v2 + with: + username: ${{ github.actor }} + team: 'Vector' + GITHUB_TOKEN: ${{ secrets.GH_PAT_ORG }} + + - name: Validate author membership + if: steps.comment.outputs.isTeamMember == 'false' + run: exit 1 + + cli: + needs: validate + if: contains(github.event.comment.body, '/ci-run-all') || contains(github.event.comment.body, '/ci-run-cli') + uses: ./.github/workflows/cli.yml + secrets: inherit + + misc: + needs: validate + if: contains(github.event.comment.body, '/ci-run-all') || contains(github.event.comment.body, '/ci-run-misc') + uses: ./.github/workflows/misc.yml + secrets: inherit + + component-features: + needs: validate + if: contains(github.event.comment.body, '/ci-run-all') || contains(github.event.comment.body, '/ci-run-component-features') + uses: ./.github/workflows/component_features.yml + secrets: inherit + + cross: + needs: validate + if: contains(github.event.comment.body, '/ci-run-all') || contains(github.event.comment.body, '/ci-run-cross') + uses: ./.github/workflows/cross.yml + secrets: inherit + + unit-mac: + needs: validate + if: contains(github.event.comment.body, '/ci-run-all') || contains(github.event.comment.body, '/ci-run-unit-mac') + uses: ./.github/workflows/unit_mac.yml + secrets: inherit + + unit-windows: + needs: validate + if: contains(github.event.comment.body, '/ci-run-all') || contains(github.event.comment.body, '/ci-run-unit-windows') + uses: ./.github/workflows/unit_windows.yml + secrets: inherit + + environment: + needs: validate + if: contains(github.event.comment.body, '/ci-run-all') || contains(github.event.comment.body, '/ci-run-environment') + uses: ./.github/workflows/environment.yml + secrets: inherit + + regression: + needs: validate + if: contains(github.event.comment.body, '/ci-run-all') || contains(github.event.comment.body, '/ci-run-regression') + uses: ./.github/workflows/regression.yml + secrets: inherit diff --git a/.github/workflows/baseline.yml.upstream b/.github/workflows/compilation-timings.yml.upstream similarity index 97% rename from .github/workflows/baseline.yml.upstream rename to .github/workflows/compilation-timings.yml.upstream index 2bc12be8c05c3..e96bea65ea946 100644 --- a/.github/workflows/baseline.yml.upstream +++ b/.github/workflows/compilation-timings.yml.upstream @@ -1,8 +1,8 @@ -# Executes various builds of vector to time the results in order to track build times. +# Executes various builds of vector to time the results in order to track compilation times. # # This workflow is unrelated to the Regression workflow. 
-name: Baseline Timings +name: Compilation Timings on: workflow_dispatch: diff --git a/.github/workflows/component_features.yml.upstream b/.github/workflows/component_features.yml.upstream new file mode 100644 index 0000000000000..d321a81f17964 --- /dev/null +++ b/.github/workflows/component_features.yml.upstream @@ -0,0 +1,61 @@ +# Component Features - Linux +# +# Validates that each component feature compiles +# +# Runs on: +# - scheduled UTC midnight Tues-Sat +# - on PR comment (see comment-trigger.yml) +# - on demand from github actions UI + +name: Component Features - Linux + +on: + workflow_call: + workflow_dispatch: + schedule: + # At midnight UTC Tue-Sat + - cron: '0 0 * * 2-6' + +jobs: + check-component-features: + # use free tier on schedule and 8 core to expedite results on demand invocation + runs-on: ${{ github.event_name == 'schedule' && 'ubuntu-latest' || fromJSON('["linux", "ubuntu-20.04-8core"]') }} + if: github.event_name == 'issue_comment' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' + steps: + - name: (PR comment) Get PR branch + if: github.event_name == 'issue_comment' + uses: xt0rted/pull-request-comment-branch@v2 + id: comment-branch + + - name: (PR comment) Set latest commit status as pending + if: github.event_name == 'issue_comment' + uses: myrotvorets/set-commit-status-action@v1.1.7 + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + context: Component Features - Linux + status: pending + + - name: (PR comment) Checkout PR branch + if: github.event_name == 'issue_comment' + uses: actions/checkout@v3 + with: + ref: ${{ steps.comment-branch.outputs.head_ref }} + + - name: Checkout branch + if: github.event_name != 'issue_comment' + uses: actions/checkout@v3 + + - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh + - run: bash scripts/environment/prepare.sh + - run: echo "::add-matcher::.github/matchers/rust.json" + - run: make check-component-features + + - name: (PR comment) Set latest commit status as ${{ job.status }} + if: always() && github.event_name == 'issue_comment' + uses: myrotvorets/set-commit-status-action@v1.1.7 + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + context: Component Features - Linux + status: ${{ job.status }} diff --git a/.github/workflows/cross.yml.upstream b/.github/workflows/cross.yml.upstream new file mode 100644 index 0000000000000..9d36cb4cb3240 --- /dev/null +++ b/.github/workflows/cross.yml.upstream @@ -0,0 +1,95 @@ +name: Cross + +on: + workflow_call: + +jobs: + cross-linux: + name: Cross - ${{ matrix.target }} + runs-on: ubuntu-latest + env: + CARGO_INCREMENTAL: 0 + strategy: + matrix: + target: + - x86_64-unknown-linux-gnu + - x86_64-unknown-linux-musl + - aarch64-unknown-linux-gnu + - aarch64-unknown-linux-musl + - armv7-unknown-linux-gnueabihf + - armv7-unknown-linux-musleabihf + steps: + + - name: (PR comment) Get PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: xt0rted/pull-request-comment-branch@v2 + id: comment-branch + + - name: (PR comment) Set latest commit status as pending + if: ${{ github.event_name == 'issue_comment' }} + uses: myrotvorets/set-commit-status-action@v1.1.7 + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + context: Cross + status: pending + + - name: (PR comment) Checkout PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: actions/checkout@v3 + with: + ref: ${{ 
steps.comment-branch.outputs.head_ref }} + + - name: Checkout branch + if: ${{ github.event_name != 'issue_comment' }} + uses: actions/checkout@v3 + + - uses: actions/cache@v3 + name: Cache Cargo registry + index + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo- + + - run: echo "::add-matcher::.github/matchers/rust.json" + - run: 'cargo install cross --version 0.2.4 --force --locked' + # Why is this build, not check? Because we need to make sure the linking phase works. + # aarch64 and musl in particular are notoriously hard to link. + # While it may be tempting to slot a `check` in here for quickness, please don't. + - run: make cross-build-${{ matrix.target }} + - uses: actions/upload-artifact@v3 + with: + name: "vector-debug-${{ matrix.target }}" + path: "./target/${{ matrix.target }}/debug/vector" + + - name: (PR comment) Set latest commit status as failed + uses: myrotvorets/set-commit-status-action@v1.1.7 + if: failure() && github.event_name == 'issue_comment' + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + context: Cross + status: 'failure' + + update-pr-status: + name: (PR comment) Signal result to PR + runs-on: ubuntu-20.04 + needs: cross-linux + if: needs.cross-linux.result == 'success' && github.event_name == 'issue_comment' + steps: + - name: (PR comment) Get PR branch + uses: xt0rted/pull-request-comment-branch@v2 + id: comment-branch + + - name: (PR comment) Submit PR result as success + uses: myrotvorets/set-commit-status-action@v1.1.7 + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + context: Cross + status: 'success' diff --git a/.github/workflows/environment.yml.upstream b/.github/workflows/environment.yml.upstream index 2e08442f4eeef..cdddb0a980db8 100644 --- a/.github/workflows/environment.yml.upstream +++ b/.github/workflows/environment.yml.upstream @@ -1,11 +1,11 @@ name: Environment Suite on: - pull_request: {} + workflow_call: + workflow_dispatch: push: branches: - master - workflow_dispatch: env: VERBOSE: true @@ -15,12 +15,34 @@ jobs: publish-new-environment: runs-on: ubuntu-20.04 steps: - - name: Checkout + - name: (PR comment) Get PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: xt0rted/pull-request-comment-branch@v2 + id: comment-branch + + - name: (PR comment) Set latest commit status as pending + if: ${{ github.event_name == 'issue_comment' }} + uses: myrotvorets/set-commit-status-action@v1.1.7 + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + context: Environment Suite + status: pending + + - name: (PR comment) Checkout PR branch + if: ${{ github.event_name == 'issue_comment' }} uses: actions/checkout@v3 + with: + ref: ${{ steps.comment-branch.outputs.head_ref }} + + - name: Checkout branch + if: ${{ github.event_name != 'issue_comment' }} + uses: actions/checkout@v3 + - name: Set up QEMU - uses: docker/setup-qemu-action@v2.1.0 + uses: docker/setup-qemu-action@v2.2.0 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2.5.0 + uses: docker/setup-buildx-action@v2.8.0 - name: Login to DockerHub uses: docker/login-action@v2.1.0 if: github.ref == 'refs/heads/master' @@ -29,7 +51,7 @@ jobs: password: ${{ secrets.CI_DOCKER_PASSWORD }} - name: Extract metadata (tags, labels) for Docker id: meta - uses: 
docker/metadata-action@c4ee3adeed93b1fa6a762f209fb01608c1a22f1e + uses: docker/metadata-action@818d4b7b91585d195f67373fd9cb0332e31a7175 with: images: timberio/vector-dev flavor: | @@ -41,10 +63,19 @@ jobs: org.opencontainers.image.title=Vector development environment org.opencontainers.image.url=https://github.com/vectordotdev/vector - name: Build and push - uses: docker/build-push-action@v4.0.0 + uses: docker/build-push-action@v4.1.1 with: context: . file: ./scripts/environment/Dockerfile push: ${{ github.ref == 'refs/heads/master' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} + + - name: (PR comment) Set latest commit status as ${{ job.status }} + uses: myrotvorets/set-commit-status-action@v1.1.7 + if: always() && github.event_name == 'issue_comment' + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + context: Environment Suite + status: ${{ job.status }} diff --git a/.github/workflows/gardener_issue_comment.yml.upstream b/.github/workflows/gardener_issue_comment.yml.upstream new file mode 100644 index 0000000000000..8ea89fb315145 --- /dev/null +++ b/.github/workflows/gardener_issue_comment.yml.upstream @@ -0,0 +1,89 @@ +# Gardener Issue Comment +# +# This workflow moves GH issues from the Gardener board's "Blocked / Waiting" column +# to the "Triage", so that the Gardener can assess the issue in light of new information. + +name: Gardener Issue Comment + +on: + issue_comment: + types: [created] + +jobs: + move-to-backlog: + name: Move issues back to Gardener project board Triage + runs-on: ubuntu-latest + if: contains(github.event.issue.url, 'issues') + steps: + - name: Move issue back to Triage if status is Blocked/Waiting + env: + GH_TOKEN: ${{ secrets.GH_PROJECT_PAT }} + run: | + issue_id=${{ github.event.issue.node_id }} + + # IDs fetched from https://docs.github.com/en/graphql/overview/explorer + project_id="PVT_kwDOAQFeYs4AAsTr" # Gardener + status_field_id="PVTF_lADOAQFeYs4AAsTrzgAXRuU" # Status + triage_option_id="2a08fafa" + + # ensures that the issue is already on board but also seems to be the only way to fetch + # the item id + item_id="$(gh api graphql -f query=' + mutation($project_id: ID!, $content_id: ID!) { + addProjectV2ItemById(input: {projectId: $project_id, contentId: $content_id}) { + item { + id + } + } + }' -f project_id="$project_id" -f content_id="$issue_id" -q '.data.addProjectV2ItemById.item.id' + )" + + echo "item_id: $item_id" + + if [ -z "$item_id" ] ; then + echo "Issue not found in Gardener board" + exit 0 + else + echo "Found issue on Gardener board" + fi + + current_status="$(gh api graphql -f query=' + query($item_id: ID!) { + node(id: $item_id) { + ... on ProjectV2Item { + fieldValueByName(name: "Status") { + ... 
on ProjectV2ItemFieldSingleSelectValue { + name + } + } + } + } + }' -f item_id="$item_id" + )" + + current_status=$(echo $current_status | jq -c -r '.["data"]["node"]["fieldValueByName"]["name"]') + + echo "Current issue status is: '${current_status}'" + + if [ "$current_status" = "Blocked / Waiting" ] ; then + echo "Moving issue from 'Blocked / Waiting' to 'Triage'" + gh api graphql -f query=' + mutation($project_id: ID!, $item_id: ID!, $field_id: ID!, $option_id: String) { + updateProjectV2ItemFieldValue( + input: { + projectId: $project_id + itemId: $item_id + fieldId: $field_id + value: { + singleSelectOptionId: $option_id + } + } + ) { + projectV2Item { + id + } + } + }' -f project_id="$project_id" -f item_id="$item_id" -f field_id="$status_field_id" -f option_id="$triage_option_id" + else + echo "Issue is in '${current_status}', not moving." + fi diff --git a/.github/workflows/gardener_open_pr.yml.upstream b/.github/workflows/gardener_open_pr.yml.upstream index 701bfbacae7f4..e9b0fef67ba05 100644 --- a/.github/workflows/gardener_open_pr.yml.upstream +++ b/.github/workflows/gardener_open_pr.yml.upstream @@ -18,7 +18,7 @@ jobs: with: username: ${{ github.actor }} team: vector - GITHUB_TOKEN: ${{ secrets.GH_PROJECT_PAT }} + GITHUB_TOKEN: ${{ secrets.GH_PAT_ORG }} - uses: actions/add-to-project@v0.5.0 if: ${{ steps.checkVectorMember.outputs.isTeamMember == 'false' }} with: diff --git a/.github/workflows/install-sh.yml.upstream b/.github/workflows/install-sh.yml.upstream index f9619dba9e542..045319a191642 100644 --- a/.github/workflows/install-sh.yml.upstream +++ b/.github/workflows/install-sh.yml.upstream @@ -1,46 +1,71 @@ name: Update install.sh Suite on: - push: - branches: - - master - paths: - - '.github/workflows/install-sh.yml' - - 'distribution/install.sh' + workflow_call: workflow_dispatch: - jobs: + sync-install: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - name: (PR comment) Get PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: xt0rted/pull-request-comment-branch@v2 + id: comment-branch + + - name: (PR comment) Set latest commit status as pending + if: ${{ github.event_name == 'issue_comment' }} + uses: myrotvorets/set-commit-status-action@v1.1.7 + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + context: Update install.sh Suite + status: pending + + - name: (PR comment) Checkout PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: actions/checkout@v3 + with: + ref: ${{ steps.comment-branch.outputs.head_ref }} + + - name: Checkout branch + if: ${{ github.event_name != 'issue_comment' }} + uses: actions/checkout@v3 + - run: pip3 install awscli --upgrade --user - env: AWS_ACCESS_KEY_ID: "${{ secrets.CI_AWS_ACCESS_KEY_ID }}" AWS_SECRET_ACCESS_KEY: "${{ secrets.CI_AWS_SECRET_ACCESS_KEY }}" run: make sync-install + - name: (PR comment) Set latest commit status as failed + uses: myrotvorets/set-commit-status-action@v1.1.7 + if: failure() && github.event_name == 'issue_comment' + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + context: Update install.sh Suite + status: 'failure' + test-install: - needs: - - sync-install + needs: sync-install runs-on: ubuntu-20.04 steps: - run: sudo apt-get install --yes curl bc - run: curl --proto '=https' --tlsv1.2 -sSf https://sh.vector.dev | bash -s -- -y - run: ~/.vector/bin/vector --version - install-shell-failure: - name: install-shell-failure - if: failure() - needs: - - sync-install - - 
test-install
-    runs-on: ubuntu-20.04
-    steps:
-      - name: Discord notification
-        env:
-          DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }}
-        uses: Ilshidur/action-discord@0.3.2
-        with:
-          args: "Update of sh.vector.dev failed: "
+      - name: (PR comment) Get PR branch
+        if: github.event_name == 'issue_comment'
+        uses: xt0rted/pull-request-comment-branch@v2
+        id: comment-branch
+
+      - name: (PR comment) Set latest commit status as ${{ job.status }}
+        if: github.event_name == 'issue_comment'
+        uses: myrotvorets/set-commit-status-action@v1.1.7
+        with:
+          sha: ${{ steps.comment-branch.outputs.head_sha }}
+          token: ${{ secrets.GITHUB_TOKEN }}
+          context: Update install.sh Suite
+          status: ${{ job.status }}
diff --git a/.github/workflows/integration-comment.yml.upstream b/.github/workflows/integration-comment.yml.upstream
new file mode 100644
index 0000000000000..d915a956e2bb8
--- /dev/null
+++ b/.github/workflows/integration-comment.yml.upstream
@@ -0,0 +1,176 @@
+# Integration Test Comment
+#
+# This workflow runs one or more integration tests triggered by a comment in a PR.
+# The comment must include '/ci-run-integration'.
+# Then, any of the integration names will trigger that specific integration.
+# 'all' will trigger every integration to run.
+# The order does not matter and can be anywhere inside the comment body.
+#
+# Examples:
+#
+# 1. Run a single integration test:
+#
+# /ci-run-integration amqp
+#
+# 2. Run three specific integration tests:
+#
+# /ci-run-integration dnstap redis amqp
+#
+# 3. Run all integration tests:
+#
+# /ci-run-integration all
+
+name: Integration Test Comment
+
+on:
+  issue_comment:
+    types: [created]
+
+env:
+  AWS_ACCESS_KEY_ID: "dummy"
+  AWS_SECRET_ACCESS_KEY: "dummy"
+  AXIOM_TOKEN: ${{ secrets.AXIOM_TOKEN }}
+  TEST_APPSIGNAL_PUSH_API_KEY: ${{ secrets.TEST_APPSIGNAL_PUSH_API_KEY }}
+  TEST_DATADOG_API_KEY: ${{ secrets.CI_TEST_DATADOG_API_KEY }}
+  CONTAINER_TOOL: "docker"
+  DD_ENV: "ci"
+  DD_API_KEY: ${{ secrets.DD_API_KEY }}
+  RUST_BACKTRACE: full
+  TEST_LOG: vector=debug
+  VERBOSE: true
+  CI: true
+  PROFILE: debug
+
+jobs:
+  prep-pr:
+    name: (PR comment) Signal pending to PR
+    runs-on: ubuntu-latest
+    if: contains(github.event.comment.body, '/ci-run-integration') || contains(github.event.comment.body, '/ci-run-all')
+    steps:
+      - name: Get PR comment author
+        id: comment
+        uses: tspascoal/get-user-teams-membership@v2
+        with:
+          username: ${{ github.actor }}
+          team: 'Vector'
+          GITHUB_TOKEN: ${{ secrets.GH_PAT_ORG }}
+
+      - name: Validate author membership
+        if: steps.comment.outputs.isTeamMember == 'false'
+        run: exit 1
+
+      - name: (PR comment) Get PR branch
+        uses: xt0rted/pull-request-comment-branch@v2
+        id: comment-branch
+
+      - name: (PR comment) Set latest commit status as pending
+        uses: myrotvorets/set-commit-status-action@v1.1.7
+        with:
+          sha: ${{ steps.comment-branch.outputs.head_sha }}
+          token: ${{ secrets.GITHUB_TOKEN }}
+          status: pending
+
+  test-integration:
+    uses: ./.github/workflows/integration-test.yml
+    with:
+      if: ${{ matrix.run.if }}
+      test_name: ${{ matrix.run.test_name }}
+    needs: prep-pr
+    secrets: inherit
+    strategy:
+      fail-fast: false
+      matrix:
+        run:
+          - test_name: 'amqp'
+            if: ${{ contains(github.event.comment.body, '/ci-run-integration-amqp') || contains(github.event.comment.body, '/ci-run-all') }}
+          - test_name: 'appsignal'
+            if: ${{ contains(github.event.comment.body, '/ci-run-integration-appsignal') || contains(github.event.comment.body, '/ci-run-all') }}
+          - test_name: 'aws'
+            if: ${{ contains(github.event.comment.body, '/ci-run-integration-aws') || 
contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'axiom' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-axiom') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'azure' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-azure') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'clickhouse' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-clickhouse') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'databend' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-databend') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'datadog-agent' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-datadog') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'datadog-logs' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-datadog') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'datadog-metrics' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-datadog') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'datadog-traces' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-datadog') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'dnstap' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-dnstap') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'docker-logs' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-docker-logs') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'elasticsearch' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-elasticsearch') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'eventstoredb' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-eventstoredb') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'fluent' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-fluent') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'gcp' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-gcp') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'humio' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-humio') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'http-client' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-http-client') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'influxdb' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-influxdb') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'kafka' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-kafka') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'logstash' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-logstash') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'loki' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-loki') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'mongodb' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-mongodb') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'nats' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-nats') || 
contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'nginx' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-nginx') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'opentelemetry' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-opentelemetry') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'postgres' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-postgres') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'prometheus' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-prometheus') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'pulsar' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-pulsar') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'redis' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-redis') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'shutdown' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-shutdown') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'splunk' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-splunk') || contains(github.event.comment.body, '/ci-run-all') }} + - test_name: 'webhdfs' + if: ${{ contains(github.event.comment.body, '/ci-run-integration-webhdfs') || contains(github.event.comment.body, '/ci-run-all') }} + + update-pr-status: + name: Signal result to PR + runs-on: ubuntu-latest + needs: test-integration + if: always() && (contains(github.event.comment.body, '/ci-run-integration') || contains(github.event.comment.body, '/ci-run-all')) + steps: + - name: Validate issue comment + if: github.event_name == 'issue_comment' + uses: tspascoal/get-user-teams-membership@v2 + with: + username: ${{ github.actor }} + team: 'Vector' + GITHUB_TOKEN: ${{ secrets.GH_PAT_ORG }} + + - name: (PR comment) Get PR branch + uses: xt0rted/pull-request-comment-branch@v2 + id: comment-branch + + - name: (PR comment) Submit PR result as ${{ needs.test-integration.result }} + uses: myrotvorets/set-commit-status-action@v1.1.7 + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + status: ${{ needs.test-integration.result }} diff --git a/.github/workflows/integration-test.yml.upstream b/.github/workflows/integration-test.yml.upstream index beef0d404454a..4d8d635fcb477 100644 --- a/.github/workflows/integration-test.yml.upstream +++ b/.github/workflows/integration-test.yml.upstream @@ -1,33 +1,28 @@ -name: Integration Test Suite +# This workflow is used to run an integration test. +# The most common use case is that it is triggered by another workflow, +# such as the Master Merge Queue Suite, or the Integration Comment. +# +# It can also be triggered on manual dispatch in CI however. +# In that use case, an input for the test name needs to be provided. +# TODO: check if the input is "all" , and run all, without a timeout? 
+ +name: Integration Test on: + workflow_call: + inputs: + if: + required: false + type: boolean + test_name: + required: true + type: string workflow_dispatch: - push: - branches: - - master - paths: - - ".github/workflows/integration-test.yml" - - ".cargo/**" - - "benches/**" - - "lib/**" - - "proto/**" - - "scripts/**" - - "src/**" - - "tests/**" - - "build.rs" - - "Cargo.lock" - - "Cargo.toml" - - "Makefile" - - "rust-toolchain" - pull_request: - -concurrency: - # For pull requests, cancel running workflows, for master, run all - # - # `github.event.number` exists for pull requests, otherwise fall back to SHA - # for master - group: ${{ github.workflow }}-${{ github.event.number || github.sha }} - cancel-in-progress: true + inputs: + test_name: + description: "Which integration to test." + required: true + type: string env: AWS_ACCESS_KEY_ID: "dummy" @@ -45,84 +40,36 @@ env: jobs: test-integration: - name: Integration - Linux, ${{ matrix.test }} - runs-on: [linux, ubuntu-20.04-8core] - if: | - !github.event.pull_request - || contains(github.event.pull_request.labels.*.name, 'ci-condition: integration tests enable') - strategy: - fail-fast: false - matrix: - include: - - test: 'amqp' - - test: 'appsignal' - - test: 'aws' - - test: 'axiom' - - test: 'azure' - - test: 'clickhouse' - - test: 'databend' - - test: 'datadog-agent' - - test: 'datadog-logs' - - test: 'datadog-metrics' - - test: 'datadog-traces' - - test: 'dnstap' - - test: 'docker-logs' - - test: 'elasticsearch' - - test: 'eventstoredb' - - test: 'fluent' - - test: 'gcp' - - test: 'humio' - - test: 'http-client' - - test: 'influxdb' - - test: 'kafka' - - test: 'logstash' - - test: 'loki' - - test: 'mongodb' - - test: 'nats' - - test: 'nginx' - - test: 'postgres' - - test: 'prometheus' - - test: 'pulsar' - - test: 'redis' - - test: 'shutdown' - - test: 'splunk' - - test: 'webhdfs' - timeout-minutes: 30 + runs-on: [linux, ubuntu-20.04-4core] + timeout-minutes: 40 + if: inputs.if || github.event_name == 'workflow_dispatch' steps: - - uses: actions/checkout@v3 + - name: (PR comment) Get PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: xt0rted/pull-request-comment-branch@v2 + id: comment-branch + + - name: (PR comment) Checkout PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: actions/checkout@v3 + with: + ref: ${{ steps.comment-branch.outputs.head_ref }} + + - name: Checkout branch + if: ${{ github.event_name != 'issue_comment' }} + uses: actions/checkout@v3 + - run: sudo npm -g install @datadog/datadog-ci - - run: make test-integration-${{ matrix.test }} + + - run: make test-integration-${{ inputs.test_name }} env: TEST_DATADOG_API_KEY: ${{ secrets.CI_TEST_DATADOG_API_KEY }} - SPLUNK_VERSION: ${{ matrix.env.SPLUNK_VERSION }} + - name: Upload test results run: scripts/upload-test-results.sh if: always() - - run: make test-integration-${{ matrix.test }}-cleanup + + - run: make test-integration-${{ inputs.test_name }}-cleanup if: ${{ always() }} env: TEST_DATADOG_API_KEY: ${{ secrets.CI_TEST_DATADOG_API_KEY }} - SPLUNK_VERSION: ${{ matrix.env.SPLUNK_VERSION }} - - test-integration-check: - name: test-integration-check - runs-on: ubuntu-20.04 - needs: - - test-integration - steps: - - name: validate - run: echo "OK" - - master-failure: - name: master-failure - if: failure() && github.ref == 'refs/heads/master' - needs: - - test-integration-check - runs-on: ubuntu-20.04 - steps: - - name: Discord notification - env: - DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }} - uses: 
Ilshidur/action-discord@0.3.2 - with: - args: "Master integration tests failed: " diff --git a/.github/workflows/integration.yml.upstream b/.github/workflows/integration.yml.upstream new file mode 100644 index 0000000000000..7f85870e7dff4 --- /dev/null +++ b/.github/workflows/integration.yml.upstream @@ -0,0 +1,142 @@ +# Integration Test Suite +# +# This workflow runs the integration tests. If the workflow is triggered in the merge queue, all integration tests +# are run. If the workflow is triggered in a PR commit, then the files changed in the PR are evaluated to determine +# if any integration tests will run. + +name: Integration Test Suite + +on: + pull_request: + merge_group: + types: [checks_requested] + +concurrency: + # `github.event.number` exists for pull requests, otherwise fall back to SHA for merge queue + group: ${{ github.workflow }}-${{ github.event.number || github.event.merge_group.head_sha }} + cancel-in-progress: true + +env: + AWS_ACCESS_KEY_ID: "dummy" + AWS_SECRET_ACCESS_KEY: "dummy" + CONTAINER_TOOL: "docker" + DD_ENV: "ci" + DD_API_KEY: ${{ secrets.DD_API_KEY }} + RUST_BACKTRACE: full + TEST_LOG: vector=debug + VERBOSE: true + CI: true + PROFILE: debug + # observing issues fetching boringssl via HTTPS in the OSX build, seeing if this helps + # can be removed when we switch back to the upstream openssl-sys crate + CARGO_NET_GIT_FETCH_WITH_CLI: true + +jobs: + + changes: + uses: ./.github/workflows/changes.yml + with: + base_ref: ${{ github.event.pull_request.base.ref }} + head_ref: ${{ github.event.pull_request.head.ref }} + source: false + int_tests: true + secrets: inherit + + # Calls the Integration Test workflow for each integration that was detected to have files changed that impact it. + integration-matrix: + uses: ./.github/workflows/integration-test.yml + with: + if: ${{ matrix.run.if }} + test_name: ${{ matrix.run.test_name }} + secrets: inherit + needs: changes + strategy: + fail-fast: false + matrix: + run: + - test_name: 'amqp' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.amqp == 'true' }} + - test_name: 'appsignal' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.appsignal == 'true' }} + - test_name: 'aws' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.aws == 'true' }} + - test_name: 'axiom' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.axiom == 'true' }} + - test_name: 'azure' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.azure == 'true' }} + - test_name: 'clickhouse' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.clickhouse == 'true' }} + - test_name: 'databend' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.databend == 'true' }} + - test_name: 'datadog-agent' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.datadog == 'true' }} + - test_name: 'datadog-logs' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.datadog == 'true' }} + - test_name: 'datadog-metrics' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || 
needs.changes.outputs.datadog == 'true' }} + - test_name: 'datadog-traces' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.datadog == 'true' }} + - test_name: 'dnstap' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.dnstap == 'true' }} + - test_name: 'docker-logs' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.docker-logs == 'true' }} + - test_name: 'elasticsearch' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.elasticsearch == 'true' }} + - test_name: 'eventstoredb' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.eventstoredb == 'true' }} + - test_name: 'fluent' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.fluent == 'true' }} + - test_name: 'gcp' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.gcp == 'true' }} + - test_name: 'humio' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.humio == 'true' }} + - test_name: 'http-client' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.http-client == 'true' }} + - test_name: 'influxdb' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.influxdb == 'true' }} + - test_name: 'kafka' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.kafka == 'true' }} + - test_name: 'logstash' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.logstash == 'true' }} + - test_name: 'loki' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.loki == 'true' }} + - test_name: 'mongodb' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.mongodb == 'true' }} + - test_name: 'nats' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.nats == 'true' }} + - test_name: 'nginx' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.nginx == 'true' }} + - test_name: 'opentelemetry' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.opentelemetry == 'true' }} + - test_name: 'postgres' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.postgres == 'true' }} + - test_name: 'prometheus' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.prometheus == 'true' }} + - test_name: 'pulsar' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.pulsar == 'true' }} + - test_name: 'redis' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.redis == 'true' }} + - test_name: 'shutdown' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' }} + - test_name: 'splunk' + 
if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.splunk == 'true' }} + - test_name: 'webhdfs' + if: ${{ github.event_name == 'merge_group' || needs.changes.outputs.int-all == 'true' || needs.changes.outputs.webhdfs == 'true' }} + + # This is a required status check, so it always needs to run if prior jobs failed, in order to mark the status correctly. + integration: + name: Integration Test Suite + runs-on: ubuntu-latest + if: always() + needs: + - integration-matrix + env: + FAILED: ${{ contains(needs.*.result, 'failure') }} + steps: + - run: | + echo "failed=${{ env.FAILED }}" + if [[ "$FAILED" == "true" ]] ; then + exit 1 + else + exit 0 + fi diff --git a/.github/workflows/k8s_e2e.yml.upstream b/.github/workflows/k8s_e2e.yml.upstream index dd1a0a13ac490..e7faa3a59555a 100644 --- a/.github/workflows/k8s_e2e.yml.upstream +++ b/.github/workflows/k8s_e2e.yml.upstream @@ -1,33 +1,33 @@ +# K8s E2E Suite +# +# This workflow runs under any of the following conditions: +# - manual dispatch in GH UI +# - on a PR commit if the kubernetes_logs source was changed +# - in the merge queue +# - on a schedule at midnight UTC Tue-Sat +# - on demand by either of the following comments in a PR: +# - '/ci-run-k8s' +# - '/ci-run-all' +# +# If the workflow trigger is the nightly schedule, all the k8s versions +# are run in the matrix, otherwise, only the latest is run. + name: K8S E2E Suite on: workflow_dispatch: - push: - branches: - - master - paths: - - ".github/workflows/k8s_e2e.yml" - - ".cargo/**" - - "benches/**" - - "lib/**" - - "proto/**" - - "scripts/**" - - "src/**" - - "tests/**" - - "build.rs" - - "Cargo.lock" - - "Cargo.toml" - - "Makefile" - - "rust-toolchain" - - "distribution/**" pull_request: + issue_comment: + types: [created] + merge_group: + types: [checks_requested] + schedule: + # At midnight UTC Tue-Sat + - cron: '0 0 * * 2-6' concurrency: - # For pull requests, cancel running workflows, for master, run all - # - # `github.event.number` exists for pull requests, otherwise fall back to SHA - # for master - group: ${{ github.workflow }}-${{ github.event.number || github.sha }} + group: ${{ github.workflow }}-${{ github.event.number || github.event.comment.html_url || github.event.merge_group.head_sha || github.event.schedule || github.sha }} + cancel-in-progress: true env: @@ -42,35 +42,96 @@ env: PROFILE: debug jobs: + validate: + name: Validate comment + runs-on: ubuntu-latest + if: | + github.event_name != 'issue_comment' || + ( github.event.issue.pull_request && + ( contains(github.event.comment.body, '/ci-run-all') || + contains(github.event.comment.body, '/ci-run-k8s') + ) + ) + steps: + - name: Get PR comment author + id: comment + uses: tspascoal/get-user-teams-membership@v2 + with: + username: ${{ github.actor }} + team: 'Vector' + GITHUB_TOKEN: ${{ secrets.GH_PAT_ORG }} + + - name: Validate author membership + if: steps.comment.outputs.isTeamMember == 'false' + run: exit 1 + + changes: + needs: validate + uses: ./.github/workflows/changes.yml + with: + base_ref: ${{ github.event.pull_request.base.ref }} + head_ref: ${{ github.event.pull_request.head.ref }} + secrets: inherit + build-x86_64-unknown-linux-gnu: name: Build - x86_64-unknown-linux-gnu - runs-on: [linux, ubuntu-20.04-8core] - if: | - !github.event.pull_request - || contains(github.event.pull_request.labels.*.name, 'ci-condition: k8s e2e tests enable') - || contains(github.event.pull_request.labels.*.name, 'ci-condition: k8s e2e all targets') + runs-on: 
[linux, ubuntu-20.04-4core] + needs: [changes, validate] + if: github.event_name != 'pull_request' || needs.changes.outputs.k8s == 'true' # cargo-deb requires a release build, but we don't need optimizations for tests env: CARGO_PROFILE_RELEASE_OPT_LEVEL: 0 CARGO_PROFILE_RELEASE_CODEGEN_UNITS: 256 CARGO_INCREMENTAL: 0 steps: - - uses: actions/checkout@v3 + - name: (PR comment) Get PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: xt0rted/pull-request-comment-branch@v2 + id: comment-branch + + - name: (PR comment) Set latest commit status as pending + if: ${{ github.event_name == 'issue_comment' }} + uses: myrotvorets/set-commit-status-action@v1.1.7 + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + status: pending + + - name: (PR comment) Checkout PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: actions/checkout@v3 + with: + ref: ${{ steps.comment-branch.outputs.head_ref }} + + - name: Checkout branch + if: ${{ github.event_name != 'issue_comment' }} + uses: actions/checkout@v3 + - uses: actions/cache@v3 with: path: | ~/.cargo/registry ~/.cargo/git key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: echo "::add-matcher::.github/matchers/rust.json" - run: VECTOR_VERSION="$(cargo vdev version)" make package-deb-x86_64-unknown-linux-gnu + - uses: actions/upload-artifact@v3 with: name: e2e-test-deb-package path: target/artifacts/* + - name: (PR comment) Set latest commit status as 'failure' + uses: myrotvorets/set-commit-status-action@v1.1.7 + if: failure() && github.event_name == 'issue_comment' + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + status: 'failure' + # GitHub Actions don't support `matrix` at the job-level `if:` condition. # We apply this workaround - compute `matrix` in a preceding job, and assign # it's value dynamically at the actual test job. 
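  # A minimal sketch of that workaround (job and output names here are illustrative,
  # not part of this change): one job writes the matrix JSON to $GITHUB_OUTPUT and the
  # test job expands it with fromJson():
  #
  #   plan:
  #     runs-on: ubuntu-latest
  #     outputs:
  #       matrix: ${{ steps.set-matrix.outputs.matrix }}
  #     steps:
  #       - id: set-matrix
  #         run: echo 'matrix={"kubernetes_version":["v1.23.3"]}' >> "$GITHUB_OUTPUT"
  #   test:
  #     needs: plan
  #     strategy:
  #       matrix: ${{ fromJson(needs.plan.outputs.matrix) }}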
@@ -79,13 +140,11 @@ jobs: # See https://github.community/t/feature-request-and-use-case-example-to-allow-matrix-in-if-s/126067 compute-k8s-test-plan: name: Compute K8s test plan - runs-on: [linux, ubuntu-20.04-8core] + runs-on: ubuntu-latest + needs: [changes, validate] + if: github.event_name != 'pull_request' || needs.changes.outputs.k8s == 'true' outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} - if: | - !github.event.pull_request - || contains(github.event.pull_request.labels.*.name, 'ci-condition: k8s e2e tests enable') - || contains(github.event.pull_request.labels.*.name, 'ci-condition: k8s e2e all targets') steps: - uses: actions/github-script@v6.4.1 id: set-matrix @@ -101,11 +160,11 @@ jobs: // https://cloud.google.com/kubernetes-engine/docs/release-notes // https://docs.microsoft.com/en-us/azure/aks/supported-kubernetes-versions?tabs=azure-cli#aks-kubernetes-release-calendar const kubernetes_version = [ - { version: "v1.23.3", is_essential: true }, - { version: "v1.22.5", is_essential: true }, - { version: "v1.21.8", is_essential: true }, - { version: "v1.20.14", is_essential: true }, - { version: "v1.19.8" }, + { version: "v1.23.3", is_essential: true }, + { version: "v1.22.5", is_essential: false }, + { version: "v1.21.8", is_essential: false }, + { version: "v1.20.14", is_essential: false }, + { version: "v1.19.8", is_essential: false }, ] const container_runtime = [ "docker", @@ -113,15 +172,10 @@ jobs: // https://github.com/kubernetes/minikube/issues/12928 // "crio", ] - const ci_condition_label = 'ci-condition: k8s e2e all targets' - // Planing. - const is_in_pull_request = !!context.payload.pull_request; - const should_test_all_targets = ( - !is_in_pull_request || - context.payload.pull_request.labels.some(label => label.name === ci_condition_label) - ) - const filter_targets = array => array.filter(val => should_test_all_targets || val.is_essential) + // Run all versions if triggered by nightly schedule. Otherwise only run latest. 
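+            // (`context.eventName` in actions/github-script mirrors `github.event_name`,
+            // so it equals "schedule" only for the nightly cron trigger; every other
+            // trigger filters the matrix down to the versions marked is_essential.)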
+ const run_all = context.eventName == "schedule"; + const filter_targets = array => array.filter(val => run_all || val.is_essential) const matrix = { minikube_version, @@ -139,44 +193,83 @@ jobs: test-e2e-kubernetes: name: K8s ${{ matrix.kubernetes_version.version }} / ${{ matrix.container_runtime }} (${{ matrix.kubernetes_version.role }}) - runs-on: [linux, ubuntu-20.04-8core] + runs-on: [linux, ubuntu-20.04-4core] needs: + - validate - build-x86_64-unknown-linux-gnu - compute-k8s-test-plan strategy: matrix: ${{ fromJson(needs.compute-k8s-test-plan.outputs.matrix) }} fail-fast: false steps: - - name: Checkout + - name: (PR comment) Get PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: xt0rted/pull-request-comment-branch@v2 + id: comment-branch + + - name: (PR comment) Checkout PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: actions/checkout@v3 + with: + ref: ${{ steps.comment-branch.outputs.head_ref }} + + - name: Checkout branch + if: ${{ github.event_name != 'issue_comment' }} uses: actions/checkout@v3 + - uses: actions/download-artifact@v3 with: name: e2e-test-deb-package path: target/artifacts + - name: Setup Minikube run: scripts/ci-setup-minikube.sh env: KUBERNETES_VERSION: ${{ matrix.kubernetes_version.version }} MINIKUBE_VERSION: ${{ matrix.minikube_version }} CONTAINER_RUNTIME: ${{ matrix.container_runtime }} + - run: make test-e2e-kubernetes env: USE_MINIKUBE_CACHE: "true" SKIP_PACKAGE_DEB: "true" CARGO_INCREMENTAL: 0 - master-failure: - name: master-failure - if: failure() && github.ref == 'refs/heads/master' - needs: - - build-x86_64-unknown-linux-gnu - - compute-k8s-test-plan - - test-e2e-kubernetes - runs-on: ubuntu-20.04 + - name: (PR comment) Set latest commit status as failure + uses: myrotvorets/set-commit-status-action@v1.1.7 + if: failure() && github.event_name == 'issue_comment' + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + status: 'failure' + + final-result: + name: K8s E2E Suite + runs-on: ubuntu-latest + needs: [test-e2e-kubernetes, validate] + if: | + always() && (github.event_name != 'issue_comment' || (github.event.issue.pull_request + && (contains(github.event.comment.body, '/ci-run-k8s') || contains(github.event.comment.body, '/ci-run-all')))) + env: + FAILED: ${{ contains(needs.*.result, 'failure') }} steps: - - name: Discord notification - env: - DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }} - uses: Ilshidur/action-discord@0.3.2 - with: - args: "Master k8s e2e tests failed: " + - name: (PR comment) Get PR branch + if: success() && github.event_name == 'issue_comment' + uses: xt0rted/pull-request-comment-branch@v2 + id: comment-branch + + - name: (PR comment) Submit PR result as success + if: success() && github.event_name == 'issue_comment' + uses: myrotvorets/set-commit-status-action@v1.1.7 + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + status: 'success' + + - run: | + echo "failed=${{ env.FAILED }}" + if [[ "$FAILED" == "true" ]] ; then + exit 1 + else + exit 0 + fi diff --git a/.github/workflows/master_merge_queue.yml.upstream b/.github/workflows/master_merge_queue.yml.upstream new file mode 100644 index 0000000000000..63b0a8805a2dd --- /dev/null +++ b/.github/workflows/master_merge_queue.yml.upstream @@ -0,0 +1,128 @@ +# Master Merge Queue Test Suite +# +# This workflow orchestrates a collection of workflows that are required for the merge queue check. 
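+# Each of those workflows is called with `uses: ./.github/workflows/<name>.yml`, which
+# requires the called workflow to declare an `on: workflow_call` trigger.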
+# +# Most of the workflows that are jobs within this one, are able to be run on demand +# by issuing a PR comment with the respective command to trigger said workflow. +# +# The design of this workflow relies on the first real job "changes" to detect file +# changes against the base, and each downstream workflow after that will only be +# called if the files for that area have changed. +# + +name: Master Merge Queue Test Suite + +on: + # Only want to run this on merge queue, but because GH doesn't allow specifying different required checks + # for pull request and merge queue, we need to "run" it in pull request, but in the jobs we will just auto pass. + pull_request: + merge_group: + types: [checks_requested] + +concurrency: + # `github.event.number` exists for pull requests, otherwise fall back to SHA for merge queue + group: ${{ github.workflow }}-${{ github.event.number || github.event.merge_group.head_sha }} + cancel-in-progress: true + +env: + AWS_ACCESS_KEY_ID: "dummy" + AWS_SECRET_ACCESS_KEY: "dummy" + CONTAINER_TOOL: "docker" + DD_ENV: "ci" + DD_API_KEY: ${{ secrets.DD_API_KEY }} + RUST_BACKTRACE: full + TEST_LOG: vector=debug + VERBOSE: true + CI: true + PROFILE: debug + # observing issues fetching boringssl via HTTPS in the OSX build, seeing if this helps + # can be removed when we switch back to the upstream openssl-sys crate + CARGO_NET_GIT_FETCH_WITH_CLI: true + +jobs: + # This is the entry job which is required for all the actual tests in this workflow. + # If we don't run this job (such as in a pull request), then by consequence all downstream + # test jobs are not run. This allows us to not have to check for merge group in each job. + changes: + if: ${{ github.event_name == 'merge_group' }} + uses: ./.github/workflows/changes.yml + with: + base_ref: ${{ github.event.merge_group.base_ref }} + head_ref: ${{ github.event.merge_group.head_ref }} + secrets: inherit + + test-cli: + if: needs.changes.outputs.source == 'true' + uses: ./.github/workflows/cli.yml + needs: changes + secrets: inherit + + test-misc: + if: needs.changes.outputs.source == 'true' + uses: ./.github/workflows/misc.yml + needs: changes + secrets: inherit + + test-environment: + uses: ./.github/workflows/environment.yml + needs: changes + secrets: inherit + + check-msrv: + if: needs.changes.outputs.source == 'true' + uses: ./.github/workflows/msrv.yml + needs: changes + secrets: inherit + + cross-linux: + # We run cross checks when dependencies change to ensure they still build. + # This helps us avoid adopting dependencies that aren't compatible with other architectures. + if: needs.changes.outputs.dependencies == 'true' + uses: ./.github/workflows/cross.yml + needs: changes + secrets: inherit + + unit-mac: + if: needs.changes.outputs.source == 'true' + uses: ./.github/workflows/unit_mac.yml + needs: changes + secrets: inherit + + unit-windows: + if: needs.changes.outputs.source == 'true' + uses: ./.github/workflows/unit_windows.yml + needs: changes + secrets: inherit + + install-sh: + if: needs.changes.outputs.install == 'true' + uses: ./.github/workflows/install-sh.yml + needs: changes + secrets: inherit + + master-merge-queue-check: + name: Master Merge Queue Suite + # Always run this so that pull_request triggers are marked as success. 
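+    # `always()` keeps this job from being skipped when an upstream job fails, and the
+    # FAILED env below folds every `needs.*.result` into a single signal for the required check.
+    # On `pull_request` the needed jobs are skipped (not failed), so the check still passes.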
+ if: always() + runs-on: ubuntu-20.04 + needs: + - changes + - test-cli + - test-misc + - test-environment + - check-msrv + - cross-linux + - unit-mac + - unit-windows + - install-sh + env: + FAILED: ${{ contains(needs.*.result, 'failure') }} + steps: + - name: exit + run: | + echo "failed=${{ env.FAILED }}" + if [[ "$FAILED" == "true" ]] ; then + exit 1 + else + exit 0 + fi diff --git a/.github/workflows/misc.yml.upstream b/.github/workflows/misc.yml.upstream new file mode 100644 index 0000000000000..4af7ab44a3bef --- /dev/null +++ b/.github/workflows/misc.yml.upstream @@ -0,0 +1,62 @@ +name: Miscellaneous - Linux + +on: + workflow_call: + +jobs: + test-misc: + runs-on: [linux, ubuntu-20.04-4core] + env: + CARGO_INCREMENTAL: 0 + steps: + - name: (PR comment) Get PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: xt0rted/pull-request-comment-branch@v2 + id: comment-branch + + - name: (PR comment) Set latest commit status as pending + if: ${{ github.event_name == 'issue_comment' }} + uses: myrotvorets/set-commit-status-action@v1.1.7 + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + context: Miscellaneous - Linux + status: pending + + - name: (PR comment) Checkout PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: actions/checkout@v3 + with: + ref: ${{ steps.comment-branch.outputs.head_ref }} + + - name: Checkout branch + if: ${{ github.event_name != 'issue_comment' }} + uses: actions/checkout@v3 + + - uses: actions/cache@v3 + name: Cache Cargo registry + index + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo- + + - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh + - run: bash scripts/environment/prepare.sh + - run: echo "::add-matcher::.github/matchers/rust.json" + - run: make test-behavior + - run: make check-examples + - run: make test-docs + + - name: (PR comment) Set latest commit status as ${{ job.status }} + uses: myrotvorets/set-commit-status-action@v1.1.7 + if: always() && github.event_name == 'issue_comment' + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + context: Miscellaneous - Linux + status: ${{ job.status }} diff --git a/.github/workflows/msrv.yml.upstream b/.github/workflows/msrv.yml.upstream new file mode 100644 index 0000000000000..9a82d1ddecbd0 --- /dev/null +++ b/.github/workflows/msrv.yml.upstream @@ -0,0 +1,21 @@ +name: Check minimum supported Rust version + +on: + workflow_call: + +env: + RUST_BACKTRACE: full + CI: true + PROFILE: debug + # observing issues fetching boringssl via HTTPS in the OSX build, seeing if this helps + # can be removed when we switch back to the upstream openssl-sys crate + CARGO_NET_GIT_FETCH_WITH_CLI: true + +jobs: + check-msrv: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh + - run: cargo install cargo-msrv --version 0.15.1 + - run: cargo msrv verify diff --git a/.github/workflows/publish.yml.upstream b/.github/workflows/publish.yml.upstream index cec3d34d2fc1f..8265f10e6f5c0 100644 --- a/.github/workflows/publish.yml.upstream +++ b/.github/workflows/publish.yml.upstream @@ -209,7 +209,7 @@ jobs: build-x86_64-apple-darwin-packages: name: Build Vector for x86_64-apple-darwin (.tar.gz) - runs-on: macos-11 + runs-on: macos-latest-xl needs: 
generate-publish-metadata env: VECTOR_VERSION: ${{ needs.generate-publish-metadata.outputs.vector_version }} @@ -292,11 +292,11 @@ jobs: strategy: matrix: container: - - ubuntu:14.04 - ubuntu:16.04 - ubuntu:18.04 - ubuntu:20.04 - ubuntu:22.04 + - ubuntu:22.10 - ubuntu:23.04 - debian:10 - debian:11 @@ -436,12 +436,12 @@ jobs: username: ${{ secrets.CI_DOCKER_USERNAME }} password: ${{ secrets.CI_DOCKER_PASSWORD }} - name: Set up QEMU - uses: docker/setup-qemu-action@v2.1.0 + uses: docker/setup-qemu-action@v2.2.0 with: platforms: all - name: Set up Docker Buildx id: buildx - uses: docker/setup-buildx-action@v2.5.0 + uses: docker/setup-buildx-action@v2.8.0 with: version: latest install: true @@ -554,7 +554,7 @@ jobs: publish-github: name: Publish to GitHub # We only publish to GitHub for versioned releases, not nightlies. - if: ${{ inputs.channel == 'release' }} + if: inputs.channel == 'release' runs-on: ubuntu-20.04 needs: - generate-publish-metadata @@ -624,7 +624,7 @@ jobs: publish-homebrew: name: Publish to Homebrew # We only publish to Homebrew for versioned releases, not nightlies. - if: ${{ inputs.channel == 'release' }} + if: inputs.channel == 'release' runs-on: ubuntu-20.04 needs: - generate-publish-metadata @@ -644,7 +644,7 @@ jobs: publish-cloudsmith: name: Publish to Cloudsmith # We only publish to CloudSmith for versioned releases, not nightlies. - if: ${{ inputs.channel == 'release' }} + if: inputs.channel == 'release' runs-on: ubuntu-20.04 needs: - generate-publish-metadata @@ -754,32 +754,3 @@ jobs: release: "any-version" republish: "true" file: "target/artifacts/vector-${{ env.VECTOR_VERSION }}-1.armv7.rpm" - - publish-failure: - name: Send Publish Failure Notification - if: ${{ inputs.channel != 'custom' }} && failure() - runs-on: ubuntu-20.04 - needs: - - generate-publish-metadata - - build-x86_64-unknown-linux-gnu-packages - - build-x86_64-unknown-linux-musl-packages - - build-aarch64-unknown-linux-musl-packages - - build-aarch64-unknown-linux-gnu-packages - - build-x86_64-apple-darwin-packages - - build-x86_64-pc-windows-msvc-packages - - build-armv7-unknown-linux-gnueabihf-packages - - build-armv7-unknown-linux-musleabihf-packages - - deb-verify - - rpm-verify - - macos-verify - - publish-docker - - publish-s3 - - publish-cloudsmith - - publish-github - steps: - - name: Send notification to Discord - env: - DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }} - uses: Ilshidur/action-discord@0.3.2 - with: - args: "${{ inputs.channel }} failed: " diff --git a/.github/workflows/regression.yml.upstream b/.github/workflows/regression.yml.upstream index ec11abd37f65c..4d770d496486f 100644 --- a/.github/workflows/regression.yml.upstream +++ b/.github/workflows/regression.yml.upstream @@ -1,93 +1,286 @@ -# Regression Detection +# Regression Detection Suite +# +# This workflow runs under the following conditions: +# - in the merge queue if any source files were modified, added or deleted. +# - on demand by a PR comment matching either of: +# - '/ci-run-regression' +# - '/ci-run-all' +# (the comment issuer must be a member of the Vector GH team) # # This workflow runs our regression detection experiments, which are relative -# evaluations of the base SHA for the PR to whatever SHA was just pushed into -# the project (unless that SHA happens to be master branch HEAD). 
The goal is to -# give quick-ish feedback on all-up Vector for a variety of configs as to -# whether throughput performance has gone down, gotten more variable in the +# evaluations of the base SHA and head SHA, whose determination depends on how +# the workflow is invoked. +# +# The goal is to give quick-ish feedback on all-up Vector for a variety of configs +# as to whether throughput performance has gone down, gotten more variable in the # pushed SHA. # # Regression detection is always done relative to the pushed SHA, meaning any # changes you introduce to the experiment will be picked up both for the base -# SHA variant and your current SHA. Tags are SHA-SHA. The first SHA is the one -# that triggered this workflow, the second is the one of the Vector being -# tested. For comparison images the two SHAs are identical. +# SHA variant and your current SHA. +# +# Docker image tags are SHA-SHA. The first SHA is the one that triggered this +# workflow, the second is the one of the Vector being tested. +# For comparison images the two SHAs are identical. -name: Regression Detector +name: Regression Detection Suite on: - pull_request: - paths-ignore: - - "docs/**" - - "rfcs/**" - - "website/**" merge_group: types: [checks_requested] + workflow_call: + # Don't want to run this on each PR commit, but because GH doesn't allow specifying different required checks + # for pull request and merge queue, we need to "run" it in pull request, but in the jobs we will just auto pass. + pull_request: + +concurrency: + group: ${{ github.workflow }}-${{ github.event.merge_group.head_sha || github.sha }} + cancel-in-progress: true jobs: - cancel-previous: - runs-on: ubuntu-22.04 - timeout-minutes: 3 + + # Only run this workflow if files changed in areas that could possibly introduce a regression + should-run: + runs-on: ubuntu-latest + if: github.event_name != 'pull_request' + outputs: + source_changed: ${{ steps.filter.outputs.SOURCE_CHANGED }} + comment_valid: ${{ steps.comment.outputs.isTeamMember }} steps: - - uses: styfle/cancel-workflow-action@0.11.0 - with: - access_token: ${{ secrets.GITHUB_TOKEN }} - all_but_latest: true # can cancel workflows scheduled later + - uses: actions/checkout@v3 + + - name: Collect file changes + id: changes + if: github.event_name == 'merge_group' + uses: dorny/paths-filter@v2 + with: + base: ${{ github.event.merge_group.base_ref }} + ref: ${{ github.event.merge_group.head_ref }} + list-files: shell + filters: | + all_changed: + - added|deleted|modified: "**" + ignore: + - "./.github/**" + - "./.gitignore" + - "distribution/**" + - "rust-doc/**" + - "docs/**" + - "rfcs/**" + - "testing/**" + - "tilt/**" + - "website/**" + - "*.md" + - "Tiltfile" + - "netlify.toml" + - "NOTICE" + - "LICENSE-3rdparty.csv" + - "LICENSE" + + # This step allows us to conservatively run the tests if we added a new + # file or directory for source code, but forgot to add it to this workflow. + # Instead, we may unnecessarily run the test on new file or dir additions that + # wouldn't likely introduce regressions. 
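+      # Worked example (paths are hypothetical): if a PR touches src/sinks/loki/mod.rs and
+      # docs/setup.md, only docs/setup.md matches the ignore filter, the difference computed
+      # below is non-empty, and SOURCE_CHANGED becomes "true"; a docs-only change yields "false".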
+ - name: Determine if should not run due to irrelevant file changes + id: filter + if: github.event_name == 'merge_group' + env: + ALL: ${{ steps.changes.outputs.all_changed_files }} + IGNORE: ${{ steps.changes.outputs.ignore_files }} + run: | + echo "ALL='${{ env.ALL }}'" + echo "IGNORE='${{ env.IGNORE }}'" + export SOURCE_CHANGED=$(comm -2 -3 <(printf "%s\n" "${{ env.ALL }}") <(printf "%s\n" "${{ env.IGNORE }}")) + echo "SOURCE_CHANGED='${SOURCE_CHANGED}'" + + if [ "${SOURCE_CHANGED}" == "" ]; then + export SOURCE_CHANGED="false" + else + export SOURCE_CHANGED="true" + fi + + echo "SOURCE_CHANGED='${SOURCE_CHANGED}'" + echo "SOURCE_CHANGED=${SOURCE_CHANGED}" >> $GITHUB_OUTPUT compute-metadata: - name: Compute metadata for regression experiments + name: Compute metadata runs-on: ubuntu-22.04 + needs: should-run + if: github.event_name != 'merge_group' || needs.should-run.outputs.source_changed == 'true' outputs: - pr-number: ${{ steps.pr-metadata.outputs.PR_NUMBER }} + pr-number: ${{ steps.pr-metadata-merge-queue.outputs.PR_NUMBER || steps.pr-metadata-comment.outputs.PR_NUMBER }} + baseline-sha: ${{ steps.pr-metadata-merge-queue.outputs.BASELINE_SHA || steps.pr-metadata-comment.outputs.BASELINE_SHA }} + baseline-tag: ${{ steps.pr-metadata-merge-queue.outputs.BASELINE_TAG || steps.pr-metadata-comment.outputs.BASELINE_TAG }} + comparison-sha: ${{ steps.pr-metadata-merge-queue.outputs.COMPARISON_SHA || steps.pr-metadata-comment.outputs.COMPARISON_SHA }} + comparison-tag: ${{ steps.pr-metadata-merge-queue.outputs.COMPARISON_TAG || steps.pr-metadata-comment.outputs.COMPARISON_TAG }} + + # below are used in the experiment/analyze jobs + cpus: ${{ steps.system.outputs.CPUS }} + memory: ${{ steps.system.outputs.MEMORY }} + vector-cpus: ${{ steps.system.outputs.VECTOR_CPUS }} - comparison-sha: ${{ steps.comparison.outputs.COMPARISON }} - comparison-tag: ${{ steps.comparison.outputs.COMPARISON_TAG }} - baseline-sha: ${{ steps.baseline.outputs.BASELINE }} - baseline-tag: ${{ steps.baseline.outputs.BASELINE_TAG }} + replicas: ${{ steps.experimental-meta.outputs.REPLICAS }} + warmup-seconds: ${{ steps.experimental-meta.outputs.WARMUP_SECONDS }} + total-samples: ${{ steps.experimental-meta.outputs.TOTAL_SAMPLES }} + p-value: ${{ steps.experimental-meta.outputs.P_VALUE }} + smp-version: ${{ steps.experimental-meta.outputs.SMP_CRATE_VERSION }} + lading-version: ${{ steps.experimental-meta.outputs.LADING_VERSION }} steps: - uses: actions/checkout@v3 with: - ref: ${{ github.base_ref }} - path: baseline-vector + fetch-depth: 1000 - - name: Setup PR metadata - id: pr-metadata + # If triggered by issue comment, the event payload doesn't directly contain the head and base sha from the PR. + # But, we can retrieve this info from some commands. 
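+      # For example (values are made up): for PR 1234 with baseline sha aaa1111 and
+      # comparison sha bbb2222, the steps below emit
+      #   BASELINE_TAG=1234-bbb2222-aaa1111
+      #   COMPARISON_TAG=1234-bbb2222-bbb2222
+      # i.e. <pr>-<comparison sha>-<baseline sha> and <pr>-<comparison sha>-<comparison sha>.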
+ - name: Get PR metadata (issue_comment) + id: pr-metadata-comment + if: github.event_name == 'issue_comment' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - echo "PR_NUMBER=${{ github.event.number }}" >> $GITHUB_OUTPUT + export PR_NUMBER=${{ github.event.issue.number }} + echo "PR_NUMBER=${PR_NUMBER}" >> $GITHUB_OUTPUT - - name: Setup baseline variables - id: baseline - run: | - pushd baseline-vector - export BASELINE_SHA=$(git rev-parse HEAD) - popd + gh pr checkout ${{ github.event.issue.number }} + + export BASELINE_SHA=$(git merge-base master HEAD) + echo "BASELINE_SHA=${BASELINE_SHA}" >> $GITHUB_OUTPUT + + export COMPARISON_SHA=$(git rev-parse HEAD) + echo "COMPARISON_SHA=${COMPARISON_SHA}" >> $GITHUB_OUTPUT + + export BASELINE_TAG="${PR_NUMBER}-${COMPARISON_SHA}-${BASELINE_SHA}" + echo "BASELINE_TAG=${BASELINE_TAG}" >> $GITHUB_OUTPUT + + export COMPARISON_TAG="${PR_NUMBER}-${COMPARISON_SHA}-${COMPARISON_SHA}" + echo "COMPARISON_TAG=${COMPARISON_TAG}" >> $GITHUB_OUTPUT + + echo "pr number is: ${PR_NUMBER}" - export BASELINE_TAG="${{ github.event.pull_request.head.sha }}-${BASELINE_SHA}" echo "baseline sha is: ${BASELINE_SHA}" echo "baseline tag is: ${BASELINE_TAG}" - echo "BASELINE=${BASELINE_SHA}" >> $GITHUB_OUTPUT - echo "BASELINE_TAG=${BASELINE_TAG}" >> $GITHUB_OUTPUT + echo "comparison sha is: ${COMPARISON_SHA}" + echo "comparison tag is: ${COMPARISON_TAG}" + + if [ "${BASELINE_SHA}" = "" ] ; then + echo "BASELINE_SHA not found, exiting." + exit 1 + fi + + if [ "${COMPARISON_SHA}" = "" ] ; then + echo "COMPARISON_SHA not found, exiting." + exit 1 + fi + + if [ "${PR_NUMBER}" = "" ] ; then + echo "PR_NUMBER not found, exiting." + exit 1 + fi - - name: Setup comparison variables - id: comparison + # If triggered by merge queue, the PR number is not available in the payload. While we restrict the number of PRs in the + # queue to 1, we can get the PR number by parsing the merge queue temp branch's ref. + - name: Get PR metadata (merge queue) + id: pr-metadata-merge-queue + if: github.event_name != 'issue_comment' run: | - export COMPARISON_SHA=${{ github.event.pull_request.head.sha }} - export COMPARISON_TAG="${{ github.event.pull_request.head.sha }}-${{ github.event.pull_request.head.sha }}" + export PR_NUMBER=$(echo "${{ github.ref }}" | sed -n 's|^refs/heads/gh-readonly-queue/master/pr-\([0-9]*\)-.*$|\1|p') + echo "PR_NUMBER=${PR_NUMBER}" >> $GITHUB_OUTPUT + + export BASELINE_SHA=${{ github.event.merge_group.base_sha }} + echo "BASELINE_SHA=${BASELINE_SHA}" >> $GITHUB_OUTPUT + + export COMPARISON_SHA=${{ github.event.merge_group.head_sha }} + echo "COMPARISON_SHA=${COMPARISON_SHA}" >> $GITHUB_OUTPUT + + export BASELINE_TAG="${PR_NUMBER}-${COMPARISON_SHA}-${BASELINE_SHA}" + echo "BASELINE_TAG=${BASELINE_TAG}" >> $GITHUB_OUTPUT + + export COMPARISON_TAG="${PR_NUMBER}-${COMPARISON_SHA}-${COMPARISON_SHA}" + echo "COMPARISON_TAG=${COMPARISON_TAG}" >> $GITHUB_OUTPUT + + echo "pr number is: ${PR_NUMBER}" + + echo "baseline sha is: ${BASELINE_SHA}" + echo "baseline tag is: ${BASELINE_TAG}" echo "comparison sha is: ${COMPARISON_SHA}" echo "comparison tag is: ${COMPARISON_TAG}" - echo "COMPARISON=${COMPARISON_SHA}" >> $GITHUB_OUTPUT - echo "COMPARISON_TAG=${COMPARISON_TAG}" >> $GITHUB_OUTPUT + if [ "${BASELINE_SHA}" = "" ] ; then + echo "BASELINE_SHA not found, exiting." + exit 1 + fi + + if [ "${COMPARISON_SHA}" = "" ] ; then + echo "COMPARISON_SHA not found, exiting." + exit 1 + fi + + if [ "${PR_NUMBER}" = "" ] ; then + echo "PR_NUMBER not found, exiting." 
+ exit 1 + fi + + - name: Setup experimental metadata + id: experimental-meta + run: | + export WARMUP_SECONDS="45" + export REPLICAS="10" + export TOTAL_SAMPLES="600" + export P_VALUE="0.1" + export SMP_CRATE_VERSION="0.7.3" + export LADING_VERSION="0.12.0" + + echo "warmup seconds: ${WARMUP_SECONDS}" + echo "replicas: ${REPLICAS}" + echo "total samples: ${TOTAL_SAMPLES}" + echo "regression p-value: ${P_VALUE}" + echo "smp crate version: ${SMP_CRATE_VERSION}" + echo "lading version: ${LADING_VERSION}" + + echo "WARMUP_SECONDS=${WARMUP_SECONDS}" >> $GITHUB_OUTPUT + echo "REPLICAS=${REPLICAS}" >> $GITHUB_OUTPUT + echo "TOTAL_SAMPLES=${TOTAL_SAMPLES}" >> $GITHUB_OUTPUT + echo "P_VALUE=${P_VALUE}" >> $GITHUB_OUTPUT + echo "SMP_CRATE_VERSION=${SMP_CRATE_VERSION}" >> $GITHUB_OUTPUT + echo "LADING_VERSION=${LADING_VERSION}" >> $GITHUB_OUTPUT + - name: Setup system details + id: system + run: | + export CPUS="7" + export MEMORY="30g" + export VECTOR_CPUS="4" + + echo "cpus total: ${CPUS}" + echo "memory total: ${MEMORY}" + echo "vector cpus: ${VECTOR_CPUS}" + + echo "CPUS=${CPUS}" >> $GITHUB_OUTPUT + echo "MEMORY=${MEMORY}" >> $GITHUB_OUTPUT + echo "VECTOR_CPUS=${VECTOR_CPUS}" >> $GITHUB_OUTPUT + + - name: (PR comment) Get PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: xt0rted/pull-request-comment-branch@v2 + id: comment-branch + + - name: (PR comment) Set latest commit status as pending + if: ${{ github.event_name == 'issue_comment' }} + uses: myrotvorets/set-commit-status-action@v1.1.7 + with: + sha: ${{ steps.pr-metadata-comment.outputs.COMPARISON_SHA }} + token: ${{ secrets.GITHUB_TOKEN }} + context: Regression Detection Suite + status: pending ## ## BUILD ## build-baseline: name: Build baseline Vector container - runs-on: [linux, ubuntu-20.04-8core] + runs-on: [linux, ubuntu-20.04-4core] needs: - compute-metadata steps: @@ -102,10 +295,10 @@ jobs: - name: Set up Docker Buildx id: buildx - uses: docker/setup-buildx-action@v2.5.0 + uses: docker/setup-buildx-action@v2.8.0 - name: Build 'vector' target image - uses: docker/build-push-action@v4.0.0 + uses: docker/build-push-action@v4.1.1 with: context: baseline-vector/ cache-from: type=gha @@ -114,7 +307,7 @@ jobs: builder: ${{ steps.buildx.outputs.name }} outputs: type=docker,dest=${{ runner.temp }}/baseline-image.tar tags: | - vector:${{ needs.compute-metadata.outputs.pr-number }}-${{ needs.compute-metadata.outputs.baseline-tag }} + vector:${{ needs.compute-metadata.outputs.baseline-tag }} - name: Upload image as artifact uses: actions/upload-artifact@v3 @@ -124,7 +317,7 @@ jobs: build-comparison: name: Build comparison Vector container - runs-on: [linux, soak-builder] + runs-on: [linux, ubuntu-20.04-4core] needs: - compute-metadata steps: @@ -139,10 +332,10 @@ jobs: - name: Set up Docker Buildx id: buildx - uses: docker/setup-buildx-action@v2.5.0 + uses: docker/setup-buildx-action@v2.8.0 - name: Build 'vector' target image - uses: docker/build-push-action@v4.0.0 + uses: docker/build-push-action@v4.1.1 with: context: comparison-vector/ cache-from: type=gha @@ -151,7 +344,7 @@ jobs: builder: ${{ steps.buildx.outputs.name }} outputs: type=docker,dest=${{ runner.temp }}/comparison-image.tar tags: | - vector:${{ needs.compute-metadata.outputs.pr-number }}-${{ needs.compute-metadata.outputs.comparison-tag }} + vector:${{ needs.compute-metadata.outputs.comparison-tag }} - name: Upload image as artifact uses: actions/upload-artifact@v3 @@ -159,25 +352,472 @@ jobs: name: comparison-image path: "${{ runner.temp 
}}/comparison-image.tar" - transmit-metadata: - name: Transmit metadata to trusted workflow + confirm-valid-credentials: + name: Confirm AWS credentials are minimally valid + runs-on: ubuntu-22.04 + needs: + - compute-metadata + steps: + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v2.2.0 + with: + aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + + - name: Download SMP binary + run: | + aws s3 cp s3://smp-cli-releases/v${{ needs.compute-metadata.outputs.smp-version }}/x86_64-unknown-linux-gnu/smp ${{ runner.temp }}/bin/smp + + ## + ## SUBMIT + ## + + upload-baseline-image-to-ecr: + name: Upload baseline images to ECR + runs-on: ubuntu-22.04 + needs: + - compute-metadata + - confirm-valid-credentials + - build-baseline + steps: + - name: 'Download baseline image' + uses: actions/download-artifact@v3 + with: + name: baseline-image + + - name: Load baseline image + run: | + docker load --input baseline-image.tar + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v2.2.0 + with: + aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v1 + + - name: Docker Login to ECR + uses: docker/login-action@v2 + with: + registry: ${{ steps.login-ecr.outputs.registry }} + + - name: Tag & push baseline image + run: | + docker tag vector:${{ needs.compute-metadata.outputs.baseline-tag }} ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.baseline-tag }} + docker push ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.baseline-tag }} + + upload-comparison-image-to-ecr: + name: Upload comparison images to ECR + runs-on: ubuntu-22.04 + needs: + - compute-metadata + - confirm-valid-credentials + - build-comparison + steps: + - name: 'Download comparison image' + uses: actions/download-artifact@v3 + with: + name: comparison-image + + - name: Load comparison image + run: | + docker load --input comparison-image.tar + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v2.2.0 + with: + aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v1 + + - name: Docker Login to ECR + uses: docker/login-action@v2 + with: + registry: ${{ steps.login-ecr.outputs.registry }} + + - name: Tag & push comparison image + run: | + docker tag vector:${{ needs.compute-metadata.outputs.comparison-tag }} ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.comparison-tag }} + docker push ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.comparison-tag }} + + submit-job: + name: Submit regression job runs-on: ubuntu-22.04 needs: - compute-metadata + - upload-baseline-image-to-ecr + - upload-comparison-image-to-ecr steps: - - name: Write out 
metadata - run: | - echo "COMPARISON_TAG=${{ needs.compute-metadata.outputs.pr-number }}-${{ needs.compute-metadata.outputs.comparison-tag }}" > ${{ runner.temp }}/meta - echo "COMPARISON_SHA=${{ needs.compute-metadata.outputs.comparison-sha }}" >> ${{ runner.temp }}/meta - echo "BASELINE_TAG=${{ needs.compute-metadata.outputs.pr-number }}-${{ needs.compute-metadata.outputs.baseline-tag }}" >> ${{ runner.temp }}/meta - echo "BASELINE_SHA=${{ needs.compute-metadata.outputs.baseline-sha }}" >> ${{ runner.temp }}/meta - echo "CHECKOUT_SHA=${{ github.sha }}" >> ${{ runner.temp }}/meta - echo "HEAD_SHA=${{ github.event.pull_request.head.sha }}" >> ${{ runner.temp }}/meta - echo "BASE_SHA=${{ github.event.pull_request.base.sha }}" >> ${{ runner.temp }}/meta - echo "GITHUB_EVENT_NUMBER=${{ github.event.number }}" >> ${{ runner.temp }}/meta - - - name: Upload metadata + - name: Check status, in-progress + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.comparison-sha }} \ + -f state='pending' \ + -f description='Experiments submitted to the Regression Detection cluster.' \ + -f context='Regression Detection Suite / submission' \ + -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + + - uses: actions/checkout@v3 + with: + ref: ${{ needs.compute-metadata.outputs.comparison-sha }} + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v2.2.0 + with: + aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v1 + + - name: Download SMP binary + run: | + aws s3 cp s3://smp-cli-releases/v${{ needs.compute-metadata.outputs.smp-version }}/x86_64-unknown-linux-gnu/smp ${{ runner.temp }}/bin/smp + + - name: Submit job + env: + RUST_LOG: info + run: | + chmod +x ${{ runner.temp }}/bin/smp + + ${{ runner.temp }}/bin/smp --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} job submit \ + --lading-version ${{ needs.compute-metadata.outputs.lading-version }} \ + --total-samples ${{ needs.compute-metadata.outputs.total-samples }} \ + --warmup-seconds ${{ needs.compute-metadata.outputs.warmup-seconds }} \ + --replicas ${{ needs.compute-metadata.outputs.replicas }} \ + --baseline-image ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.baseline-tag }} \ + --comparison-image ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.comparison-tag }} \ + --baseline-sha ${{ needs.compute-metadata.outputs.baseline-sha }} \ + --comparison-sha ${{ needs.compute-metadata.outputs.comparison-sha }} \ + --target-command "/usr/local/bin/vector" \ + --target-config-dir ${{ github.workspace }}/regression/ \ + --target-cpu-allotment "${{ needs.compute-metadata.outputs.cpus }}" \ + --target-memory-allotment "${{ needs.compute-metadata.outputs.memory }}" \ + --target-environment-variables "VECTOR_THREADS=${{ needs.compute-metadata.outputs.vector-cpus }},VECTOR_REQUIRE_HEALTHY=true" \ + --target-name vector \ + --submission-metadata ${{ runner.temp }}/submission-metadata + + - uses: actions/upload-artifact@v3 + with: + name: 
vector-submission-metadata + path: ${{ runner.temp }}/submission-metadata + + - name: Await job + timeout-minutes: 120 + env: + RUST_LOG: info + run: | + chmod +x ${{ runner.temp }}/bin/smp + + ${{ runner.temp }}/bin/smp --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} job status \ + --wait \ + --wait-delay-seconds 60 \ + --wait-timeout-minutes 90 \ + --submission-metadata ${{ runner.temp }}/submission-metadata + + - name: Handle cancellation if necessary + if: ${{ cancelled() }} + env: + RUST_LOG: info + run: | + chmod +x ${{ runner.temp }}/bin/smp + ${{ runner.temp }}/bin/smp --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} job cancel \ + --submission-metadata ${{ runner.temp }}/submission-metadata + + - name: Check status, cancelled + if: ${{ cancelled() }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.comparison-sha }} \ + -f state='failure' \ + -f description='Experiments submitted to the Regression Detection cluster cancelled.' \ + -f context='Regression Detection Suite / submission' \ + -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + + - name: Check status, success + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.comparison-sha }} \ + -f state='success' \ + -f description='Experiments submitted to the Regression Detection cluster successfully.' \ + -f context='Regression Detection Suite / submission' \ + -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + + - name: Check status, failure + if: ${{ failure() }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.comparison-sha }} \ + -f state='success' \ + -f description='Experiments submitted to the Regression Detection Suite failed.' 
\ + -f context='Regression Detection Suite / submission' \ + -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + + ## + ## ANALYZE + ## + + detect-regression: + name: Determine regression status + runs-on: ubuntu-22.04 + needs: + - submit-job + - compute-metadata + steps: + - uses: actions/checkout@v3 + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v2.2.0 + with: + aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + + - name: Download SMP binary + run: | + aws s3 cp s3://smp-cli-releases/v${{ needs.compute-metadata.outputs.smp-version }}/x86_64-unknown-linux-gnu/smp ${{ runner.temp }}/bin/smp + + - name: Download submission metadata + uses: actions/download-artifact@v3 + with: + name: vector-submission-metadata + path: ${{ runner.temp }}/ + + - name: Determine if PR introduced a regression + env: + RUST_LOG: info + run: | + chmod +x ${{ runner.temp }}/bin/smp + + ${{ runner.temp }}/bin/smp --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} job result \ + --submission-metadata ${{ runner.temp }}/submission-metadata + + - name: Check status, cancelled + if: ${{ cancelled() }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.comparison-sha }} \ + -f state='failure' \ + -f description='Analyze experimental results from Regression Detection Suite cancelled.' \ + -f context='Regression Detection Suite / detect-regression' \ + -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + + - name: Check status, success + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.comparison-sha }} \ + -f state='success' \ + -f description='Analyze experimental results from Regression Detection Suite succeeded.' \ + -f context='Regression Detection Suite / detect-regression' \ + -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + + - name: Check status, failure + if: ${{ failure() }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.comparison-sha }} \ + -f state='failure' \ + -f description='Analyze experimental results from Regression Detection Suite failed.' \ + -f context='Regression Detection Suite / detect-regression' \ + -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + + analyze-experiment: + name: Download regression analysis & upload report + runs-on: ubuntu-22.04 + needs: + - submit-job + - compute-metadata + steps: + - name: Check status, in-progress + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.comparison-sha }} \ + -f state='pending' \ + -f description='Analyze experimental results from Regression Detection Suite.' 
\ + -f context='Regression Detection Suite / analyze-experiment' \ + -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + + - uses: actions/checkout@v3 + with: + ref: ${{ needs.compute-metadata.outputs.comparison-sha }} + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v2.2.0 + with: + aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + + - name: Download SMP binary + run: | + aws s3 cp s3://smp-cli-releases/v${{ needs.compute-metadata.outputs.smp-version }}/x86_64-unknown-linux-gnu/smp ${{ runner.temp }}/bin/smp + + - name: Download submission metadata + uses: actions/download-artifact@v3 + with: + name: vector-submission-metadata + path: ${{ runner.temp }}/ + + - name: Sync regression report to local system + env: + RUST_LOG: info + run: | + chmod +x ${{ runner.temp }}/bin/smp + + ${{ runner.temp }}/bin/smp --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} job sync \ + --submission-metadata ${{ runner.temp }}/submission-metadata \ + --output-path "${{ runner.temp }}/outputs" + + - name: Read regression report + id: read-analysis + uses: juliangruber/read-file-action@v1 + with: + path: ${{ runner.temp }}/outputs/report.html + + - name: Post report to PR + uses: peter-evans/create-or-update-comment@v3 + with: + issue-number: ${{ needs.compute-metadata.outputs.pr-number }} + edit-mode: append + body: ${{ steps.read-analysis.outputs.content }} + + - name: Upload regression report to artifacts uses: actions/upload-artifact@v3 with: - name: meta - path: "${{ runner.temp }}/meta" + name: capture-artifacts + path: ${{ runner.temp }}/outputs/* + + - name: Check status, cancelled + if: ${{ cancelled() }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.comparison-sha }} \ + -f state='failure' \ + -f description='Analyze experimental results from Regression Detection Suite cancelled.' \ + -f context='Regression Detection Suite / analyze-experiment' \ + -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + + - name: Check status, success + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.comparison-sha }} \ + -f state='success' \ + -f description='Analyze experimental results from Regression Detection Suite succeeded.' \ + -f context='Regression Detection Suite / analyze-experiment' \ + -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + + - name: Check status, failure + if: ${{ failure() }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.comparison-sha }} \ + -f state='failure' \ + -f description='Analyze experimental results from Regression Detection Suite failed.' 
\ + -f context='Regression Detection Suite / analyze-experiment' \ + -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + + # This job always runs- if an issue_comment triggered it, we need to update the check status in the PR, + # and if a pull_request triggered it, we need to flag the check status as a success. + regression-detection-suite: + name: Regression Detection Suite + runs-on: ubuntu-latest + if: always() + needs: + - compute-metadata + - build-baseline + - build-comparison + - confirm-valid-credentials + - upload-baseline-image-to-ecr + - upload-comparison-image-to-ecr + - submit-job + - detect-regression + - analyze-experiment + env: + FAILED: ${{ contains(needs.*.result, 'failure') }} + steps: + - name: (PR comment) Get PR branch + if: github.event_name == 'issue_comment' + uses: xt0rted/pull-request-comment-branch@v2 + id: comment-branch + + - name: (PR comment) Submit PR result as failed + if: github.event_name == 'issue_comment' && env.FAILED == 'true' + uses: myrotvorets/set-commit-status-action@v1.1.7 + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + context: Regression Detection Suite + status: 'failure' + + - name: (PR comment) Submit PR result as success + if: github.event_name == 'issue_comment' && env.FAILED != 'true' + uses: myrotvorets/set-commit-status-action@v1.1.7 + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + context: Regression Detection Suite + status: 'success' + + - name: exit + run: | + echo "failed=${{ env.FAILED }}" + if [[ "$FAILED" == "true" ]] ; then + exit 1 + else + exit 0 + fi diff --git a/.github/workflows/regression_trusted.yml.upstream b/.github/workflows/regression_trusted.yml.upstream deleted file mode 100644 index 52de4d14d3236..0000000000000 --- a/.github/workflows/regression_trusted.yml.upstream +++ /dev/null @@ -1,595 +0,0 @@ -name: Regression Detector (trusted) - -on: - workflow_run: - workflows: ["Regression Detector"] - types: - - completed - -jobs: - compute-metadata: - name: Compute metadata for regression experiments - runs-on: ubuntu-22.04 - if: > - github.event.workflow_run.event == 'pull_request' && - github.event.workflow_run.conclusion == 'success' - outputs: - cpus: ${{ steps.system.outputs.CPUS }} - memory: ${{ steps.system.outputs.MEMORY }} - vector-cpus: ${{ steps.system.outputs.VECTOR_CPUS }} - - comparison-sha: ${{ steps.metadata.outputs.COMPARISON_SHA }} - comparison-tag: ${{ steps.metadata.outputs.COMPARISON_TAG }} - baseline-sha: ${{ steps.metadata.outputs.BASELINE_SHA }} - baseline-tag: ${{ steps.metadata.outputs.BASELINE_TAG }} - head-sha: ${{ steps.metadata.outputs.HEAD_SHA }} - checkout-sha: ${{ steps.metadata.outputs.CHECKOUT_SHA }} - github-event-number: ${{ steps.metadata.outputs.GITHUB_EVENT_NUMBER }} - - replicas: ${{ steps.experimental-meta.outputs.REPLICAS }} - warmup-seconds: ${{ steps.experimental-meta.outputs.WARMUP_SECONDS }} - total-samples: ${{ steps.experimental-meta.outputs.TOTAL_SAMPLES }} - p-value: ${{ steps.experimental-meta.outputs.P_VALUE }} - smp-version: ${{ steps.experimental-meta.outputs.SMP_CRATE_VERSION }} - lading-version: ${{ steps.experimental-meta.outputs.LADING_VERSION }} - - steps: - - name: Setup experimental metadata - id: experimental-meta - run: | - export WARMUP_SECONDS="45" - export REPLICAS="10" - export TOTAL_SAMPLES="600" - export P_VALUE="0.1" - export SMP_CRATE_VERSION="0.7.3" - export LADING_VERSION="0.12.0" - - echo "warmup 
seconds: ${WARMUP_SECONDS}" - echo "replicas: ${REPLICAS}" - echo "total samples: ${TOTAL_SAMPLES}" - echo "regression p-value: ${P_VALUE}" - echo "smp crate version: ${SMP_CRATE_VERSION}" - echo "lading version: ${LADING_VERSION}" - - echo "WARMUP_SECONDS=${WARMUP_SECONDS}" >> $GITHUB_OUTPUT - echo "REPLICAS=${REPLICAS}" >> $GITHUB_OUTPUT - echo "TOTAL_SAMPLES=${TOTAL_SAMPLES}" >> $GITHUB_OUTPUT - echo "P_VALUE=${P_VALUE}" >> $GITHUB_OUTPUT - echo "SMP_CRATE_VERSION=${SMP_CRATE_VERSION}" >> $GITHUB_OUTPUT - echo "LADING_VERSION=${LADING_VERSION}" >> $GITHUB_OUTPUT - - - name: Setup system details - id: system - run: | - export CPUS="7" - export MEMORY="30g" - export VECTOR_CPUS="4" - - echo "cpus total: ${CPUS}" - echo "memory total: ${MEMORY}" - echo "vector cpus: ${VECTOR_CPUS}" - - echo "CPUS=${CPUS}" >> $GITHUB_OUTPUT - echo "MEMORY=${MEMORY}" >> $GITHUB_OUTPUT - echo "VECTOR_CPUS=${VECTOR_CPUS}" >> $GITHUB_OUTPUT - - # github.rest.actions.listWorkflowRunArtifacts only returns first 30 - # artifacts, and returns a { data, headers, status, url } object. The - # "data" part of this object contains the artifact data we care about. - # The fields of this data object correspond to the fields in the - # "Example Response" JSON object in - # https://docs.github.com/en/rest/actions/artifacts#list-workflow-run-artifacts. - # To return more than 30 responses, use the github.paginate API in - # https://octokit.github.io/rest.js/v19#custom-requests - # `github-script` aliases `octokit` to the `github` namespace. - - name: 'Download metadata' - uses: actions/github-script@v6.4.1 - with: - script: | - var artifacts = await github.rest.actions.listWorkflowRunArtifacts({ - owner: context.repo.owner, - repo: context.repo.repo, - run_id: ${{github.event.workflow_run.id }}, - }); - - var matchArtifact = artifacts.data.artifacts.filter((artifact) => { - return artifact.name == "meta" - })[0]; - - console.log("Downloading artifact %s", matchArtifact.id); - - var download = await github.rest.actions.downloadArtifact({ - owner: context.repo.owner, - repo: context.repo.repo, - artifact_id: matchArtifact.id, - archive_format: 'zip', - }); - var fs = require('fs'); - fs.writeFileSync('${{github.workspace}}/meta.zip', Buffer.from(download.data)); - - - run: unzip meta.zip - - - name: Setup metadata - id: metadata - run: | - cat meta - cat meta >> $GITHUB_OUTPUT - - confirm-valid-credentials: - name: Confirm AWS credentials are minimally valid - runs-on: ubuntu-22.04 - needs: - - compute-metadata - steps: - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v2.0.0 - with: - aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} - aws-region: us-west-2 - - - name: Download SMP binary - run: | - aws s3 cp s3://smp-cli-releases/v${{ needs.compute-metadata.outputs.smp-version }}/x86_64-unknown-linux-gnu/smp ${{ runner.temp }}/bin/smp - - ## - ## SUBMIT - ## - - upload-baseline-image-to-ecr: - name: Upload images to ECR - runs-on: ubuntu-22.04 - needs: - - compute-metadata - - confirm-valid-credentials - steps: - # github.rest.actions.listWorkflowRunArtifacts only returns first 30 - # artifacts, and returns a { data, headers, status, url } object. The - # "data" part of this object contains the artifact data we care about. 
- # The fields of this data object correspond to the fields in the - # "Example Response" JSON object in - # https://docs.github.com/en/rest/actions/artifacts#list-workflow-run-artifacts. - # To return more than 30 responses, use the github.paginate API in - # https://octokit.github.io/rest.js/v19#custom-requests - # `github-script` aliases `octokit` to the `github` namespace. - - name: 'Download baseline image' - uses: actions/github-script@v6.4.1 - with: - script: | - var artifacts = await github.rest.actions.listWorkflowRunArtifacts({ - owner: context.repo.owner, - repo: context.repo.repo, - run_id: ${{github.event.workflow_run.id }}, - }); - - var matchArtifact = artifacts.data.artifacts.filter((artifact) => { - return artifact.name == "baseline-image" - })[0]; - - console.log("Downloading artifact %s", matchArtifact.id); - - var download = await github.rest.actions.downloadArtifact({ - owner: context.repo.owner, - repo: context.repo.repo, - artifact_id: matchArtifact.id, - archive_format: 'zip', - }); - var fs = require('fs'); - fs.writeFileSync('${{github.workspace}}/baseline-image.zip', Buffer.from(download.data)); - - - run: unzip baseline-image.zip - - - name: Load baseline image - run: | - docker load --input baseline-image.tar - - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v2.0.0 - with: - aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} - aws-region: us-west-2 - - - name: Login to Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - - - name: Docker Login to ECR - uses: docker/login-action@v2 - with: - registry: ${{ steps.login-ecr.outputs.registry }} - - - name: Tag & push baseline image - run: | - docker tag vector:${{ needs.compute-metadata.outputs.baseline-tag }} ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.baseline-tag }} - docker push ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.baseline-tag }} - - upload-comparison-image-to-ecr: - name: Upload images to ECR - runs-on: ubuntu-22.04 - needs: - - compute-metadata - - confirm-valid-credentials - steps: - # github.rest.actions.listWorkflowRunArtifacts only returns first 30 - # artifacts, and returns a { data, headers, status, url } object. The - # "data" part of this object contains the artifact data we care about. - # The fields of this data object correspond to the fields in the - # "Example Response" JSON object in - # https://docs.github.com/en/rest/actions/artifacts#list-workflow-run-artifacts. - # To return more than 30 responses, use the github.paginate API in - # https://octokit.github.io/rest.js/v19#custom-requests - # `github-script` aliases `octokit` to the `github` namespace. 
- - name: 'Download comparison image' - uses: actions/github-script@v6.4.1 - with: - script: | - var artifacts = await github.rest.actions.listWorkflowRunArtifacts({ - owner: context.repo.owner, - repo: context.repo.repo, - run_id: ${{github.event.workflow_run.id }}, - }); - - var matchArtifact = artifacts.data.artifacts.filter((artifact) => { - return artifact.name == "comparison-image" - })[0]; - - console.log("Downloading artifact %s", matchArtifact.id); - - var download = await github.rest.actions.downloadArtifact({ - owner: context.repo.owner, - repo: context.repo.repo, - artifact_id: matchArtifact.id, - archive_format: 'zip', - }); - var fs = require('fs'); - fs.writeFileSync('${{github.workspace}}/comparison-image.zip', Buffer.from(download.data)); - - - run: unzip comparison-image.zip - - - name: Load comparison image - run: | - docker load --input comparison-image.tar - - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v2.0.0 - with: - aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} - aws-region: us-west-2 - - - name: Login to Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - - - name: Docker Login to ECR - uses: docker/login-action@v2 - with: - registry: ${{ steps.login-ecr.outputs.registry }} - - - name: Tag & push comparison image - run: | - docker tag vector:${{ needs.compute-metadata.outputs.comparison-tag }} ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.comparison-tag }} - docker push ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.comparison-tag }} - - submit-job: - name: Submit regression job - runs-on: ubuntu-22.04 - needs: - - compute-metadata - - upload-baseline-image-to-ecr - - upload-comparison-image-to-ecr - steps: - - name: Check status, in-progress - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - gh api \ - --method POST \ - -H "Accept: application/vnd.github+json" \ - /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.head-sha }} \ - -f state='pending' \ - -f description='Experiments submitted to the Regression Detector cluster.' 
\ - -f context='Regression Detector / submission' \ - -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - - - uses: actions/checkout@v3 - with: - ref: ${{ needs.compute-metadata.outputs.checkout-sha }} - - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v2.0.0 - with: - aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} - aws-region: us-west-2 - - - name: Login to Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - - - name: Download SMP binary - run: | - aws s3 cp s3://smp-cli-releases/v${{ needs.compute-metadata.outputs.smp-version }}/x86_64-unknown-linux-gnu/smp ${{ runner.temp }}/bin/smp - - - name: Submit job - env: - RUST_LOG: info - run: | - chmod +x ${{ runner.temp }}/bin/smp - - ${{ runner.temp }}/bin/smp --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} job submit \ - --lading-version ${{ needs.compute-metadata.outputs.lading-version }} \ - --total-samples ${{ needs.compute-metadata.outputs.total-samples }} \ - --warmup-seconds ${{ needs.compute-metadata.outputs.warmup-seconds }} \ - --replicas ${{ needs.compute-metadata.outputs.replicas }} \ - --baseline-image ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.baseline-tag }} \ - --comparison-image ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.comparison-tag }} \ - --baseline-sha ${{ needs.compute-metadata.outputs.baseline-sha }} \ - --comparison-sha ${{ needs.compute-metadata.outputs.comparison-sha }} \ - --target-command "/usr/local/bin/vector" \ - --target-config-dir ${{ github.workspace }}/regression/ \ - --target-cpu-allotment "${{ needs.compute-metadata.outputs.cpus }}" \ - --target-memory-allotment "${{ needs.compute-metadata.outputs.memory }}" \ - --target-environment-variables "VECTOR_THREADS=${{ needs.compute-metadata.outputs.vector-cpus }},VECTOR_REQUIRE_HEALTHY=true" \ - --target-name vector \ - --submission-metadata ${{ runner.temp }}/submission-metadata - - - uses: actions/upload-artifact@v3 - with: - name: vector-submission-metadata - path: ${{ runner.temp }}/submission-metadata - - - name: Await job - timeout-minutes: 120 - env: - RUST_LOG: info - run: | - chmod +x ${{ runner.temp }}/bin/smp - - ${{ runner.temp }}/bin/smp --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} job status \ - --wait \ - --wait-delay-seconds 60 \ - --wait-timeout-minutes 90 \ - --submission-metadata ${{ runner.temp }}/submission-metadata - - - name: Handle cancellation if necessary - if: ${{ cancelled() }} - env: - RUST_LOG: info - run: | - chmod +x ${{ runner.temp }}/bin/smp - ${{ runner.temp }}/bin/smp --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} job cancel \ - --submission-metadata ${{ runner.temp }}/submission-metadata - - - name: Check status, cancelled - if: ${{ cancelled() }} - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - gh api \ - --method POST \ - -H "Accept: application/vnd.github+json" \ - /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.head-sha }} \ - -f state='failure' \ - -f description='Experiments submitted to the Regression Detector cluster cancelled.' 
\ - -f context='Regression Detector / submission' \ - -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - - - name: Check status, success - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - gh api \ - --method POST \ - -H "Accept: application/vnd.github+json" \ - /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.head-sha }} \ - -f state='success' \ - -f description='Experiments submitted to the Regression Detector cluster successfully.' \ - -f context='Regression Detector / submission' \ - -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - - - name: Check status, failure - if: ${{ failure() }} - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - gh api \ - --method POST \ - -H "Accept: application/vnd.github+json" \ - /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.head-sha }} \ - -f state='success' \ - -f description='Experiments submitted to the Regression Detector cluster failed.' \ - -f context='Regression Detector / submission' \ - -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - - ## - ## ANALYZE - ## - - detect-regression: - name: Determine regression status - runs-on: ubuntu-22.04 - needs: - - submit-job - - compute-metadata - steps: - - uses: actions/checkout@v3 - - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v2.0.0 - with: - aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} - aws-region: us-west-2 - - - name: Download SMP binary - run: | - aws s3 cp s3://smp-cli-releases/v${{ needs.compute-metadata.outputs.smp-version }}/x86_64-unknown-linux-gnu/smp ${{ runner.temp }}/bin/smp - - - name: Download submission metadata - uses: actions/download-artifact@v3 - with: - name: vector-submission-metadata - path: ${{ runner.temp }}/ - - - name: Determine if PR introduced a regression - env: - RUST_LOG: info - run: | - chmod +x ${{ runner.temp }}/bin/smp - - ${{ runner.temp }}/bin/smp --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} job result \ - --submission-metadata ${{ runner.temp }}/submission-metadata - - - name: Check status, cancelled - if: ${{ cancelled() }} - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - gh api \ - --method POST \ - -H "Accept: application/vnd.github+json" \ - /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.head-sha }} \ - -f state='failure' \ - -f description='Analyze experimental results from Regression Detector cancelled.' \ - -f context='Regression Detector / detect-regression' \ - -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - - - name: Check status, success - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - gh api \ - --method POST \ - -H "Accept: application/vnd.github+json" \ - /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.head-sha }} \ - -f state='success' \ - -f description='Analyze experimental results from Regression Detector succeeded.' 
\ - -f context='Regression Detector / detect-regression' \ - -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - - - name: Check status, failure - if: ${{ failure() }} - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - gh api \ - --method POST \ - -H "Accept: application/vnd.github+json" \ - /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.head-sha }} \ - -f state='success' \ - -f description='Analyze experimental results from Regression Detector failed.' \ - -f context='Regression Detector / detect-regression' \ - -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - - analyze-experiment: - name: Download regression analysis & upload report - runs-on: ubuntu-22.04 - needs: - - submit-job - - compute-metadata - steps: - - name: Check status, in-progress - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - gh api \ - --method POST \ - -H "Accept: application/vnd.github+json" \ - /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.head-sha }} \ - -f state='pending' \ - -f description='Analyze experimental results from Regression Detector.' \ - -f context='Regression Detector / analyze-experiment' \ - -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - - - uses: actions/checkout@v3 - with: - ref: ${{ needs.compute-metadata.outputs.checkout-sha }} - - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v2.0.0 - with: - aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }} - aws-region: us-west-2 - - - name: Download SMP binary - run: | - aws s3 cp s3://smp-cli-releases/v${{ needs.compute-metadata.outputs.smp-version }}/x86_64-unknown-linux-gnu/smp ${{ runner.temp }}/bin/smp - - - name: Download submission metadata - uses: actions/download-artifact@v3 - with: - name: vector-submission-metadata - path: ${{ runner.temp }}/ - - - name: Sync regression report to local system - env: - RUST_LOG: info - run: | - chmod +x ${{ runner.temp }}/bin/smp - - ${{ runner.temp }}/bin/smp --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} job sync \ - --submission-metadata ${{ runner.temp }}/submission-metadata \ - --output-path "${{ runner.temp }}/outputs" - - - name: Read regression report - id: read-analysis - uses: juliangruber/read-file-action@v1 - with: - path: ${{ runner.temp }}/outputs/report.html - - - name: Post report to PR - uses: peter-evans/create-or-update-comment@v3 - with: - issue-number: ${{ needs.compute-metadata.outputs.github-event-number }} - edit-mode: append - body: ${{ steps.read-analysis.outputs.content }} - - - name: Upload regression report to artifacts - uses: actions/upload-artifact@v3 - with: - name: capture-artifacts - path: ${{ runner.temp }}/outputs/* - - - name: Check status, cancelled - if: ${{ cancelled() }} - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - gh api \ - --method POST \ - -H "Accept: application/vnd.github+json" \ - /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.head-sha }} \ - -f state='failure' \ - -f description='Analyze experimental results from Regression Detector cancelled.' 
\ - -f context='Regression Detector / analyze-experiment' \ - -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - - - name: Check status, success - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - gh api \ - --method POST \ - -H "Accept: application/vnd.github+json" \ - /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.head-sha }} \ - -f state='success' \ - -f description='Analyze experimental results from Regression Detector succeeded.' \ - -f context='Regression Detector / analyze-experiment' \ - -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - - - name: Check status, failure - if: ${{ failure() }} - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - gh api \ - --method POST \ - -H "Accept: application/vnd.github+json" \ - /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.head-sha }} \ - -f state='success' \ - -f description='Analyze experimental results from Regression Detector failed.' \ - -f context='Regression Detector / analyze-experiment' \ - -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} diff --git a/.github/workflows/spelling.yml.upstream b/.github/workflows/spelling.yml.upstream index 9d17e26393e5a..6bc9e822ba966 100644 --- a/.github/workflows/spelling.yml.upstream +++ b/.github/workflows/spelling.yml.upstream @@ -57,11 +57,6 @@ name: Check Spelling # ... otherwise adjust the `with:` as you wish on: - push: - branches: - - "**" - tags-ignore: - - "**" pull_request_target: branches: - "**" diff --git a/.github/workflows/test.yml.upstream b/.github/workflows/test.yml.upstream index 2dde4ad47b7d5..918880c6b8ada 100644 --- a/.github/workflows/test.yml.upstream +++ b/.github/workflows/test.yml.upstream @@ -4,16 +4,10 @@ on: pull_request: merge_group: types: [checks_requested] - push: - branches: - - master concurrency: - # For pull requests, cancel running workflows, for master, run all - # - # `github.event.number` exists for pull requests, otherwise fall back to SHA - # for master - group: ${{ github.workflow }}-${{ github.event.number || github.sha }} + # `github.event.number` exists for pull requests, otherwise fall back to SHA for merge queue + group: ${{ github.workflow }}-${{ github.event.number || github.event.merge_group.head_sha }} cancel-in-progress: true env: @@ -30,105 +24,24 @@ env: jobs: changes: - runs-on: ubuntu-20.04 - # Set job outputs to values from filter step - outputs: - source: ${{ steps.filter.outputs.source }} - dependencies: ${{ steps.filter.outputs.dependencies }} - internal_events: ${{ steps.filter.outputs.internal_events }} - cue: ${{ steps.filter.outputs.cue }} - component_docs: ${{ steps.filter.outputs.component_docs }} - markdown: ${{ steps.filter.outputs.markdown }} - steps: - - uses: actions/checkout@v3 - - uses: dorny/paths-filter@v2 - id: filter - with: - filters: | - source: - - ".github/workflows/test.yml" - - ".cargo/**" - - "benches/**" - - "lib/**" - - "proto/**" - - "scripts/**" - - "src/**" - - "tests/**" - - "build.rs" - - "Cargo.lock" - - "Cargo.toml" - - "Makefile" - - "rust-toolchain.toml" - - "vdev/**" - deny: - - 'deny.toml' - - "vdev/**" - dependencies: - - ".cargo/**" - - 'Cargo.toml' - - 'Cargo.lock' - - 'rust-toolchain.toml' - - '.github/workflows/pr.yml' - - 'Makefile' - - 'scripts/cross/**' - - "vdev/**" - cue: - - 'website/cue/**' - - "vdev" - component_docs: - - 'scripts/generate-component-docs.rb' - - "vdev/**" - 
markdown: - - '**/**.md' - - "vdev/**" - internal_events: - - 'src/internal_events/**' - - "vdev/**" - docker: - - 'distribution/docker/**' - - "vdev/**" + uses: ./.github/workflows/changes.yml + secrets: inherit + with: + base_ref: ${{ github.event.merge_group.base_ref || github.event.pull_request.base.ref }} + head_ref: ${{ github.event.merge_group.head_ref || github.event.pull_request.head.ref }} - # Remove this once https://github.com/vectordotdev/vector/issues/3771 is closed. - # Then, modify the `cross-linux` job to run `test` instead of `build`. - test-linux: - name: Unit - x86_64-unknown-linux-gnu + checks: + name: Checks runs-on: [linux, ubuntu-20.04-8core] needs: changes env: CARGO_INCREMENTAL: 0 - if: ${{ needs.changes.outputs.source == 'true' }} steps: - uses: actions/checkout@v3 - - uses: actions/cache@v3 - name: Cache Cargo registry + index with: - path: | - ~/.cargo/bin/ - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - restore-keys: | - ${{ runner.os }}-cargo- - - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - - run: bash scripts/environment/prepare.sh - - run: echo "::add-matcher::.github/matchers/rust.json" - - run: make test - env: - CARGO_BUILD_JOBS: 5 - - name: Upload test results - run: scripts/upload-test-results.sh - if: always() + # check-version needs tags + fetch-depth: 0 # fetch everything - test-cli: - name: CLI - Linux - runs-on: [linux, ubuntu-20.04-8core] - needs: changes - env: - CARGO_INCREMENTAL: 0 - if: ${{ needs.changes.outputs.source == 'true' }} - steps: - - uses: actions/checkout@v3 - uses: actions/cache@v3 name: Cache Cargo registry + index with: @@ -140,252 +53,86 @@ jobs: key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} restore-keys: | ${{ runner.os }}-cargo- - - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - - run: bash scripts/environment/prepare.sh - - run: echo "::add-matcher::.github/matchers/rust.json" - - run: make test-cli - - name: Upload test results - run: scripts/upload-test-results.sh - if: always() - test-misc: - name: Miscellaneous - Linux - runs-on: [linux, ubuntu-20.04-8core] - needs: changes - env: - CARGO_INCREMENTAL: 0 - if: ${{ needs.changes.outputs.source == 'true' }} - steps: - - uses: actions/checkout@v3 - - uses: actions/cache@v3 - name: Cache Cargo registry + index - with: - path: | - ~/.cargo/bin/ - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - restore-keys: | - ${{ runner.os }}-cargo- - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - - run: bash scripts/environment/prepare.sh - - run: echo "::add-matcher::.github/matchers/rust.json" - - run: make test-behavior - - run: make check-examples - - run: make test-docs - cross-linux: - name: Cross - ${{ matrix.target }} - runs-on: [linux, ubuntu-20.04-8core] - needs: changes - env: - CARGO_INCREMENTAL: 0 - strategy: - matrix: - target: - - x86_64-unknown-linux-gnu - - x86_64-unknown-linux-musl - - aarch64-unknown-linux-gnu - - aarch64-unknown-linux-musl - - armv7-unknown-linux-gnueabihf - - armv7-unknown-linux-musleabihf + - uses: ruby/setup-ruby@v1 - # We run cross checks when dependencies change to ensure they still build. - # This helps us avoid adopting dependencies that aren't compatible with other architectures. 
- if: ${{ needs.changes.outputs.dependencies == 'true' }} - steps: - - uses: actions/checkout@v3 - - uses: actions/cache@v3 - name: Cache Cargo registry + index - with: - path: | - ~/.cargo/bin/ - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - restore-keys: | - ${{ runner.os }}-cargo- - - run: echo "::add-matcher::.github/matchers/rust.json" - - run: 'cargo install cross --version 0.2.4 --force --locked' - # Why is this build, not check? Because we need to make sure the linking phase works. - # aarch64 and musl in particular are notoriously hard to link. - # While it may be tempting to slot a `check` in here for quickness, please don't. - - run: make cross-build-${{ matrix.target }} - - uses: actions/upload-artifact@v3 - with: - name: "vector-debug-${{ matrix.target }}" - path: "./target/${{ matrix.target }}/debug/vector" - - cross-linux-check: - if: ${{ needs.changes.outputs.dependencies == 'true' }} - runs-on: ubuntu-20.04 - name: Cross - Linux - needs: cross-linux - steps: - - name: Check cross matrix status - if: ${{ needs.cross-linux.result != 'success' }} - run: exit 1 - - test-mac: - name: Unit - Mac - # Full CI suites for this platform were only recently introduced. - # Some failures are permitted until we can properly correct them. - continue-on-error: true - runs-on: macos-11 - needs: changes - env: - CARGO_INCREMENTAL: 0 - if: ${{ needs.changes.outputs.source == 'true' }} - steps: - - uses: actions/checkout@v3 - - uses: actions/cache@v3 - name: Cache Cargo registry + index - with: - path: | - ~/.cargo/bin/ - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - restore-keys: | - ${{ runner.os }}-cargo- - - run: bash scripts/environment/bootstrap-macos-10.sh - run: bash scripts/environment/prepare.sh - - run: echo "::add-matcher::.github/matchers/rust.json" - - run: make test - - run: make test-behavior - test-windows: - name: Unit - Windows - runs-on: [windows, windows-2019-8core] - needs: changes - if: ${{ needs.changes.outputs.source == 'true' }} - steps: - - uses: actions/checkout@v3 - - run: .\scripts\environment\bootstrap-windows-2019.ps1 - - run: make test - - test-vrl: - name: VRL - Linux - continue-on-error: true - runs-on: [linux, ubuntu-20.04-8core] - needs: changes - if: ${{ needs.changes.outputs.source == 'true' || needs.changes.outputs.cue == 'true' }} - steps: - - uses: actions/checkout@v3 - - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - - run: bash scripts/environment/prepare.sh - - run: cargo vdev test-vrl - - check-component-features: - name: Component Features - Linux - runs-on: [linux, ubuntu-20.04-8core] - needs: changes - if: ${{ needs.changes.outputs.source == 'true' }} - steps: - - uses: actions/checkout@v3 - - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - - run: bash scripts/environment/prepare.sh - - run: echo "::add-matcher::.github/matchers/rust.json" - - run: make check-component-features - - check-msrv: - name: Check minimum supported Rust version - runs-on: [linux, ubuntu-20.04-8core] - needs: changes - if: ${{ needs.changes.outputs.source == 'true' }} - steps: - - uses: actions/checkout@v3 - - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - - run: cargo install cargo-msrv --version 0.15.1 - - run: cargo msrv verify - - checks: - name: Checks - runs-on: [linux, ubuntu-20.04-8core] - needs: changes - env: - 
CARGO_INCREMENTAL: 0 - steps: - - uses: actions/checkout@v3 - with: - # check-version needs tags - fetch-depth: 0 # fetch everything - - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - - run: bash scripts/environment/prepare.sh - - uses: actions/cache@v3 - name: Cache Cargo registry + index - with: - path: | - ~/.cargo/bin/ - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - restore-keys: | - ${{ runner.os }}-cargo- - name: Enable Rust matcher run: echo "::add-matcher::.github/matchers/rust.json" + - name: Check code format run: make check-fmt + - name: Check clippy if: needs.changes.outputs.source == 'true' run: make check-clippy + + # Remove this once https://github.com/vectordotdev/vector/issues/3771 is closed. + # Then, modify the `cross-linux` job to run `test` instead of `build`. + - name: Unit - x86_64-unknown-linux-gnu + run: make test + env: + CARGO_BUILD_JOBS: 5 + + - name: Upload test results + run: scripts/upload-test-results.sh + if: always() + - name: Check version run: make check-version + - name: Check scripts run: make check-scripts + - name: Check events if: needs.changes.outputs.source == 'true' run: make check-events - - name: Check cargo deny advisories - if: needs.changes.outputs.dependencies == 'true' || needs.changes.outputs.deny == 'true' - uses: EmbarkStudios/cargo-deny-action@v1.3.1 - with: - command: check advisories - - name: Check cargo deny licenses + + - name: Check cargo deny advisories/licenses if: needs.changes.outputs.dependencies == 'true' || needs.changes.outputs.deny == 'true' - uses: EmbarkStudios/cargo-deny-action@v1.3.1 - with: - command: check licenses + run: make check-deny + + - name: Check that the 3rd-party license file is up to date + if: needs.changes.outputs.dependencies == 'true' + run: make check-licenses + - name: Check Cue docs if: needs.changes.outputs.cue == 'true' run: make check-docs + - name: Check Markdown if: needs.changes.outputs.markdown == 'true' run: make check-markdown + - name: Check Component Docs if: needs.changes.outputs.source == 'true' || needs.changes.outputs.component_docs == 'true' run: make check-component-docs + - name: Check Rust Docs if: needs.changes.outputs.source == 'true' run: cd rust-doc && make docs - - uses: actions/upload-artifact@v3 - with: - name: "config-schema.json" - path: "/tmp/vector-config-schema.json" - if: success() || failure() - master-failure: - name: master-failure - if: failure() && github.ref == 'refs/heads/master' - needs: - - changes - - cross-linux - - test-misc - - test-linux - - test-mac - - test-windows - - test-vrl - - check-component-features - - checks + - name: VRL - Linux + if: needs.changes.outputs.source == 'true' || needs.changes.outputs.cue == 'true' + run: cargo vdev test-vrl + + # This is a required status check, so it always needs to run if prior jobs failed, in order to mark the status correctly. 
+ all-checks: + name: Test Suite runs-on: ubuntu-20.04 + if: always() + needs: checks + env: + FAILED: ${{ contains(needs.*.result, 'failure') }} steps: - - name: Discord notification - env: - DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }} - uses: Ilshidur/action-discord@0.3.2 - with: - args: "Master tests failed: " + - run: | + echo "failed=${{ env.FAILED }}" + if [[ "$FAILED" == "true" ]] ; then + exit 1 + else + exit 0 + fi + diff --git a/.github/workflows/unit_mac.yml.upstream b/.github/workflows/unit_mac.yml.upstream new file mode 100644 index 0000000000000..abda6ae1e177f --- /dev/null +++ b/.github/workflows/unit_mac.yml.upstream @@ -0,0 +1,65 @@ +name: Unit - Mac + +on: + workflow_call: + +jobs: + unit-mac: + # Full CI suites for this platform were only recently introduced. + # Some failures are permitted until we can properly correct them. + continue-on-error: true + runs-on: macos-11 + env: + CARGO_INCREMENTAL: 0 + steps: + + - name: (PR comment) Get PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: xt0rted/pull-request-comment-branch@v2 + id: comment-branch + + - name: (PR comment) Set latest commit status as pending + if: ${{ github.event_name == 'issue_comment' }} + uses: myrotvorets/set-commit-status-action@v1.1.7 + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + context: Unit - Mac + status: pending + + - name: (PR comment) Checkout PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: actions/checkout@v3 + with: + ref: ${{ steps.comment-branch.outputs.head_ref }} + + - name: Checkout branch + if: ${{ github.event_name != 'issue_comment' }} + uses: actions/checkout@v3 + + - uses: actions/cache@v3 + name: Cache Cargo registry + index + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo- + + - run: bash scripts/environment/bootstrap-macos-10.sh + - run: bash scripts/environment/prepare.sh + - run: echo "::add-matcher::.github/matchers/rust.json" + - run: make test + - run: make test-behavior + + - name: (PR comment) Set latest commit status as ${{ job.status }} + uses: myrotvorets/set-commit-status-action@v1.1.7 + if: always() && github.event_name == 'issue_comment' + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + context: Unit - Mac + status: ${{ job.status }} diff --git a/.github/workflows/unit_windows.yml.upstream b/.github/workflows/unit_windows.yml.upstream new file mode 100644 index 0000000000000..479d18938e4ec --- /dev/null +++ b/.github/workflows/unit_windows.yml.upstream @@ -0,0 +1,45 @@ +name: Unit - Windows + +on: + workflow_call: + +jobs: + + test-windows: + runs-on: [windows, windows-2019-8core] + steps: + - name: (PR comment) Get PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: xt0rted/pull-request-comment-branch@v2 + id: comment-branch + + - name: (PR comment) Set latest commit status as pending + if: ${{ github.event_name == 'issue_comment' }} + uses: myrotvorets/set-commit-status-action@v1.1.7 + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + context: Unit - Windows + status: pending + + - name: (PR comment) Checkout PR branch + if: ${{ github.event_name == 'issue_comment' }} + uses: actions/checkout@v3 + with: + ref: ${{ steps.comment-branch.outputs.head_ref }} + + - name: Checkout branch + if: ${{ 
github.event_name != 'issue_comment' }} + uses: actions/checkout@v3 + + - run: .\scripts\environment\bootstrap-windows-2019.ps1 + - run: make test + + - name: (PR comment) Set latest commit status as ${{ job.status }} + uses: myrotvorets/set-commit-status-action@v1.1.7 + if: always() && github.event_name == 'issue_comment' + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + context: Unit - Windows + status: ${{ job.status }} diff --git a/.ruby-version b/.ruby-version new file mode 100644 index 0000000000000..0aec50e6ede78 --- /dev/null +++ b/.ruby-version @@ -0,0 +1 @@ +3.1.4 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 120000 index 49d1b98f97e06..0000000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1 +0,0 @@ -docs/CONTRIBUTING.md \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000000..c993c8de4dc1a --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,268 @@ +# Contributing + +First, thank you for contributing to Vector! The goal of this document is to +provide everything you need to start contributing to Vector. The +following TOC is sorted progressively, starting with the basics and +expanding into more specifics. Everyone from a first-time contributor to a +Vector team member will find this document useful. + +- [Introduction](#introduction) +- [Your First Contribution](#your-first-contribution) + - [New sources, sinks, and transforms](#new-sources-sinks-and-transforms) +- [Workflow](#workflow) + - [Git Branches](#git-branches) + - [Git Commits](#git-commits) + - [Style](#style) + - [GitHub Pull Requests](#github-pull-requests) + - [Title](#title) + - [Reviews & Approvals](#reviews--approvals) + - [Merge Style](#merge-style) + - [CI](#ci) + - [Releasing](#releasing) + - [Testing](#testing) + - [Skipping tests](#skipping-tests) + - [Daily tests](#daily-tests) + - [Flakey tests](#flakey-tests) + - [Test harness](#test-harness) + - [Running Tests Locally](#running-tests-locally) + - [Deprecations](#deprecations) + - [Dependencies](#dependencies) +- [Next steps](#next-steps) +- [Legal](#legal) + - [Contributor License Agreement](#contributor-license-agreement) + - [Granted rights and copyright assignment](#granted-rights-and-copyright-assignment) + +## Introduction + +1. **You're familiar with [GitHub](https://github.com) and the pull request + workflow.** +2. **You've read Vector's [docs](https://vector.dev/docs/).** +3. **You know about the [Vector community](https://vector.dev/community/). + Please use this for help.** + +## Your First Contribution + +1. Ensure your change has an issue! Find an + [existing issue][urls.existing_issues] or [open a new issue][urls.new_issue]. + - This is where you can get a feel for whether the change will be accepted or not. + Changes that are questionable will have a `needs: approval` label. +2. Once approved, [fork the Vector repository][urls.fork_repo] in your own + GitHub account (only applicable to outside contributors). +3. [Create a new Git branch][urls.create_branch]. +4. Make your changes. +5. [Submit the branch as a pull request][urls.submit_pr] to the main Vector + repo. A Vector team member should comment and/or review your pull request + within a few days, although depending on the circumstances it may take + longer. + +### New sources, sinks, and transforms + +If you're thinking of contributing a new source, sink, or transform to Vector, thank you, that's way cool!
The answers to +the questions below are required for each newly proposed component, and depending on the answers, we may elect not to +include the proposed component. If you're having trouble with any of the questions, we're available to help you. + +**Prior to beginning work on a new source or sink, if a GitHub Issue does not already exist, please open one to discuss +the introduction of the new integration.** Maintainers will review the proposal with the following checklist in mind, so +try to address these points when sharing your proposal to reduce the amount of time it takes to review it. This +list is not exhaustive, and may be updated over time. + +- [ ] Can the proposed component’s functionality be replicated by an existing component, with a specific configuration? +(ex: Azure Event Hub as a `kafka` sink configuration) + - [ ] Alternatively, can it be implemented as a wrapper around an existing component? (ex. `axiom` wrapping `elasticsearch`) +- [ ] Can an existing component replicate the proposed component’s functionality, with non-breaking changes? +- [ ] Can an existing component be rewritten in a more generic fashion to cover both the existing and proposed functions? +- [ ] Is the proposed component generically usable, or is it specific to a particular service? + - [ ] How established is the target of the integration, and what is the relative market share of the integrated service? +- [ ] Is there sufficient demand for the component? + - [ ] If the integration can be served with a workaround or more generic component, how painful is this for users? +- [ ] Is the contribution from an individual or the organization owning the integrated service? (examples of +organization-backed integrations: `databend` sink, `axiom` sink) + - [ ] Is the contributor committed to maintaining the integration if it is accepted? +- [ ] What is the overall complexity of the proposed design of this integration from a technical and functional +standpoint, and what is the expected ongoing maintenance burden? +- [ ] How will this integration be tested and QA’d for any changes and fixes? + - [ ] Will we have access to an account with the service if the integration is not open source? + +To merge a new source, sink, or transform, the pull request is required to: + +- [ ] Add tests, especially integration tests if your contribution connects to an external service. +- [ ] Add instrumentation so folks using your integration can get insight into how it's working and performing. You can +see some [examples of instrumentation in existing integrations](https://github.com/vectordotdev/vector/tree/master/src/internal_events). +- [ ] Add documentation. You can see [examples in the `docs` directory](https://github.com/vectordotdev/vector/blob/master/docs). + +When adding new integration tests, the following changes are needed in the GitHub Workflows: + +- in `.github/workflows/integration.yml`, add another entry in the matrix definition for the new integration. +- in `.github/workflows/integration-comment.yml`, add another entry in the matrix definition for the new integration. +- in `.github/workflows/changes.yml`, add a new filter definition for the changed files, update the `changes` job +outputs to reference the filter, and finally update the outputs of `workflow_call` to include the new filter. + +## Workflow + +### Git Branches + +_All_ changes must be made in a branch and submitted as [pull requests](#github-pull-requests).
+Vector does not adopt any type of branch naming style, but please use something +descriptive of your changes. + +### Git Commits + +#### Style + +Please ensure your commits are small and focused; they should tell a story of +your change. This helps reviewers follow your changes, especially for more +complex changes. + +### GitHub Pull Requests + +Once your changes are ready, you must submit your branch as a [pull request](https://github.com/vectordotdev/vector/pulls). + +#### Title + +The pull request title must follow the format outlined in the [conventional commits spec](https://www.conventionalcommits.org). +[Conventional commits](https://www.conventionalcommits.org) is a standardized +format for commit messages. Vector only requires this format for commits on +the `master` branch. Because Vector squashes commits before merging +branches, only the pull request title must conform to this +format. Vector performs a pull request check to verify the pull request title +in case you forget. + +A list of allowed sub-categories is defined +[here](https://github.com/vectordotdev/vector/tree/master/.github). + +The following are all good examples of pull request titles: + +```text +feat(new sink): new `xyz` sink +feat(tcp source): add foo bar baz feature +fix(tcp source): fix foo bar baz bug +chore: improve build process +docs: fix typos +``` + +#### Reviews & Approvals + +All pull requests should be reviewed by: + +- No review required for cosmetic changes like whitespace, typos, and spelling + by a maintainer +- One Vector team member for minor changes or trivial changes from contributors +- Two Vector team members for major changes +- Three Vector team members for RFCs + +If CODEOWNERS are assigned, a review from an individual from each set of owners is required. + +#### Merge Style + +All pull requests are squashed and merged. We generally discourage large pull +requests that are over 300-500 lines of diff. If you would like to propose a +larger change, we suggest coming onto our [Discord server](https://chat.vector.dev/) to discuss it +with one of our engineers. This way we can talk through the solution and +discuss whether a change that large is even needed! This will produce a quicker +response to the change and likely result in code that aligns better with our +process. + +### CI + +Currently, Vector uses GitHub Actions to run tests. The workflows are defined in +`.github/workflows`. + +#### Releasing + +GitHub Actions is responsible for releasing updated versions of Vector through +various channels. + +#### Testing + +##### Skipping tests + +Tests are run for all changes except those that have the label: + +```text +ci-condition: skip +``` + +##### Daily tests + +Some long-running tests are only run daily, rather than on every pull request. +If needed, an administrator can kick off these tests manually via the button on +the [nightly build action +page](https://github.com/vectordotdev/vector/actions?query=workflow%3Anightly). + +##### Flakey tests + +Historically, we've had some trouble with tests being flakey. If your PR does +not have passing tests: + +- Ensure that the test failures are unrelated to your change + - Is it failing on master? + - Does it fail if you rerun CI? + - Can you reproduce locally?
+- Find or open an issue for the test failure + ([example](https://github.com/vectordotdev/vector/issues/3781)) +- Link the PR in the issue for the failing test so that there are more examples + +##### Test harness + +You can invoke the [test harness][urls.vector_test_harness] by commenting on +any pull request with: + +```bash +/test -t +``` + +### Running Tests Locally + +To run tests locally, use [cargo vdev](https://github.com/vectordotdev/vector/blob/master/vdev/README.md). + +Unit tests can be run by calling `cargo vdev test`. + +Integration tests are not run by default when running +`cargo vdev test`. Instead, they are accessible via the integration subcommand (example: +`cargo vdev int test aws` runs aws-related integration tests). You can find the list of available integration tests using `cargo vdev int show`. Integration tests require Docker or Podman to run. A consolidated sketch of these commands appears at the end of this document. + +### Deprecations + +When deprecating functionality in Vector, see [DEPRECATION.md](DEPRECATION.md). + +### Dependencies + +When adding, modifying, or removing a dependency in Vector, you may find that you need to update the +inventory of third-party licenses maintained in `LICENSE-3rdparty.csv`. This file is generated using +[rust-license-tool](https://github.com/DataDog/rust-license-tool.git) and can be updated using +`cargo vdev build licenses`. + +## Next steps + +As discussed in the [`README`](README.md), you should continue to the following +documents: + +1. **[DEVELOPING.md](DEVELOPING.md)** - Everything necessary to develop +2. **[DOCUMENTING.md](DOCUMENTING.md)** - Preparing your change for Vector users +3. **[DEPRECATION.md](DEPRECATION.md)** - Deprecating functionality in Vector + +## Legal + +To protect all users of Vector, the following legal requirements are made. +If you have additional questions, please [contact us]. + +### Contributor License Agreement + +Vector requires all contributors to sign the Contributor License Agreement +(CLA). This gives Vector the right to use your contribution and ensures +that you own your contributions and can use them for other purposes. + +The full text of the CLA can be found at [https://cla.datadoghq.com/vectordotdev/vector](https://cla.datadoghq.com/vectordotdev/vector). + +### Granted rights and copyright assignment + +This is covered by the CLA.
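+
+As a quick reference, here is a minimal sketch consolidating the local development commands
+mentioned in this guide (assuming `cargo vdev` is installed as described in the vdev README,
+and that Docker or Podman is available for integration tests):
+
+```bash
+# Run the unit tests
+cargo vdev test
+
+# List the available integration test suites, then run one (example: aws)
+cargo vdev int show
+cargo vdev int test aws
+
+# Regenerate the third-party license inventory after dependency changes
+cargo vdev build licenses
+```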
+ +[contact us]: https://vector.dev/community +[urls.create_branch]: https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-and-deleting-branches-within-your-repository +[urls.existing_issues]: https://github.com/vectordotdev/vector/issues +[urls.fork_repo]: https://help.github.com/en/github/getting-started-with-github/fork-a-repo +[urls.new_issue]: https://github.com/vectordotdev/vector/issues/new +[urls.submit_pr]: https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request-from-a-fork +[urls.vector_test_harness]: https://github.com/vectordotdev/vector-test-harness/ diff --git a/Cargo.lock b/Cargo.lock index aa64e84c3ca83..d7be1748f0a38 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -18,6 +18,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b5ace29ee3216de37c0546865ad08edef58b0f9e76838ed8959a84a990e58c5" +[[package]] +name = "addr2line" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] + [[package]] name = "adler" version = "1.0.2" @@ -47,21 +56,22 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" dependencies = [ - "getrandom 0.2.9", + "getrandom 0.2.10", "once_cell", "version_check", ] [[package]] name = "ahash" -version = "0.8.2" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf6ccdb167abbf410dcb915cabd428929d7f6a04980b54a11f26a39f1c7f7107" +checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" dependencies = [ "cfg-if", - "getrandom 0.2.9", + "getrandom 0.2.10", "once_cell", "version_check", + "zerocopy 0.7.31", ] [[package]] @@ -130,6 +140,12 @@ dependencies = [ "url", ] +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -185,7 +201,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8cf4144857f9e4d7dd6cc4ba4c78efd2a46bad682b029bd0d91e76a021af1b2a" dependencies = [ "byteorder", - "digest 0.10.6", + "digest 0.10.7", "lazy_static", "libflate", "log", @@ -200,7 +216,7 @@ dependencies = [ "thiserror", "typed-builder", "uuid", - "zerocopy", + "zerocopy 0.6.1", ] [[package]] @@ -212,6 +228,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "arbitrary" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d098ff73c1ca148721f37baad5ea6a465a13f9573aba8641fbbbae8164a54e" +dependencies = [ + "derive_arbitrary", +] + [[package]] name = "arc-swap" version = "1.6.0" @@ -290,6 +315,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "assert-json-diff" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4259cbe96513d2f1073027a259fc2ca917feb3026a5a8d984e3628e490255cc0" +dependencies = [ + "extend", + "serde", + "serde_json", +] + [[package]] name = "assert-json-diff" version = "2.0.2" @@ -307,7 +343,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86d6b683edf8d1119fe420a94f8a7e389239666aa72e65495d91c00462510151" dependencies = [ "anstyle 1.0.0", - "bstr 1.4.0", + 
"bstr 1.5.0", "doc-comment", "predicates 3.0.1", "predicates-core", @@ -339,19 +375,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "async-compression" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "942c7cd7ae39e91bde4820d74132e9862e62c2f386c3aa90ccf55949f5bad63a" -dependencies = [ - "flate2", - "futures-core", - "memchr", - "pin-project-lite", - "tokio", -] - [[package]] name = "async-compression" version = "0.4.0" @@ -363,7 +386,7 @@ dependencies = [ "memchr", "pin-project-lite", "tokio", - "zstd 0.12.3+zstd.1.5.2", + "zstd 0.12.4", "zstd-safe 6.0.3+zstd.1.5.2", ] @@ -421,9 +444,9 @@ dependencies = [ [[package]] name = "async-graphql" -version = "5.0.8" +version = "5.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ae09afb01514b3dbd6328547b2b11fcbcb0205d9c5e6f2e17e60cb166a82d7f" +checksum = "b35ef8f9be23ee30fe1eb1cf175c689bc33517c6c6d0fd0669dade611e5ced7f" dependencies = [ "async-graphql-derive", "async-graphql-parser", @@ -436,7 +459,7 @@ dependencies = [ "fnv", "futures-util", "http", - "indexmap", + "indexmap 1.9.3", "mime", "multer", "num-traits", @@ -452,15 +475,15 @@ dependencies = [ [[package]] name = "async-graphql-derive" -version = "5.0.8" +version = "5.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60ae62851dd3ff9a7550aee75e848e8834b75285b458753e98dd71d0733ad3f2" +checksum = "1a0f6ceed3640b4825424da70a5107e79d48d9b2bc6318dfc666b2fc4777f8c4" dependencies = [ "Inflector", "async-graphql-parser", "darling 0.14.2", "proc-macro-crate 1.2.1", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", "thiserror", @@ -468,9 +491,9 @@ dependencies = [ [[package]] name = "async-graphql-parser" -version = "5.0.8" +version = "5.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e6ee332acd99d2c50c3443beae46e9ed784c205eead9a668b7b5118b4a60a8b" +checksum = "ecc308cd3bc611ee86c9cf19182d2b5ee583da40761970e41207f088be3db18f" dependencies = [ "async-graphql-value", "pest", @@ -480,21 +503,21 @@ dependencies = [ [[package]] name = "async-graphql-value" -version = "5.0.8" +version = "5.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "122da50452383410545b9428b579f4cda5616feb6aa0aff0003500c53fcff7b7" +checksum = "d461325bfb04058070712296601dfe5e5bd6cdff84780a0a8c569ffb15c87eb3" dependencies = [ "bytes 1.4.0", - "indexmap", + "indexmap 1.9.3", "serde", "serde_json", ] [[package]] name = "async-graphql-warp" -version = "5.0.8" +version = "5.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3a2e1f7023d0074be87cd9ae83e3d6e9c6854bd7544026fdfc46c568e76c021" +checksum = "ce971f92675defe1adf14f9e70b8798d797db9f454463b611a552bffd5532188" dependencies = [ "async-graphql", "futures-util", @@ -580,9 +603,9 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", - "syn 2.0.31", + "syn 2.0.41", ] [[package]] @@ -602,9 +625,9 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", - "syn 2.0.31", + "syn 2.0.41", ] [[package]] @@ -615,21 +638,15 @@ checksum = 
"7a40729d2133846d9ed0ea60a8b9541bccddab49cd30f0715a1da672fe9a2524" [[package]] name = "async-trait" -version = "0.1.68" +version = "0.1.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" +checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", - "syn 2.0.31", + "syn 2.0.41", ] -[[package]] -name = "async_once" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ce4f10ea3abcd6617873bae9f91d1c5332b4a778bd9ce34d0cd517474c1de82" - [[package]] name = "atomic-waker" version = "1.0.0" @@ -655,25 +672,25 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "aws-config" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56a636c44c77fa18bdba56126a34d30cfe5538fe88f7d34988fa731fee143ddd" +version = "0.54.1" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ + "aws-credential-types", "aws-http", "aws-sdk-sso", "aws-sdk-sts", "aws-smithy-async", "aws-smithy-client", - "aws-smithy-http 0.51.0", - "aws-smithy-http-tower 0.51.0", + "aws-smithy-http", + "aws-smithy-http-tower", "aws-smithy-json", - "aws-smithy-types 0.51.0", + "aws-smithy-types", "aws-types", "bytes 1.4.0", "hex", "http", "hyper", - "ring 0.16.20", + "ring", "time", "tokio", "tower", @@ -681,14 +698,25 @@ dependencies = [ "zeroize", ] +[[package]] +name = "aws-credential-types" +version = "0.54.1" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" +dependencies = [ + "aws-smithy-async", + "aws-smithy-types", + "tokio", + "tracing 0.1.37", + "zeroize", +] + [[package]] name = "aws-endpoint" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ca8f374874f6459aaa88dc861d7f5d834ca1ff97668eae190e97266b5f6c3fb" +version = "0.54.1" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ - "aws-smithy-http 0.51.0", - "aws-smithy-types 0.51.0", + "aws-smithy-http", + "aws-smithy-types", "aws-types", "http", "regex", @@ -697,12 +725,12 @@ dependencies = [ [[package]] name = "aws-http" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78d41e19e779b73463f5f0c21b3aacc995f4ba783ab13a7ae9f5dfb159a551b4" +version = "0.54.1" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ - "aws-smithy-http 0.51.0", - "aws-smithy-types 0.51.0", + "aws-credential-types", + "aws-smithy-http", + "aws-smithy-types", "aws-types", "bytes 1.4.0", "http", @@ -715,218 +743,236 @@ dependencies = [ [[package]] name = "aws-sdk-cloudwatch" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "520b1ac14f0850d0d6a69136d15ba7702d41ee7f4014a5d2d1bf4a86e74f7a6b" +version = "0.24.0" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ + "aws-credential-types", "aws-endpoint", 
"aws-http", "aws-sig-auth", "aws-smithy-async", "aws-smithy-client", - "aws-smithy-http 0.51.0", - "aws-smithy-http-tower 0.51.0", + "aws-smithy-http", + "aws-smithy-http-tower", + "aws-smithy-json", "aws-smithy-query", - "aws-smithy-types 0.51.0", + "aws-smithy-types", "aws-smithy-xml", "aws-types", "bytes 1.4.0", "http", + "regex", "tokio-stream", "tower", ] [[package]] name = "aws-sdk-cloudwatchlogs" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89415e55b57044a09a7eb0a885c2d0af1aa7f95b373e0e898f71a28d7e7d10f9" +version = "0.24.0" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ + "aws-credential-types", "aws-endpoint", "aws-http", "aws-sig-auth", "aws-smithy-async", "aws-smithy-client", - "aws-smithy-http 0.51.0", - "aws-smithy-http-tower 0.51.0", + "aws-smithy-http", + "aws-smithy-http-tower", "aws-smithy-json", - "aws-smithy-types 0.51.0", + "aws-smithy-types", "aws-types", "bytes 1.4.0", "http", + "regex", "tokio-stream", "tower", ] [[package]] name = "aws-sdk-elasticsearch" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9f4cc10278701dbc0d386ddd8cddfda2695eae7103a54eae11b981f28779ff2" +version = "0.24.0" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ + "aws-credential-types", "aws-endpoint", "aws-http", "aws-sig-auth", "aws-smithy-async", "aws-smithy-client", - "aws-smithy-http 0.51.0", - "aws-smithy-http-tower 0.51.0", + "aws-smithy-http", + "aws-smithy-http-tower", "aws-smithy-json", - "aws-smithy-types 0.51.0", + "aws-smithy-types", "aws-types", "bytes 1.4.0", "http", + "regex", "tokio-stream", "tower", ] [[package]] name = "aws-sdk-firehose" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c68310f9d7860b4fe73c58e5cec4d7a310a658d1a983fdf176eb35149939896a" +version = "0.24.0" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ + "aws-credential-types", "aws-endpoint", "aws-http", "aws-sig-auth", "aws-smithy-async", "aws-smithy-client", - "aws-smithy-http 0.51.0", - "aws-smithy-http-tower 0.51.0", + "aws-smithy-http", + "aws-smithy-http-tower", "aws-smithy-json", - "aws-smithy-types 0.51.0", + "aws-smithy-types", "aws-types", "bytes 1.4.0", "http", + "regex", "tower", ] [[package]] name = "aws-sdk-kinesis" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37766fdf50feab317b4f939b1c9ee58a2a1c51785974328ce84cff1eea7a1bb8" +version = "0.24.0" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ + "aws-credential-types", "aws-endpoint", "aws-http", "aws-sig-auth", "aws-smithy-async", "aws-smithy-client", - "aws-smithy-http 0.51.0", - "aws-smithy-http-tower 0.51.0", + "aws-smithy-http", + "aws-smithy-http-tower", "aws-smithy-json", - "aws-smithy-types 0.51.0", + "aws-smithy-types", "aws-types", "bytes 1.4.0", "http", + "regex", "tokio-stream", "tower", ] [[package]] name = "aws-sdk-s3" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a9f08665c8e03aca8cb092ef01e617436ebfa977fddc1240e1b062488ab5d48a" +version = "0.24.0" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ + "aws-credential-types", "aws-endpoint", "aws-http", "aws-sig-auth", - "aws-sigv4 0.51.0", + "aws-sigv4", "aws-smithy-async", "aws-smithy-checksums", "aws-smithy-client", "aws-smithy-eventstream", - "aws-smithy-http 0.51.0", - "aws-smithy-http-tower 0.51.0", - "aws-smithy-types 0.51.0", + "aws-smithy-http", + "aws-smithy-http-tower", + "aws-smithy-json", + "aws-smithy-types", "aws-smithy-xml", "aws-types", "bytes 1.4.0", "bytes-utils", + "fastrand", "http", "http-body", + "once_cell", + "percent-encoding", + "regex", "tokio-stream", "tower", "tracing 0.1.37", + "url", ] [[package]] name = "aws-sdk-sqs" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b26bb3d12238492cb12bde0de8486679b007daada21fdb110913b32a2a38275" +version = "0.24.0" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ + "aws-credential-types", "aws-endpoint", "aws-http", "aws-sig-auth", "aws-smithy-async", "aws-smithy-client", - "aws-smithy-http 0.51.0", - "aws-smithy-http-tower 0.51.0", + "aws-smithy-http", + "aws-smithy-http-tower", + "aws-smithy-json", "aws-smithy-query", - "aws-smithy-types 0.51.0", + "aws-smithy-types", "aws-smithy-xml", "aws-types", "bytes 1.4.0", "http", + "regex", "tokio-stream", "tower", ] [[package]] name = "aws-sdk-sso" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86dcb1cb71aa8763b327542ead410424515cff0cde5b753eedd2917e09c63734" +version = "0.24.0" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ + "aws-credential-types", "aws-endpoint", "aws-http", "aws-sig-auth", "aws-smithy-async", "aws-smithy-client", - "aws-smithy-http 0.51.0", - "aws-smithy-http-tower 0.51.0", + "aws-smithy-http", + "aws-smithy-http-tower", "aws-smithy-json", - "aws-smithy-types 0.51.0", + "aws-smithy-types", "aws-types", "bytes 1.4.0", "http", + "regex", "tokio-stream", "tower", ] [[package]] name = "aws-sdk-sts" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdfcf584297c666f6b472d5368a78de3bc714b6e0a53d7fbf76c3e347c292ab1" +version = "0.24.0" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ + "aws-credential-types", "aws-endpoint", "aws-http", "aws-sig-auth", "aws-smithy-async", "aws-smithy-client", - "aws-smithy-http 0.51.0", - "aws-smithy-http-tower 0.51.0", + "aws-smithy-http", + "aws-smithy-http-tower", + "aws-smithy-json", "aws-smithy-query", - "aws-smithy-types 0.51.0", + "aws-smithy-types", "aws-smithy-xml", "aws-types", "bytes 1.4.0", "http", + "regex", "tower", + "tracing 0.1.37", ] [[package]] name = "aws-sig-auth" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12cbe7b2be9e185c1fbce27fc9c41c66b195b32d89aa099f98768d9544221308" +version = "0.54.1" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ - "aws-sigv4 
0.51.0", + "aws-credential-types", + "aws-sigv4", "aws-smithy-eventstream", - "aws-smithy-http 0.51.0", + "aws-smithy-http", "aws-types", "http", "tracing 0.1.37", @@ -934,48 +980,28 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03ff4cff8c4a101962d593ba94e72cd83891aecd423f0c6e3146bff6fb92c9e3" +version = "0.54.2" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ "aws-smithy-eventstream", - "aws-smithy-http 0.51.0", + "aws-smithy-http", "bytes 1.4.0", "form_urlencoded", "hex", - "http", - "once_cell", - "percent-encoding", - "regex", - "ring 0.16.20", - "time", - "tracing 0.1.37", -] - -[[package]] -name = "aws-sigv4" -version = "0.55.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ab4eebc8ec484fb9eab04b15a5d1e71f3dc13bee8fdd2d9ed78bcd6ecbd7192" -dependencies = [ - "aws-smithy-http 0.55.1", - "form_urlencoded", - "hex", "hmac", "http", "once_cell", "percent-encoding", "regex", - "sha2 0.10.6", + "sha2 0.10.7", "time", "tracing 0.1.37", ] [[package]] name = "aws-smithy-async" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b3442b4c5d3fc39891a2e5e625735fba6b24694887d49c6518460fde98247a9" +version = "0.54.1" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ "futures-util", "pin-project-lite", @@ -985,12 +1011,11 @@ dependencies = [ [[package]] name = "aws-smithy-checksums" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc227e36e346f45298288359f37123e1a92628d1cec6b11b5eb335553278bd9e" +version = "0.54.1" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ - "aws-smithy-http 0.51.0", - "aws-smithy-types 0.51.0", + "aws-smithy-http", + "aws-smithy-types", "bytes 1.4.0", "crc32c", "crc32fast", @@ -1000,27 +1025,30 @@ dependencies = [ "md-5", "pin-project-lite", "sha1", - "sha2 0.10.6", + "sha2 0.10.7", "tracing 0.1.37", ] [[package]] name = "aws-smithy-client" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff28d553714f8f54cd921227934fc13a536a1c03f106e56b362fd57e16d450ad" +version = "0.54.1" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ "aws-smithy-async", - "aws-smithy-http 0.51.0", - "aws-smithy-http-tower 0.51.0", - "aws-smithy-types 0.51.0", + "aws-smithy-http", + "aws-smithy-http-tower", + "aws-smithy-protocol-test", + "aws-smithy-types", "bytes 1.4.0", "fastrand", "http", "http-body", "hyper", + "hyper-rustls 0.23.1", "hyper-tls", + "lazy_static", "pin-project-lite", + "serde", "tokio", "tower", "tracing 0.1.37", @@ -1028,63 +1056,21 @@ dependencies = [ [[package]] name = "aws-smithy-eventstream" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7ea0df7161ce65b5c8ca6eb709a1a907376fa18226976e41c748ce02ccccf24" +version = "0.54.1" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ - 
"aws-smithy-types 0.51.0", + "aws-smithy-types", "bytes 1.4.0", "crc32fast", ] [[package]] name = "aws-smithy-http" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf58ed4fefa61dbf038e5421a521cbc2c448ef69deff0ab1d915d8a10eda5664" +version = "0.54.1" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ "aws-smithy-eventstream", - "aws-smithy-types 0.51.0", - "bytes 1.4.0", - "bytes-utils", - "futures-core", - "http", - "http-body", - "hyper", - "once_cell", - "percent-encoding", - "pin-project-lite", - "pin-utils", - "tracing 0.1.37", -] - -[[package]] -name = "aws-smithy-http" -version = "0.54.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "873f316f1833add0d3aa54ed1b0cd252ddd88c792a0cf839886400099971e844" -dependencies = [ - "aws-smithy-types 0.54.4", - "bytes 1.4.0", - "bytes-utils", - "futures-core", - "http", - "http-body", - "hyper", - "once_cell", - "percent-encoding", - "pin-project-lite", - "pin-utils", - "tracing 0.1.37", -] - -[[package]] -name = "aws-smithy-http" -version = "0.55.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03bcc02d7ed9649d855c8ce4a735e9848d7b8f7568aad0504c158e3baa955df8" -dependencies = [ - "aws-smithy-types 0.55.1", + "aws-smithy-types", "bytes 1.4.0", "bytes-utils", "futures-core", @@ -1100,11 +1086,11 @@ dependencies = [ [[package]] name = "aws-smithy-http-tower" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20c96d7bd35e7cf96aca1134b2f81b1b59ffe493f7c6539c051791cbbf7a42d3" +version = "0.54.1" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ - "aws-smithy-http 0.51.0", + "aws-smithy-http", + "aws-smithy-types", "bytes 1.4.0", "http", "http-body", @@ -1114,70 +1100,40 @@ dependencies = [ ] [[package]] -name = "aws-smithy-http-tower" -version = "0.54.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f38231d3f5dac9ac7976f44e12803add1385119ffca9e5f050d8e980733d164" +name = "aws-smithy-json" +version = "0.54.1" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ - "aws-smithy-http 0.54.4", - "aws-smithy-types 0.54.4", - "bytes 1.4.0", - "http", - "http-body", - "pin-project-lite", - "tower", - "tracing 0.1.37", + "aws-smithy-types", ] [[package]] -name = "aws-smithy-json" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8324ba98c8a94187723cc16c37aefa09504646ee65c3d2c3af495bab5ea701b" +name = "aws-smithy-protocol-test" +version = "0.54.1" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ - "aws-smithy-types 0.51.0", + "assert-json-diff 1.1.0", + "http", + "pretty_assertions", + "regex", + "roxmltree 0.14.1", + "serde_json", + "thiserror", ] [[package]] name = "aws-smithy-query" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83834ed2ff69ea6f6657baf205267dc2c0abe940703503a3e5d60ce23be3d306" +version = "0.54.1" +source = 
"git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ - "aws-smithy-types 0.51.0", + "aws-smithy-types", "urlencoding", ] [[package]] name = "aws-smithy-types" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b02e06ea63498c43bc0217ea4d16605d4e58d85c12fc23f6572ff6d0a840c61" -dependencies = [ - "itoa", - "num-integer", - "ryu", - "time", -] - -[[package]] -name = "aws-smithy-types" -version = "0.54.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8161232eda10290f5136610a1eb9de56aceaccd70c963a26a260af20ac24794f" -dependencies = [ - "base64-simd", - "itoa", - "num-integer", - "ryu", - "time", -] - -[[package]] -name = "aws-smithy-types" -version = "0.55.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd0afc731fd1417d791f9145a1e0c30e23ae0beaab9b4814017708ead2fc20f1" +version = "0.54.1" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ "base64-simd", "itoa", @@ -1188,27 +1144,25 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "246e9f83dd1fdf5d347fa30ae4ad30a9d1d42ce4cd74a93d94afa874646f94cd" +version = "0.54.1" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05701d32da168b44f7ee63147781aed8723e792cc131cb9b18363b5393f17f70" +version = "0.54.1" +source = "git+https://github.com/vectordotdev/aws-sdk-rust?rev=3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670#3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670" dependencies = [ + "aws-credential-types", "aws-smithy-async", "aws-smithy-client", - "aws-smithy-http 0.51.0", - "aws-smithy-types 0.51.0", + "aws-smithy-http", + "aws-smithy-types", "http", "rustc_version 0.4.0", "tracing 0.1.37", - "zeroize", ] [[package]] @@ -1267,7 +1221,7 @@ dependencies = [ "bytes 1.4.0", "dyn-clone", "futures 0.3.28", - "getrandom 0.2.9", + "getrandom 0.2.10", "http-types", "log", "paste", @@ -1321,7 +1275,7 @@ dependencies = [ "serde-xml-rs", "serde_derive", "serde_json", - "sha2 0.10.6", + "sha2 0.10.7", "time", "url", "uuid", @@ -1355,7 +1309,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" dependencies = [ - "getrandom 0.2.9", + "getrandom 0.2.10", "instant", "rand 0.8.5", ] @@ -1372,6 +1326,21 @@ dependencies = [ "tokio", ] +[[package]] +name = "backtrace" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + [[package]] name = "base16" version = "0.2.1" @@ -1392,18 +1361,17 @@ checksum = "0ea22880d78093b0cbe17c89f64a7d457941e65759157ec6cb31a31d652b05e5" [[package]] name = "base64" -version = "0.21.0" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" +checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d" [[package]] name = "base64-simd" -version = "0.8.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +checksum = "781dd20c3aff0bd194fe7d2a977dd92f21c173891f3a03b677359e5fa457e5d5" dependencies = [ - "outref", - "vsimd", + "simd-abstraction", ] [[package]] @@ -1450,9 +1418,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" [[package]] name = "bitmask-enum" @@ -1482,7 +1450,7 @@ version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" dependencies = [ - "digest 0.10.6", + "digest 0.10.7", ] [[package]] @@ -1541,7 +1509,7 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af254ed2da4936ef73309e9597180558821cb16ae9bba4cb24ce6b612d8d80ed" dependencies = [ - "base64 0.21.0", + "base64 0.21.2", "bollard-stubs", "bytes 1.4.0", "chrono", @@ -1551,7 +1519,7 @@ dependencies = [ "hex", "http", "hyper", - "hyper-rustls", + "hyper-rustls 0.23.1", "hyperlocal", "log", "pin-project-lite", @@ -1602,7 +1570,7 @@ dependencies = [ "borsh-derive-internal", "borsh-schema-derive-internal", "proc-macro-crate 0.1.5", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "syn 1.0.109", ] @@ -1612,7 +1580,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61820b4c5693eafb998b1e67485423c923db4a75f72585c247bdee32bad81e7b" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -1623,7 +1591,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c76cdbfa13def20d1f8af3ae7b3c6771f06352a74221d8851262ac384c122b8e" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -1638,7 +1606,7 @@ dependencies = [ "base64 0.13.1", "bitvec", "hex", - "indexmap", + "indexmap 1.9.3", "js-sys", "lazy_static", "rand 0.8.5", @@ -1657,18 +1625,18 @@ checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" dependencies = [ "lazy_static", "memchr", - "regex-automata", + "regex-automata 0.1.10", ] [[package]] name = "bstr" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09" +checksum = "a246e68bb43f6cd9db24bea052a53e40405417c5fb372e3d1a8a7f770a564ef5" dependencies = [ "memchr", "once_cell", - "regex-automata", + "regex-automata 0.1.10", "serde", ] @@ -1694,16 +1662,16 @@ version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13e576ebe98e605500b3c8041bb888e966653577172df6dd97398714eb30b9bf" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] [[package]] name = "bytecount" -version = "0.6.3" +version = "0.6.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" +checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" [[package]] name = "bytemuck" @@ -1760,18 +1728,16 @@ checksum = "c1db59621ec70f09c5e9b597b220c7a2b43611f4710dc03ceb8748637775692c" [[package]] name = "cached" -version = "0.43.0" +version = "0.44.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc2fafddf188d13788e7099295a59b99e99b2148ab2195cae454e754cc099925" +checksum = "b195e4fbc4b6862bbd065b991a34750399c119797efff72492f28a5864de8700" dependencies = [ "async-trait", - "async_once", "cached_proc_macro", "cached_proc_macro_types", "futures 0.3.28", "hashbrown 0.13.2", "instant", - "lazy_static", "once_cell", "thiserror", "tokio", @@ -1779,13 +1745,13 @@ dependencies = [ [[package]] name = "cached_proc_macro" -version = "0.16.0" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10ca87c81aaa3a949dbbe2b5e6c2c45dbc94ba4897e45ea31ff9ec5087be3dc" +checksum = "b48814962d2fd604c50d2b9433c2a41a0ab567779ee2c02f7fba6eca1221f082" dependencies = [ "cached_proc_macro_types", "darling 0.14.2", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -1807,9 +1773,9 @@ dependencies = [ [[package]] name = "cargo-platform" -version = "0.1.3" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cfa25e60aea747ec7e1124f238816749faa93759c6ff5b31f1ccdda137f4479" +checksum = "e34637b3140142bdf929fb439e8aa4ebad7651ebf7b1080b3930aa16ac1459ff" dependencies = [ "serde", ] @@ -1850,12 +1816,11 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.83" +version = "1.0.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +checksum = "e9f73505338f7d905b19d18738976aae232eb46b8efc15554ffc56deb5d9ebe4" dependencies = [ "jobserver", - "libc", ] [[package]] @@ -1891,12 +1856,12 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.24" -source = "git+https://github.com/vectordotdev/chrono.git?tag=v0.4.24-no-default-time-1#7ec1ad93833787da5df64898fb3e6206221c6833" +version = "0.4.26" +source = "git+https://github.com/vectordotdev/chrono.git?tag=v0.4.26-no-default-time-1#d44a3b100183d68f8a3e3cb431fcc4a47152a0a3" dependencies = [ + "android-tzdata", "iana-time-zone", "js-sys", - "num-integer", "num-traits", "serde", "wasm-bindgen", @@ -1905,9 +1870,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.8.2" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9cc2b23599e6d7479755f3594285efb3f74a1bdca7a7374948bc831e23a552" +checksum = "e23185c0e21df6ed832a12e2bda87c7d1def6842881fb634a8511ced741b0d76" dependencies = [ "chrono", "chrono-tz-build", @@ -1917,9 +1882,9 @@ dependencies = [ [[package]] name = "chrono-tz-build" -version = "0.1.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9998fb9f7e9b2111641485bf8beb32f92945f97f92a3d061f744cfef335f751" +checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" dependencies = [ "parse-zoneinfo", "phf", @@ -1986,23 +1951,11 @@ dependencies = [ "atty", "bitflags 1.3.2", "strsim 0.8.0", - "textwrap 0.11.0", + "textwrap", "unicode-width", "vec_map", ] -[[package]] -name = "clap" -version = "3.2.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" -dependencies = [ - "bitflags 1.3.2", - "clap_lex 0.2.4", - "indexmap", - "textwrap 0.16.0", -] - [[package]] name = "clap" version = "4.1.14" @@ -2031,7 +1984,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "351f9ad9688141ed83dfd8f5fb998a06225ef444b48ff4dc43de6d409b7fd10b" dependencies = [ "bitflags 1.3.2", - "clap_lex 0.4.1", + "clap_lex", "is-terminal", "strsim 0.10.0", "termcolor", @@ -2040,9 +1993,9 @@ dependencies = [ [[package]] name = "clap_complete" -version = "4.2.3" +version = "4.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1594fe2312ec4abf402076e407628f5c313e54c32ade058521df4ee34ecac8a8" +checksum = "7f6b5c519bab3ea61843a7923d074b04245624bb84a64a8c150f5deb014e388b" dependencies = [ "clap 4.1.14", ] @@ -2054,18 +2007,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81d7dc0031c3a59a04fc2ba395c8e2dd463cba1859275f065d225f6122221b45" dependencies = [ "heck 0.4.0", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", - "syn 2.0.31", -] - -[[package]] -name = "clap_lex" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" -dependencies = [ - "os_str_bytes", + "syn 2.0.41", ] [[package]] @@ -2076,9 +2020,9 @@ checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1" [[package]] name = "clipboard-win" -version = "4.4.2" +version = "4.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4ab1b92798304eedc095b53942963240037c0516452cb11aeba709d420b2219" +checksum = "7191c27c2357d9b7ef96baac1773290d4ca63b24205b82a3fd8a0637afcf0362" dependencies = [ "error-code", "str-buf", @@ -2307,9 +2251,9 @@ checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" [[package]] name = "cpufeatures" -version = "0.2.5" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320" +checksum = "ce420fe07aecd3e67c5f910618fe65e94158f6dcc0adf44e00d69ce2bdfe0fd0" dependencies = [ "libc", ] @@ -2349,20 +2293,20 @@ dependencies = [ [[package]] name = "criterion" -version = "0.4.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" dependencies = [ "anes", - "atty", "cast", "ciborium", - "clap 3.2.23", + "clap 4.1.14", "criterion-plot", "futures 0.3.28", - "itertools", - "lazy_static", + "is-terminal", + "itertools 0.10.5", "num-traits", + "once_cell", "oorandom", "plotters", "rayon", @@ -2382,7 +2326,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", - "itertools", + "itertools 0.10.5", ] [[package]] @@ -2431,9 +2375,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.15" +version = "0.8.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" +checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" dependencies = [ "cfg-if", ] @@ -2498,9 +2442,9 @@ dependencies = [ [[package]] name = "csv" -version = "1.2.1" +version 
= "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b015497079b9a9d69c02ad25de6c0a6edef051ea6360a327d0bd05802ef64ad" +checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" dependencies = [ "csv-core", "itoa", @@ -2566,7 +2510,7 @@ dependencies = [ "cc", "codespan-reporting", "once_cell", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "scratch", "syn 1.0.109", @@ -2584,7 +2528,7 @@ version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a08a6e2fcc370a089ad3b4aaf54db3b1b4cee38ddabce5896b33eb693275f470" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -2617,7 +2561,7 @@ checksum = "859d65a907b6852c9361e3185c862aae7fafd2887876799fa55f5f99dc40d610" dependencies = [ "fnv", "ident_case", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "strsim 0.10.0", "syn 1.0.109", @@ -2631,7 +2575,7 @@ checksum = "a784d2ccaf7c98501746bf0be29b2022ba41fd62a2e622af997a03e9f972859f" dependencies = [ "fnv", "ident_case", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "strsim 0.10.0", "syn 1.0.109", @@ -2652,84 +2596,38 @@ dependencies = [ name = "darling_macro" version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7618812407e9402654622dd402b0a89dff9ba93badd6540781526117b92aab7e" -dependencies = [ - "darling_core 0.14.2", - "quote 1.0.33", - "syn 1.0.109", -] - -[[package]] -name = "dashmap" -version = "5.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc" -dependencies = [ - "cfg-if", - "hashbrown 0.12.3", - "lock_api", - "once_cell", - "parking_lot_core", -] - -[[package]] -name = "data-encoding" -version = "2.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23d8666cb01533c39dde32bcbab8e227b4ed6679b2c925eba05feabea39508fb" - -[[package]] -name = "data-url" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d7439c3735f405729d52c3fbbe4de140eaf938a1fe47d227c27f8254d4302a5" - -[[package]] -name = "datadog-filter" -version = "0.4.2" -source = "git+ssh://git@github.com/answerbook/vrl.git?rev=v0.6.0#23ae815d2db5b1e4eab4e1450b5b7868f9277ed1" -dependencies = [ - "datadog-search-syntax", - "dyn-clone", - "regex", -] - -[[package]] -name = "datadog-grok" -version = "0.4.2" -source = "git+ssh://git@github.com/answerbook/vrl.git?rev=v0.6.0#23ae815d2db5b1e4eab4e1450b5b7868f9277ed1" -dependencies = [ - "bytes 1.4.0", - "chrono", - "chrono-tz", - "lalrpop", - "lalrpop-util", - "nom", - "once_cell", - "onig", - "ordered-float 3.7.0", - "path", - "peeking_take_while", - "regex", - "serde_json", - "thiserror", - "tracing 0.1.37", - "value", - "vrl-compiler", +checksum = "7618812407e9402654622dd402b0a89dff9ba93badd6540781526117b92aab7e" +dependencies = [ + "darling_core 0.14.2", + "quote 1.0.33", + "syn 1.0.109", ] [[package]] -name = "datadog-search-syntax" -version = "0.4.2" -source = "git+ssh://git@github.com/answerbook/vrl.git?rev=v0.6.0#23ae815d2db5b1e4eab4e1450b5b7868f9277ed1" +name = "dashmap" +version = "5.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc" dependencies = [ - "itertools", + "cfg-if", + "hashbrown 0.12.3", + "lock_api", "once_cell", - "pest", - "pest_derive", - "regex", + "parking_lot_core", ] 
+[[package]] +name = "data-encoding" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2e66c9d817f1720209181c316d28635c050fa304f9c79e47a520882661b7308" + +[[package]] +name = "data-url" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d7439c3735f405729d52c3fbbe4de140eaf938a1fe47d227c27f8254d4302a5" + [[package]] name = "db-key" version = "0.0.5" @@ -2791,7 +2689,18 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", + "quote 1.0.33", + "syn 1.0.109", +] + +[[package]] +name = "derive_arbitrary" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cdeb9ec472d588e539a818b2dee436825730da08ad0017c4b1a17676bdc8b7" +dependencies = [ + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -2803,7 +2712,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" dependencies = [ "convert_case 0.4.0", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "rustc_version 0.4.0", "syn 1.0.109", @@ -2811,9 +2720,9 @@ dependencies = [ [[package]] name = "deunicode" -version = "1.3.3" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c1bba4f227a4a53d12b653f50ca7bf10c9119ae2aba56aff9e0338b5c98f36a" +checksum = "3ae2a35373c5c74340b79ae6780b498b2b183915ec5dacf263aac5a099bf485a" [[package]] name = "diff" @@ -2838,9 +2747,9 @@ dependencies = [ [[package]] name = "digest" -version = "0.10.6" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer 0.10.3", "crypto-common", @@ -2920,14 +2829,14 @@ dependencies = [ [[package]] name = "dns-lookup" -version = "1.0.8" +version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53ecafc952c4528d9b51a458d1a8904b81783feff9fde08ab6ed2545ff396872" +checksum = "e5766087c2235fec47fafa4cfecc81e494ee679d0fd4a59887ea0919bfb0e4fc" dependencies = [ "cfg-if", "libc", - "socket2 0.4.9", - "winapi", + "socket2 0.5.3", + "windows-sys 0.48.0", ] [[package]] @@ -3061,7 +2970,6 @@ dependencies = [ "arc-swap", "chrono", "dyn-clone", - "vector-common", "vrl", ] @@ -3072,7 +2980,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "21cdad81446a7f7dc43f6a77409efeb9733d2fa65553efef6018ef257c959b73" dependencies = [ "heck 0.4.0", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -3084,7 +2992,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116" dependencies = [ "heck 0.4.0", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -3096,7 +3004,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "11f36e95862220b211a6e2aa5eca09b4fa391b13cd52ceb8035a24bf65a79de2" dependencies = [ "once_cell", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -3116,9 +3024,9 @@ version = "0.7.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "5e9a1f9f7d83e59740248a6e14ecf93929ade55027844dfcea78beafccc15745" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", - "syn 2.0.31", + "syn 2.0.41", ] [[package]] @@ -3163,6 +3071,12 @@ dependencies = [ "termcolor", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "erased-serde" version = "0.3.23" @@ -3185,13 +3099,13 @@ dependencies = [ [[package]] name = "errno" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d6a0976c999d473fe89ad888d5a284e55366d9dc9038b1ba2aa15128c4afa0" +checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" dependencies = [ "errno-dragonfly", "libc", - "windows-sys 0.45.0", + "windows-sys 0.48.0", ] [[package]] @@ -3244,6 +3158,18 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193" +[[package]] +name = "extend" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f47da3a72ec598d9c8937a7ebca8962a5c7a1f28444e38c2b33c771ba3f55f05" +dependencies = [ + "proc-macro-error", + "proc-macro2 1.0.70", + "quote 1.0.33", + "syn 1.0.109", +] + [[package]] name = "fakedata" version = "0.1.0" @@ -3293,9 +3219,9 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" [[package]] name = "faster-hex" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9042d281a5eec0f2387f8c3ea6c4514e2cf2732c90a85aaf383b761ee3b290d" +checksum = "239f7bfb930f820ab16a9cd95afc26f88264cf6905c960b340a615384aa3338a" dependencies = [ "serde", ] @@ -3313,7 +3239,7 @@ dependencies = [ name = "file-source" version = "0.1.0" dependencies = [ - "bstr 1.4.0", + "bstr 1.5.0", "bytes 1.4.0", "chrono", "crc", @@ -3322,7 +3248,7 @@ dependencies = [ "flate2", "futures 0.3.28", "glob", - "indexmap", + "indexmap 2.0.2", "libc", "quickcheck", "scan_fmt", @@ -3365,7 +3291,7 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4c81935e123ab0741c4c4f0d9b8377e5fb21d3de7e062fa4b1263b1fbcba1ea" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -3442,9 +3368,9 @@ checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" [[package]] name = "form_urlencoded" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" dependencies = [ "percent-encoding", ] @@ -3457,9 +3383,9 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" [[package]] name = "fs_extra" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsevent-sys" @@ -3561,9 +3487,9 @@ version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", - "syn 2.0.31", + "syn 2.0.41", ] [[package]] @@ -3627,9 +3553,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.9" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if", "js-sys", @@ -3644,11 +3570,17 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb19fe8de3ea0920d282f7b77dd4227aea6b8b999b42cdf0ca41b2472b14443a" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] +[[package]] +name = "gimli" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" + [[package]] name = "glob" version = "0.3.1" @@ -3657,9 +3589,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "gloo-utils" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8e8fc851e9c7b9852508bc6e3f690f452f474417e8545ec9857b7f7377036b5" +checksum = "037fcb07216cb3a30f7292bd0176b050b7b9a052ba830ef7d5d65f6dc64ba58e" dependencies = [ "js-sys", "serde", @@ -3725,9 +3657,9 @@ dependencies = [ [[package]] name = "graphql_client" -version = "0.12.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa61bb9dc6d373a8b465a5da17b62809483e8527a34b0e9034dc0915b09e160a" +checksum = "09cdf7b487d864c2939b23902291a5041bc4a84418268f25fda1c8d4e15ad8fa" dependencies = [ "graphql_query_derive", "serde", @@ -3736,15 +3668,15 @@ dependencies = [ [[package]] name = "graphql_client_codegen" -version = "0.12.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e55df64cc702c4ad6647f8df13a799ad11688a3781fadf5045f7ba12733fa9b" +checksum = "a40f793251171991c4eb75bd84bc640afa8b68ff6907bc89d3b712a22f700506" dependencies = [ "graphql-introspection-query", "graphql-parser", "heck 0.4.0", "lazy_static", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "serde", "serde_json", @@ -3753,12 +3685,12 @@ dependencies = [ [[package]] name = "graphql_query_derive" -version = "0.12.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d52fc9cde811f44b15ec0692b31e56a3067f6f431c5ace712f286e47c1dacc98" +checksum = "00bda454f3d313f909298f626115092d348bc231025699f557b27e248475f48c" dependencies = [ "graphql_client_codegen", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "syn 1.0.109", ] @@ -3774,9 +3706,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.19" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d357c7ae988e7d2182f7d7871d0b963962420b0678b0997ce7de72001aeab782" +checksum = "4d6250322ef6e60f93f9a2162799302cd6f68f79f6e5d85c8c16f14d1d958178" dependencies = [ "bytes 1.4.0", "fnv", @@ -3784,7 +3716,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap", + "indexmap 2.0.2", "slab", "tokio", "tokio-util", @@ -3818,17 +3750,16 @@ version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" dependencies = [ - "ahash 0.8.2", + "ahash 0.8.6", ] [[package]] -name = "hashlink" -version = "0.8.2" +name = "hashbrown" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0761a1b9491c4f2e3d66aa0f62d0fba0af9a0e2852e4d48ea506632a4b56e6aa" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ - "hashbrown 0.13.2", - "serde", + "ahash 0.8.6", ] [[package]] @@ -4046,7 +3977,7 @@ version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" dependencies = [ - "digest 0.10.6", + "digest 0.10.7", ] [[package]] @@ -4123,9 +4054,9 @@ checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" [[package]] name = "httptest" -version = "0.15.4" +version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f25cfb6def593d43fae1ead24861f217e93bc70768a45cc149a69b5f049df4" +checksum = "c8b44a11846bda8c9fe9194f9924db7132c34635c7ce020f180f6c5d46d2308f" dependencies = [ "bstr 0.2.17", "bytes 1.4.0", @@ -4149,7 +4080,7 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" dependencies = [ - "quick-error 1.2.3", + "quick-error", ] [[package]] @@ -4160,9 +4091,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.26" +version = "0.14.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab302d72a6f11a3b910431ff93aae7e773078c769f0a3ef15fb9ec692ed147d4" +checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468" dependencies = [ "bytes 1.4.0", "futures-channel", @@ -4232,6 +4163,19 @@ dependencies = [ "tokio-rustls 0.23.4", ] +[[package]] +name = "hyper-rustls" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0646026eb1b3eea4cd9ba47912ea5ce9cc07713d105b1a14698f4e6433d348b7" +dependencies = [ + "http", + "hyper", + "rustls 0.21.0", + "tokio", + "tokio-rustls 0.24.0", +] + [[package]] name = "hyper-timeout" version = "0.4.1" @@ -4313,9 +4257,9 @@ dependencies = [ [[package]] name = "idna" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -4338,15 +4282,27 @@ dependencies = [ "serde", ] +[[package]] +name = "indexmap" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897" +dependencies = [ + "equivalent", + "hashbrown 0.14.3", + "serde", +] + [[package]] name = "indicatif" -version = "0.17.3" +version = "0.17.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cef509aa9bc73864d6756f0d34d35504af3cf0844373afe9b8669a5b8005a729" +checksum = "8ff8cc23a7393a397ed1d7f56e6365cba772aba9f9912ab968b03043c395d057" dependencies = [ "console", + "instant", "number_prefix", - "portable-atomic 0.3.15", + "portable-atomic", "unicode-segmentation", "unicode-width", ] @@ -4365,9 +4321,9 @@ checksum = 
"64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac" [[package]] name = "infer" -version = "0.13.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f551f8c3a39f68f986517db0d1759de85881894fdc7db798bd2a9df9cb04b7fc" +checksum = "bbb78f4c4a058ef30a9ff77322e758f7e60f871274b602d7fdc1b0956b0cb88e" [[package]] name = "inotify" @@ -4425,12 +4381,13 @@ checksum = "59ce5ef949d49ee85593fc4d3f3f95ad61657076395cbbce23e2121fc5542074" [[package]] name = "io-lifetimes" -version = "1.0.3" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46112a93252b123d31a119a8d1a1ac19deac4fac6e0e8b0df58f0d4e5870e63c" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" dependencies = [ + "hermit-abi 0.3.1", "libc", - "windows-sys 0.42.0", + "windows-sys 0.48.0", ] [[package]] @@ -4476,8 +4433,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" dependencies = [ "hermit-abi 0.3.1", - "io-lifetimes 1.0.3", - "rustix 0.37.5", + "io-lifetimes 1.0.11", + "rustix 0.37.19", "windows-sys 0.48.0", ] @@ -4496,6 +4453,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.6" @@ -4608,7 +4574,7 @@ version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd990069640f9db34b3b0f7a1afc62a05ffaa3be9b66aa3c313f58346df7f788" dependencies = [ - "base64 0.21.0", + "base64 0.21.2", "bytes 1.4.0", "chrono", "http", @@ -4706,7 +4672,7 @@ dependencies = [ "secrecy", "serde", "serde_json", - "serde_yaml 0.9.21", + "serde_yaml 0.9.27", "thiserror", "tokio", "tokio-util", @@ -4738,7 +4704,7 @@ version = "0.82.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed8442b2f1d6c1d630677ade9e5d5ebe793dec099a75fb582d56d77b8eb8cee8" dependencies = [ - "ahash 0.8.2", + "ahash 0.8.6", "async-trait", "backoff", "derivative", @@ -4759,20 +4725,20 @@ dependencies = [ [[package]] name = "lalrpop" -version = "0.19.8" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b30455341b0e18f276fa64540aff54deafb54c589de6aca68659c63dd2d5d823" +checksum = "da4081d44f4611b66c6dd725e6de3169f9f63905421e8626fcb86b6a898998b8" dependencies = [ "ascii-canvas", - "atty", "bit-set", "diff", "ena", - "itertools", + "is-terminal", + "itertools 0.10.5", "lalrpop-util", "petgraph", "regex", - "regex-syntax 0.6.29", + "regex-syntax 0.7.2", "string_cache", "term", "tiny-keccak", @@ -4781,15 +4747,15 @@ dependencies = [ [[package]] name = "lalrpop-util" -version = "0.19.9" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5c1f7869c94d214466c5fd432dfed12c379fd87786768d36455892d46b18edd" +checksum = "3f35c735096c0293d313e8f2a641627472b83d01b937177fe76e5e2708d31e0d" [[package]] name = "lapin" -version = "2.1.1" +version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd03ea5831b44775e296239a64851e2fd14a80a363d202ba147009ffc994ff0f" +checksum = "acc13beaa09eed710f406201f46b961345b4d061dd90ec3d3ccc70721e70342a" dependencies = [ "amq-protocol", "async-global-executor-trait", @@ -4815,9 +4781,9 @@ checksum = 
"e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.144" +version = "0.2.151" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" +checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" [[package]] name = "libflate" @@ -4889,9 +4855,9 @@ checksum = "d4d2456c373231a208ad294c33dc5bff30051eafd954cd4caae83a712b12854d" [[package]] name = "linux-raw-sys" -version = "0.3.0" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd550e73688e6d578f0ac2119e32b797a327631a42f9433e59d02e139c8df60d" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" [[package]] name = "listenfd" @@ -4922,12 +4888,9 @@ checksum = "8166fbddef141acbea89cf3425ed97d4c22d14a68161977fc01c301175a4fb89" [[package]] name = "log" -version = "0.4.17" +version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] +checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" [[package]] name = "logfmt" @@ -4949,9 +4912,9 @@ dependencies = [ [[package]] name = "lru" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03f1160296536f10c833a82dca22267d5486734230d47bf00bf435885814ba1e" +checksum = "718e8fae447df0c7e1ba7f5189829e63fd536945c8988d61444c19039f16b670" [[package]] name = "lru-cache" @@ -5045,7 +5008,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" dependencies = [ - "regex-automata", + "regex-automata 0.1.10", ] [[package]] @@ -5087,7 +5050,7 @@ version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6365506850d44bff6e2fbcb5176cf63650e48bd45ef2fe2665ae1570e0f4b9ca" dependencies = [ - "digest 0.10.6", + "digest 0.10.7", ] [[package]] @@ -5098,15 +5061,15 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "memchr" -version = "2.5.0" +version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" [[package]] name = "memmap2" -version = "0.6.1" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0aa1b505aeecb0adb017db2b6a79a17a38e64f882a201f05e9de8a982cd6096" +checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" dependencies = [ "libc", ] @@ -5135,9 +5098,9 @@ version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aa8ebbd1a9e57bbab77b9facae7f5136aea44c356943bf9a198f647da64285d6" dependencies = [ - "ahash 0.8.2", + "ahash 0.8.6", "metrics-macros", - "portable-atomic 1.3.1", + "portable-atomic", ] [[package]] @@ -5146,9 +5109,9 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ddece26afd34c31585c74a4db0630c376df271c285d682d1e55012197830b6df" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", - "syn 2.0.31", + "syn 2.0.41", ] [[package]] @@ -5177,7 +5140,7 @@ dependencies = [ "crossbeam-epoch", "crossbeam-utils", 
"hashbrown 0.13.2", - "indexmap", + "indexmap 1.9.3", "metrics", "num_cpus", "ordered-float 3.7.0", @@ -5188,9 +5151,9 @@ dependencies = [ [[package]] name = "mime" -version = "0.3.16" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "mime_guess" @@ -5219,14 +5182,14 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.5" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" +checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09" dependencies = [ "libc", "log", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.42.0", + "windows-sys 0.48.0", ] [[package]] @@ -5247,9 +5210,9 @@ dependencies = [ [[package]] name = "mock_instant" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c734e0ceadb79b49feb5a39b038035c7881bfd163e999916dc79b57f4996b6f" +checksum = "6c1a54de846c4006b88b1516731cc1f6026eb5dc4bcb186aa071ef66d40524ec" [[package]] name = "mockall" @@ -5273,7 +5236,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22ce75669015c4f47b289fd4d4f56e894e4c96003ffdf3ac51313126f94c6cbb" dependencies = [ "cfg-if", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -5302,9 +5265,9 @@ dependencies = [ [[package]] name = "mongodb" -version = "2.5.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebe15399de63ad4294c80069967736cbb87ebe467a8cd0629df9cab88a6fbde6" +checksum = "16928502631c0db72214720aa479c722397fe5aed6bf1c740a3830b3fe4bfcfe" dependencies = [ "async-trait", "base64 0.13.1", @@ -5331,7 +5294,7 @@ dependencies = [ "serde_bytes", "serde_with 1.14.0", "sha-1", - "sha2 0.10.6", + "sha2 0.10.7", "socket2 0.4.9", "stringprep", "strsim 0.10.0", @@ -5411,7 +5374,7 @@ dependencies = [ "once_cell", "parking_lot", "regex", - "ring 0.16.20", + "ring", "rustls 0.19.1", "rustls-native-certs 0.5.0", "rustls-pemfile 0.2.1", @@ -5444,8 +5407,8 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af5a8477ac96877b5bd1fd67e0c28736c12943aba24eda92b127e036b0c8f400" dependencies = [ - "indexmap", - "itertools", + "indexmap 1.9.3", + "itertools 0.10.5", "ndarray", "noisy_float", "num-integer", @@ -5508,7 +5471,7 @@ dependencies = [ "byteorder", "data-encoding", "ed25519-dalek", - "getrandom 0.2.9", + "getrandom 0.2.10", "log", "rand 0.8.5", "signatory", @@ -5523,7 +5486,7 @@ dependencies = [ "byteorder", "data-encoding", "ed25519-dalek", - "getrandom 0.2.9", + "getrandom 0.2.10", "log", "rand 0.8.5", "signatory", @@ -5578,9 +5541,9 @@ checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" [[package]] name = "notify" -version = "6.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d9ba6c734de18ca27c8cef5cd7058aa4ac9f63596131e4c7e41e579319032a2" +checksum = "5738a2795d57ea20abec2d6d76c6081186709c0024187cd5977265eda6598b51" dependencies = [ "bitflags 1.3.2", "filetime", @@ -5755,7 +5718,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcbff9bc912032c62bf65ef1d5aea88983b420f4f839db1e9b0c281a25c9c799" dependencies = [ "proc-macro-crate 
1.2.1", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -5767,9 +5730,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96667db765a921f7b295ffee8b60472b686a51d4f21c2ee4ffdb94c7013b65a6" dependencies = [ "proc-macro-crate 1.2.1", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", - "syn 2.0.31", + "syn 2.0.41", ] [[package]] @@ -5789,20 +5752,20 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "oauth2" -version = "4.3.0" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eeaf26a72311c087f8c5ba617c96fac67a5c04f430e716ac8d8ab2de62e23368" +checksum = "c38841cdd844847e3e7c8d29cef9dcfed8877f8f56f9071f77843ecf3baf937f" dependencies = [ "base64 0.13.1", "chrono", - "getrandom 0.2.9", + "getrandom 0.2.10", "http", "rand 0.8.5", "reqwest", "serde", "serde_json", "serde_path_to_error", - "sha2 0.10.6", + "sha2 0.10.7", "thiserror", "url", ] @@ -5816,6 +5779,15 @@ dependencies = [ "malloc_buf", ] +[[package]] +name = "object" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" +dependencies = [ + "memchr", +] + [[package]] name = "ofb" version = "0.6.1" @@ -5827,9 +5799,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.17.1" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "onig" @@ -5867,15 +5839,15 @@ checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" [[package]] name = "opendal" -version = "0.34.0" +version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "005c877c4f788a7749825bbc61031ccc950217fac6bcf9965641fbf1bdf991b0" +checksum = "4df645b6012162c04c8949e9b96ae2ef002e79189cfb154e507e51ac5be76a09" dependencies = [ "anyhow", "async-compat", "async-trait", "backon", - "base64 0.21.0", + "base64 0.21.2", "bytes 1.4.0", "chrono", "flagset", @@ -5898,35 +5870,38 @@ dependencies = [ [[package]] name = "openidconnect" -version = "2.4.0" +version = "2.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87af7097640fedbe64718ac1c9b0549d72da747a3f527cd089215f96c6f691d5" +checksum = "98dd5b7049bac4fdd2233b8c9767d42c05da8006fdb79cc903258556d2b18009" dependencies = [ "base64 0.13.1", "chrono", "http", - "itertools", + "itertools 0.10.5", "log", "num-bigint", "oauth2", "rand 0.8.5", - "ring 0.16.20", + "ring", "serde", "serde-value", "serde_derive", "serde_json", "serde_path_to_error", + "serde_plain", + "serde_with 1.14.0", + "subtle", "thiserror", "url", ] [[package]] name = "openssl" -version = "0.10.57" +version = "0.10.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bac25ee399abb46215765b1cb35bc0212377e58a061560d8b29b024fd0430e7c" +checksum = "345df152bc43501c5eb9e4654ff05f794effb78d4efe3d53abc158baddc0703d" dependencies = [ - "bitflags 2.4.0", + "bitflags 1.3.2", "cfg-if", "foreign-types", "libc", @@ -5941,7 +5916,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b501e44f11665960c7e7fcf062c7d96a14ade4aa98116c004b2e37b5be7d736c" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 
1.0.109", ] @@ -5961,24 +5936,15 @@ dependencies = [ "cc", ] -[[package]] -name = "openssl-src" -version = "300.1.3+3.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd2c101a165fff9935e34def4669595ab1c7847943c42be86e21503e482be107" -dependencies = [ - "cc", -] - [[package]] name = "openssl-sys" -version = "0.9.93" +version = "0.9.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db4d56a4c0478783083cfafcc42493dd4a981d41669da64b4572a2a089b51b1d" +checksum = "374533b0e45f3a7ced10fcaeccca020e66656bc03dac384f852e4e5a7a8104a6" dependencies = [ "cc", "libc", - "openssl-src 300.1.3+3.1.2", + "openssl-src", "pkg-config", "vcpkg", ] @@ -6058,17 +6024,11 @@ dependencies = [ "winapi", ] -[[package]] -name = "os_str_bytes" -version = "6.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" - [[package]] name = "outref" -version = "0.5.1" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" +checksum = "7f222829ae9293e33a9f5e9f440c6760a3d450a64affe1846486b140db81c1f4" [[package]] name = "overload" @@ -6138,17 +6098,6 @@ version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79" -[[package]] -name = "path" -version = "0.4.2" -source = "git+ssh://git@github.com/answerbook/vrl.git?rev=v0.6.0#23ae815d2db5b1e4eab4e1450b5b7868f9277ed1" -dependencies = [ - "once_cell", - "regex", - "serde", - "snafu", -] - [[package]] name = "pb-rs" version = "0.10.0" @@ -6167,7 +6116,7 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83a0692ec44e4cf1ef28ca317f14f8f07da2d95ec3fa01f86e4467b725e60917" dependencies = [ - "digest 0.10.6", + "digest 0.10.7", ] [[package]] @@ -6196,9 +6145,9 @@ dependencies = [ [[package]] name = "percent-encoding" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" [[package]] name = "pest" @@ -6228,7 +6177,7 @@ checksum = "75a1ef20bf3193c15ac345acb32e26b3dc3223aff4d77ae4fc5359567683796b" dependencies = [ "pest", "pest_meta", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -6241,7 +6190,7 @@ checksum = "5e3b284b1f13a20dc5ebc90aff59a51b8d7137c221131b52a7260c08cbc1cc80" dependencies = [ "once_cell", "pest", - "sha2 0.10.6", + "sha2 0.10.7", ] [[package]] @@ -6251,7 +6200,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6d5014253a1331579ce62aa67443b4a658c5e7dd03d4bc6d302b94474888143" dependencies = [ "fixedbitset", - "indexmap", + "indexmap 1.9.3", ] [[package]] @@ -6303,29 +6252,29 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.0" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c95a7476719eab1e366eaf73d0260af3021184f18177925b07f54b30089ceead" +checksum = "fda4ed1c6c173e3fc7a83629421152e01d7b1f9b7f65fb301e490e8cfc656422" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.0" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"39407670928234ebc5e6e580247dd567ad73a3578460c5990f9503df207e8f07" +checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", - "syn 2.0.31", + "syn 2.0.41", ] [[package]] name = "pin-project-lite" -version = "0.2.9" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" [[package]] name = "pin-utils" @@ -6411,12 +6360,6 @@ dependencies = [ "windows-sys 0.42.0", ] -[[package]] -name = "portable-atomic" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15eb2c6e362923af47e13c23ca5afb859e83d54452c55b0b9ac763b8f7c1ac16" - [[package]] name = "portable-atomic" version = "1.3.1" @@ -6457,7 +6400,7 @@ dependencies = [ "md-5", "memchr", "rand 0.8.5", - "sha2 0.10.6", + "sha2 0.10.7", "stringprep", ] @@ -6496,7 +6439,7 @@ checksum = "59230a63c37f3e18569bdb90e4a89cbf5bf8b06fea0b84e65ea10cc4df47addd" dependencies = [ "difflib", "float-cmp", - "itertools", + "itertools 0.10.5", "normalize-line-endings", "predicates-core", "regex", @@ -6510,7 +6453,7 @@ checksum = "1ba7d6ead3e3966038f68caa9fc1f860185d95a793180bbcfe0d0da47b3961ed" dependencies = [ "anstyle 0.3.1", "difflib", - "itertools", + "itertools 0.10.5", "predicates-core", ] @@ -6558,7 +6501,7 @@ version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c142c0e46b57171fe0c528bee8c5b7569e80f0c17e377cd0e30ea57dbc11bb51" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "syn 1.0.109", ] @@ -6603,7 +6546,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ "proc-macro-error-attr", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", "version_check", @@ -6615,7 +6558,7 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "version_check", ] @@ -6643,9 +6586,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.66" +version = "1.0.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" dependencies = [ "unicode-ident", ] @@ -6654,15 +6597,15 @@ dependencies = [ name = "prometheus-parser" version = "0.1.0" dependencies = [ - "indexmap", + "indexmap 2.0.2", "nom", "num_enum 0.6.1", "prost", "prost-build", "prost-types", "snafu", - "value", "vector-common", + "vrl", ] [[package]] @@ -6680,16 +6623,15 @@ dependencies = [ [[package]] name = "proptest" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29f1b898011ce9595050a68e60f90bad083ff2987a695a42357134c8381fba70" +checksum = "4e35c06b98bf36aba164cc17cb25f7e232f5c4aeea73baa14b8a9f0d92dbfa65" dependencies = [ "bit-set", "bitflags 1.3.2", "byteorder", "lazy_static", "num-traits", - "quick-error 2.0.1", "rand 0.8.5", "rand_chacha 0.3.1", "rand_xorshift", @@ -6717,7 +6659,7 @@ checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" 
dependencies = [ "bytes 1.4.0", "heck 0.4.0", - "itertools", + "itertools 0.10.5", "lazy_static", "log", "multimap", @@ -6738,12 +6680,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" dependencies = [ "anyhow", - "itertools", - "proc-macro2 1.0.66", + "itertools 0.10.5", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] +[[package]] +name = "prost-reflect" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b823de344848e011658ac981009100818b322421676740546f8b52ed5249428" +dependencies = [ + "once_cell", + "prost", + "prost-types", +] + [[package]] name = "prost-types" version = "0.11.9" @@ -6768,7 +6721,7 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -6786,9 +6739,9 @@ dependencies = [ [[package]] name = "pulsar" -version = "5.1.1" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20f237570b5665b38c7d5228f9a1d2990e369c00e635704528996bcd5219f540" +checksum = "e6eb95b2e36b92d3e0536be87eaf7accb17db39f5a44452759b43f1328e82dc9" dependencies = [ "async-trait", "bit-vec 0.6.3", @@ -6831,9 +6784,9 @@ checksum = "658fa1faf7a4cc5f057c9ee5ef560f717ad9d8dc66d975267f709624d6e1ab88" [[package]] name = "quanta" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cc73c42f9314c4bdce450c77e6f09ecbddefbeddb1b5979ded332a3913ded33" +checksum = "a17e662a7a8291a865152364c20c7abc5e60486ab2001e8ec10b24862de0b9ab" dependencies = [ "crossbeam-utils", "libc", @@ -6851,12 +6804,6 @@ version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" -[[package]] -name = "quick-error" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" - [[package]] name = "quick-protobuf" version = "0.8.1" @@ -6893,7 +6840,7 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b22a693222d716a9587786f37ac3f6b4faedb5b80c23914e7303ff5a1d8016e9" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -6913,14 +6860,14 @@ version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", ] [[package]] name = "quoted_printable" -version = "0.4.7" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a24039f627d8285853cc90dcddf8c1ebfaa91f834566948872b225b9a28ed1b6" +checksum = "79ec282e887b434b68c18fe5c121d38e72a5cf35119b59e54ec5b992ea9c8eb0" [[package]] name = "radium" @@ -6997,7 +6944,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.9", + "getrandom 0.2.10", ] [[package]] @@ -7077,9 +7024,9 @@ dependencies = [ [[package]] name = "rdkafka" -version = "0.34.0" +version = "0.32.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "053adfa02fab06e86c01d586cc68aa47ee0ff4489a59469081dc12cbcde578bf" +checksum = "f8733bc5dc0b192d1a4b28073f9bff1326ad9e4fecd4d9b025d6fc358d1c3e79" dependencies = [ "futures-channel", "futures-util", @@ -7095,9 +7042,9 @@ dependencies = [ [[package]] name = "rdkafka-sys" -version = "4.6.0+2.2.0" +version = "4.5.0+1.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad63c279fca41a27c231c450a2d2ad18288032e9cbb159ad16c9d96eba35aaaf" +checksum = "1bb0676c2112342ac7165decdedbc4e7086c0af384479ccce534546b10687a5d" dependencies = [ "cmake", "libc", @@ -7167,20 +7114,21 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" dependencies = [ - "getrandom 0.2.9", + "getrandom 0.2.10", "redox_syscall 0.2.16", "thiserror", ] [[package]] name = "regex" -version = "1.8.1" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af83e617f331cc6ae2da5443c602dfa5af81e517212d9d611a5b3ba1777b5370" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" dependencies = [ "aho-corasick 1.0.1", "memchr", - "regex-syntax 0.7.1", + "regex-automata 0.4.3", + "regex-syntax 0.8.2", ] [[package]] @@ -7192,6 +7140,17 @@ dependencies = [ "regex-syntax 0.6.29", ] +[[package]] +name = "regex-automata" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +dependencies = [ + "aho-corasick 1.0.1", + "memchr", + "regex-syntax 0.8.2", +] + [[package]] name = "regex-syntax" version = "0.6.29" @@ -7200,9 +7159,15 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c" +checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "rend" @@ -7215,11 +7180,11 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.17" +version = "0.11.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13293b639a097af28fc8a90f22add145a9c954e49d77da06263d58cf44d5fb91" +checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55" dependencies = [ - "base64 0.21.0", + "base64 0.21.2", "bytes 1.4.0", "encoding_rs", "futures-core", @@ -7228,7 +7193,7 @@ dependencies = [ "http", "http-body", "hyper", - "hyper-rustls", + "hyper-rustls 0.24.0", "hyper-tls", "ipnet", "js-sys", @@ -7238,14 +7203,14 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls 0.20.7", + "rustls 0.21.0", "rustls-pemfile 1.0.1", "serde", "serde_json", "serde_urlencoded", "tokio", "tokio-native-tls", - "tokio-rustls 0.23.4", + "tokio-rustls 0.24.0", "tokio-util", "tower-service", "url", @@ -7264,7 +7229,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52e44394d2086d010551b14b53b1f24e31647570cd1deb0379e2c21b329aba00" dependencies = [ "hostname", - "quick-error 1.2.3", + "quick-error", ] [[package]] @@ -7283,25 +7248,11 @@ 
dependencies = [ "libc", "once_cell", "spin 0.5.2", - "untrusted 0.7.1", + "untrusted", "web-sys", "winapi", ] -[[package]] -name = "ring" -version = "0.17.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9babe80d5c16becf6594aa32ad2be8fe08498e7ae60b77de8df700e67f191d7e" -dependencies = [ - "cc", - "getrandom 0.2.9", - "libc", - "spin 0.9.4", - "untrusted 0.9.0", - "windows-sys 0.48.0", -] - [[package]] name = "rkyv" version = "0.7.40" @@ -7322,7 +7273,7 @@ version = "0.7.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff26ed6c7c4dfc2aa9480b86a60e3c7233543a270a680e10758a507c5a4ce476" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -7378,6 +7329,15 @@ dependencies = [ "retain_mut", ] +[[package]] +name = "roxmltree" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "921904a62e410e37e215c40381b7117f830d9d89ba60ab5236170541dd25646b" +dependencies = [ + "xmlparser", +] + [[package]] name = "roxmltree" version = "0.18.0" @@ -7405,6 +7365,12 @@ dependencies = [ "serde_json", ] +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + [[package]] name = "rustc-hash" version = "1.1.0" @@ -7455,16 +7421,16 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.5" +version = "0.37.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e78cc525325c06b4a7ff02db283472f3c042b7ff0c391f96c6d5ac6f4f91b75" +checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d" dependencies = [ "bitflags 1.3.2", - "errno 0.3.0", - "io-lifetimes 1.0.3", + "errno 0.3.1", + "io-lifetimes 1.0.11", "libc", - "linux-raw-sys 0.3.0", - "windows-sys 0.45.0", + "linux-raw-sys 0.3.8", + "windows-sys 0.48.0", ] [[package]] @@ -7475,7 +7441,7 @@ checksum = "35edb675feee39aec9c99fa5ff985081995a06d594114ae14cbe797ad7b7a6d7" dependencies = [ "base64 0.13.1", "log", - "ring 0.16.20", + "ring", "sct 0.6.1", "webpki 0.21.4", ] @@ -7487,19 +7453,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "539a2bfe908f471bfa933876bd1eb6a19cf2176d375f82ef7f99530a40e48c2c" dependencies = [ "log", - "ring 0.16.20", + "ring", "sct 0.7.0", "webpki 0.22.0", ] [[package]] name = "rustls" -version = "0.21.9" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "629648aced5775d558af50b2b4c7b02983a04b312126d45eeead26e7caa498b9" +checksum = "07180898a28ed6a7f7ba2311594308f595e3dd2e3c3812fa0a80a47b45f17e5d" dependencies = [ "log", - "ring 0.17.3", + "ring", "rustls-webpki", "sct 0.7.0", ] @@ -7548,12 +7514,12 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.101.7" +version = "0.100.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +checksum = "d6207cd5ed3d8dca7816f8f3725513a34609c0c765bf652b8c3cb4cfd87db46b" dependencies = [ - "ring 0.17.3", - "untrusted 0.9.0", + "ring", + "untrusted", ] [[package]] @@ -7569,18 +7535,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" dependencies = [ "fnv", - "quick-error 1.2.3", + "quick-error", "tempfile", "wait-timeout", ] [[package]] name = "rustyline" -version = "11.0.0" 
+version = "12.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dfc8644681285d1fb67a467fb3021bfea306b99b4146b166a1fe3ada965eece" +checksum = "994eca4bca05c87e86e15d90fc7a91d1be64b4482b38cb2d27474568fe7c9db9" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.4.1", "cfg-if", "clipboard-win", "libc", @@ -7673,8 +7639,8 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b362b83898e0e69f38515b82ee15aa80636befe47c3b6d3d89a911e78fc228ce" dependencies = [ - "ring 0.16.20", - "untrusted 0.7.1", + "ring", + "untrusted", ] [[package]] @@ -7683,8 +7649,8 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" dependencies = [ - "ring 0.16.20", - "untrusted 0.7.1", + "ring", + "untrusted", ] [[package]] @@ -7705,9 +7671,9 @@ dependencies = [ [[package]] name = "security-framework" -version = "2.9.0" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca2855b3715770894e67cbfa3df957790aa0c9edc3bf06efa1a84d77fa0839d1" +checksum = "1fc758eb7bffce5b308734e9b0c1468893cae9ff70ebf13e7090be8dcbcc83a8" dependencies = [ "bitflags 1.3.2", "core-foundation", @@ -7752,9 +7718,9 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" -version = "1.0.163" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2" +checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" dependencies = [ "serde_derive", ] @@ -7765,7 +7731,7 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a78072b550e5c20bc4a9d1384be28809cbdb7b25b2b4707ddc6d908b7e6de3bf" dependencies = [ - "toml 0.7.3", + "toml 0.7.8", ] [[package]] @@ -7812,13 +7778,13 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.163" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e" +checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", - "syn 2.0.31", + "syn 2.0.41", ] [[package]] @@ -7827,18 +7793,18 @@ version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85bf8229e7920a9f636479437026331ce11aa132b4dde37d121944a44d6e5f3c" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] [[package]] name = "serde_json" -version = "1.0.96" +version = "1.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1" +checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" dependencies = [ - "indexmap", + "indexmap 2.0.2", "itoa", "ryu", "serde", @@ -7857,7 +7823,16 @@ dependencies = [ name = "serde_path_to_error" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "184c643044780f7ceb59104cef98a5a6f12cb2288a7bc701ab93a362b49fd47d" +checksum = "184c643044780f7ceb59104cef98a5a6f12cb2288a7bc701ab93a362b49fd47d" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_plain" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "9ce1fc6db65a611022b23a0dec6975d63fb80a302cb3388835ff02c097258d50" dependencies = [ "serde", ] @@ -7879,16 +7854,16 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fe39d9fbb0ebf5eb2c7cb7e2a47e4f462fad1379f1166b8ae49ad9eae89a7ca" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] [[package]] name = "serde_spanned" -version = "0.6.1" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0efd8caf556a6cebd3b285caf480045fcc1ac04f6bd786b09a6f11af30c4fcf4" +checksum = "12022b835073e5b11e90a14f86838ceb1c8fb0325b72416845c487ac0fa95e80" dependencies = [ "serde", ] @@ -7924,7 +7899,7 @@ dependencies = [ "base64 0.13.1", "chrono", "hex", - "indexmap", + "indexmap 1.9.3", "serde", "serde_json", "serde_with_macros 2.3.2", @@ -7938,7 +7913,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e182d6ec6f05393cc0e5ed1bf81ad6db3a8feedf8ee515ecdd369809bcce8082" dependencies = [ "darling 0.13.4", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -7950,7 +7925,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "859011bddcc11f289f07f467cc1fe01c7a941daa4d8f6c40d4d1c92eb6d9319c" dependencies = [ "darling 0.14.2", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -7961,7 +7936,7 @@ version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "578a7433b776b56a35785ed5ce9a7e777ac0598aac5a6dd1b4b18a307c7fc71b" dependencies = [ - "indexmap", + "indexmap 1.9.3", "ryu", "serde", "yaml-rust", @@ -7969,11 +7944,11 @@ dependencies = [ [[package]] name = "serde_yaml" -version = "0.9.21" +version = "0.9.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9d684e3ec7de3bf5466b32bd75303ac16f0736426e5a4e0d6e489559ce1249c" +checksum = "3cc7a1570e38322cfe4154732e5110f887ea57e22b76f4bfd32b5bdd3368666c" dependencies = [ - "indexmap", + "indexmap 2.0.2", "itoa", "ryu", "serde", @@ -8000,7 +7975,7 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "079a83df15f85d89a68d64ae1238f142f172b1fa915d0d76b26a7cba1b659a69" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -8013,7 +7988,7 @@ checksum = "f5058ada175748e33390e40e872bd0fe59a19f265d0158daa551c5a88a76009c" dependencies = [ "cfg-if", "cpufeatures", - "digest 0.10.6", + "digest 0.10.7", ] [[package]] @@ -8024,7 +7999,7 @@ checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3" dependencies = [ "cfg-if", "cpufeatures", - "digest 0.10.6", + "digest 0.10.7", ] [[package]] @@ -8042,13 +8017,13 @@ dependencies = [ [[package]] name = "sha2" -version = "0.10.6" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" +checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8" dependencies = [ "cfg-if", "cpufeatures", - "digest 0.10.6", + "digest 0.10.7", ] [[package]] @@ -8057,7 +8032,7 @@ version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bdf0c33fae925bdc080598b84bc15c55e7b9a4a43b3c704da051f977469691c9" dependencies = [ - "digest 0.10.6", + "digest 0.10.7", "keccak", ] @@ -8128,6 +8103,15 @@ version = "1.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "74233d3b3b2f6d4b006dc19dee745e73e2a6bfb6f93607cd3b02bd5b00797d7c" +[[package]] +name = "simd-abstraction" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cadb29c57caadc51ff8346233b5cec1d240b68ce55cf1afc764818791876987" +dependencies = [ + "outref", +] + [[package]] name = "similar" version = "2.2.1" @@ -8252,7 +8236,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "475b3bbe5245c26f2d8a6f62d67c1f30eb9fffeccee721c45d162c3ebbdf81b2" dependencies = [ "heck 0.4.0", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -8393,7 +8377,7 @@ checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" dependencies = [ "heck 0.3.3", "proc-macro-error", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -8411,7 +8395,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" dependencies = [ "heck 0.4.0", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "rustversion", "syn 1.0.109", @@ -8459,18 +8443,18 @@ version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "unicode-ident", ] [[package]] name = "syn" -version = "2.0.31" +version = "2.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "718fa2415bcb8d8bd775917a1bf12a7931b6dfa890753378538118181e0cb398" +checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "unicode-ident", ] @@ -8487,7 +8471,7 @@ version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", "unicode-xid 0.2.4", @@ -8552,24 +8536,25 @@ checksum = "af547b166dd1ea4b472165569fc456cfb6818116f854690b0ff205e636523dab" [[package]] name = "temp-env" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e010429b1f3ea1311190c658c7570100f03c1dab05c16cfab774181c648d656a" +checksum = "96374855068f47402c3121c6eed88d29cb1de8f3ab27090e273e420bdabcf050" dependencies = [ "parking_lot", ] [[package]] name = "tempfile" -version = "3.5.0" +version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" +checksum = "31c0432476357e58790aaa47a8efb0c5138f137343f3b5f23bd36a27e3b0a6d6" dependencies = [ + "autocfg", "cfg-if", "fastrand", "redox_syscall 0.3.5", - "rustix 0.37.5", - "windows-sys 0.45.0", + "rustix 0.37.19", + "windows-sys 0.48.0", ] [[package]] @@ -8639,12 +8624,6 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "textwrap" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" - [[package]] name = "thiserror" version = "1.0.40" @@ -8660,9 +8639,9 @@ version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ - 
"proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", - "syn 2.0.31", + "syn 2.0.41", ] [[package]] @@ -8760,11 +8739,11 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.28.1" +version = "1.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0aa32867d44e6f2ce3385e89dceb990188b8bb0fb25b0cf576647a6f98ac5105" +checksum = "4f38200e3ef7995e5ef13baec2f432a6da0aa9ac495b2c0e8f3b7eec2c92d653" dependencies = [ - "autocfg", + "backtrace", "bytes 1.4.0", "libc", "mio", @@ -8772,7 +8751,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.4.9", + "socket2 0.5.3", "tokio-macros", "tracing 0.1.37", "windows-sys 0.48.0", @@ -8805,16 +8784,16 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", - "syn 2.0.31", + "syn 2.0.41", ] [[package]] name = "tokio-native-tls" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d995660bd2b7f8c1568414c1126076c13fbb725c40112dc0120b78eb9b717b" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" dependencies = [ "native-tls", "tokio", @@ -8873,7 +8852,7 @@ version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e0d409377ff5b1e3ca6437aa86c1eb7d40c134bfec254e44c830defa92669db5" dependencies = [ - "rustls 0.21.9", + "rustls 0.21.0", "tokio", ] @@ -8902,6 +8881,31 @@ dependencies = [ "tokio-stream", ] +[[package]] +name = "tokio-tungstenite" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54319c93411147bced34cb5609a80e0a8e44c5999c93903a81cd866630ec0bfd" +dependencies = [ + "futures-util", + "log", + "tokio", + "tungstenite 0.18.0", +] + +[[package]] +name = "tokio-tungstenite" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec509ac96e9a0c43427c74f003127d953a265737636129424288d27cb5c4b12c" +dependencies = [ + "futures-util", + "log", + "rustls 0.21.0", + "tokio", + "tungstenite 0.19.0", +] + [[package]] name = "tokio-tungstenite" version = "0.20.1" @@ -8910,9 +8914,8 @@ checksum = "212d5dcb2a1ce06d81107c3d0ffa3121fe974b73f068c8282cb1c32328113b6c" dependencies = [ "futures-util", "log", - "rustls 0.21.9", "tokio", - "tungstenite", + "tungstenite 0.20.1", ] [[package]] @@ -8941,9 +8944,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.7.3" +version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b403acf6f2bb0859c93c7f0d967cb4a75a7ac552100f9322faf64dc047669b21" +checksum = "dd79e69d3b627db300ff956027cc6c3798cef26d22526befdfcd12feeb6d2257" dependencies = [ "serde", "serde_spanned", @@ -8953,20 +8956,20 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.1" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ab8ed2edee10b50132aed5f331333428b011c99402b5a534154ed15746f9622" +checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.19.6" +version = "0.19.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08de71aa0d6e348f070457f85af8bd566e2bc452156a423ddf22861b3a953fae" 
+checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ - "indexmap", + "indexmap 2.0.2", "serde", "serde_spanned", "toml_datetime", @@ -8982,7 +8985,7 @@ dependencies = [ "async-stream", "async-trait", "axum", - "base64 0.21.0", + "base64 0.21.2", "bytes 1.4.0", "flate2", "futures-core", @@ -9013,7 +9016,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6fdaae4c2c638bb70fe42803a26fbd6fc6ac8c72f5c59f67ecc2a2dcabf4b07" dependencies = [ "prettyplease", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "prost-build", "quote 1.0.33", "syn 1.0.109", @@ -9027,7 +9030,7 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" dependencies = [ "futures-core", "futures-util", - "indexmap", + "indexmap 1.9.3", "pin-project", "pin-project-lite", "rand 0.8.5", @@ -9041,13 +9044,13 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.4.0" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d1d42a9b3f3ec46ba828e8d376aec14592ea199f70a06a548587ecd1c4ab658" +checksum = "61c5bb1d698276a2443e5ecfabc1008bf15a36c12e6a7176e7bf089ea9131140" dependencies = [ - "async-compression 0.3.15", - "base64 0.20.0", - "bitflags 1.3.2", + "async-compression", + "base64 0.21.2", + "bitflags 2.4.1", "bytes 1.4.0", "futures-core", "futures-util", @@ -9118,7 +9121,7 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -9248,9 +9251,9 @@ dependencies = [ [[package]] name = "triomphe" -version = "0.1.9" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee8098afad3fb0c54a9007aab6804558410503ad676d4633f9c2559a00ac0f" +checksum = "859eb650cfee7434994602c3a68b25d77ad9e68c8a6cd491616ef86661382eb3" [[package]] name = "trust-dns-proto" @@ -9341,6 +9344,44 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "tungstenite" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30ee6ab729cd4cf0fd55218530c4522ed30b7b6081752839b68fcec8d0960788" +dependencies = [ + "base64 0.13.1", + "byteorder", + "bytes 1.4.0", + "http", + "httparse", + "log", + "rand 0.8.5", + "sha1", + "thiserror", + "url", + "utf-8", +] + +[[package]] +name = "tungstenite" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15fba1a6d6bb030745759a9a2a588bfe8490fc8b4751a277db3a0be1c9ebbf67" +dependencies = [ + "byteorder", + "bytes 1.4.0", + "data-encoding", + "http", + "httparse", + "log", + "rand 0.8.5", + "sha1", + "thiserror", + "url", + "utf-8", +] + [[package]] name = "tungstenite" version = "0.20.1" @@ -9376,7 +9417,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89851716b67b937e393b3daa8423e67ddfc4bbbf1654bcf05488e95e0828db0c" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] @@ -9406,9 +9447,9 @@ version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c3e1c30cedd24fc597f7d37a721efdbdc2b1acae012c1ef1218f4c7c2c0f3e7" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", - "syn 2.0.31", + "syn 2.0.41", ] [[package]] @@ -9448,9 +9489,9 @@ dependencies = [ [[package]] name = "unicode-bidi" -version 
= "0.3.8" +version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-ident" @@ -9512,12 +9553,6 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" -[[package]] -name = "untrusted" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" - [[package]] name = "uom" version = "0.31.1" @@ -9531,12 +9566,12 @@ dependencies = [ [[package]] name = "url" -version = "2.3.1" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" +checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" dependencies = [ "form_urlencoded", - "idna 0.3.0", + "idna 0.4.0", "percent-encoding", "serde", ] @@ -9567,14 +9602,15 @@ checksum = "936e4b492acfd135421d8dca4b1aa80a7bfc26e702ef3af710e0752684df5372" [[package]] name = "uuid" -version = "1.3.3" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "345444e32442451b267fc254ae85a209c64be56d2890e601a0c37ff0c3c5ecd2" +checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" dependencies = [ - "getrandom 0.2.9", + "getrandom 0.2.10", "md-5", "rand 0.8.5", "serde", + "wasm-bindgen", ] [[package]] @@ -9583,25 +9619,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" -[[package]] -name = "value" -version = "0.4.2" -source = "git+ssh://git@github.com/answerbook/vrl.git?rev=v0.6.0#23ae815d2db5b1e4eab4e1450b5b7868f9277ed1" -dependencies = [ - "bytes 1.4.0", - "chrono", - "mlua", - "once_cell", - "ordered-float 3.7.0", - "path", - "quickcheck", - "regex", - "serde", - "serde_json", - "snafu", - "tracing 0.1.37", -] - [[package]] name = "vcpkg" version = "0.2.15" @@ -9623,10 +9640,10 @@ dependencies = [ "directories 5.0.1", "dunce", "glob", - "hashlink", "hex", + "indexmap 2.0.2", "indicatif", - "itertools", + "itertools 0.11.0", "log", "once_cell", "os_info", @@ -9636,10 +9653,10 @@ dependencies = [ "reqwest", "serde", "serde_json", - "serde_yaml 0.9.21", - "sha2 0.10.6", + "serde_yaml 0.9.27", + "sha2 0.10.7", "tempfile", - "toml 0.7.3", + "toml 0.7.8", ] [[package]] @@ -9650,7 +9667,7 @@ checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" [[package]] name = "vector" -version = "0.30.0" +version = "0.31.0" dependencies = [ "apache-avro", "approx", @@ -9658,13 +9675,14 @@ dependencies = [ "arr_macro", "assay", "assert_cmd", - "async-compression 0.4.0", + "async-compression", "async-graphql", "async-graphql-warp", "async-stream", "async-trait", "atty", "aws-config", + "aws-credential-types", "aws-sdk-cloudwatch", "aws-sdk-cloudwatchlogs", "aws-sdk-elasticsearch", @@ -9672,19 +9690,19 @@ dependencies = [ "aws-sdk-kinesis", "aws-sdk-s3", "aws-sdk-sqs", - "aws-sigv4 0.55.1", + "aws-sigv4", "aws-smithy-async", "aws-smithy-client", - "aws-smithy-http 0.51.0", - "aws-smithy-http-tower 0.54.4", - "aws-smithy-types 0.51.0", + "aws-smithy-http", + "aws-smithy-http-tower", + "aws-smithy-types", "aws-types", "axum", "azure_core", 
"azure_identity", "azure_storage", "azure_storage_blobs", - "base64 0.21.0", + "base64 0.21.2", "blake2", "bloom", "bollard", @@ -9699,8 +9717,6 @@ dependencies = [ "criterion", "crossterm 0.26.1", "csv", - "datadog-filter", - "datadog-search-syntax", "deadpool-postgres", "derivative", "dirs-next", @@ -9721,7 +9737,7 @@ dependencies = [ "grok", "h2", "hash_hasher", - "hashbrown 0.13.2", + "hashbrown 0.14.3", "headers", "heim", "hex", @@ -9732,11 +9748,11 @@ dependencies = [ "hyper", "hyper-openssl", "hyper-proxy", - "indexmap", + "indexmap 2.0.2", "indoc", - "infer 0.13.0", + "infer 0.14.0", "inventory", - "itertools", + "itertools 0.11.0", "k8s-openapi 0.18.0", "kube", "lapin", @@ -9765,7 +9781,7 @@ dependencies = [ "opendal", "openssl", "openssl-probe", - "openssl-src 111.25.0+1.1.1t", + "openssl-src", "opentelemetry-proto", "ordered-float 3.7.0", "paste", @@ -9777,6 +9793,7 @@ dependencies = [ "proptest", "prost", "prost-build", + "prost-reflect", "prost-types", "pulsar", "quickcheck", @@ -9796,9 +9813,9 @@ dependencies = [ "serde_bytes", "serde_json", "serde_with 2.3.2", - "serde_yaml 0.9.21", + "serde_yaml 0.9.27", "serial_test", - "sha2 0.10.6", + "sha2 0.10.7", "similar-asserts", "smallvec", "smpl_jwt", @@ -9817,9 +9834,9 @@ dependencies = [ "tokio-postgres", "tokio-stream", "tokio-test", - "tokio-tungstenite", + "tokio-tungstenite 0.20.1", "tokio-util", - "toml 0.7.3", + "toml 0.7.8", "tonic", "tonic-build", "tower", @@ -9850,7 +9867,7 @@ dependencies = [ "warp", "windows-service", "wiremock", - "zstd 0.12.3+zstd.1.5.2", + "zstd 0.12.4", ] [[package]] @@ -9869,7 +9886,7 @@ dependencies = [ "serde_json", "tokio", "tokio-stream", - "tokio-tungstenite", + "tokio-tungstenite 0.19.0", "url", "uuid", ] @@ -9903,7 +9920,7 @@ dependencies = [ "rand 0.8.5", "rkyv", "serde", - "serde_yaml 0.9.21", + "serde_yaml 0.9.27", "snafu", "temp-dir", "tokio", @@ -9929,7 +9946,7 @@ dependencies = [ "crossbeam-utils", "derivative", "futures 0.3.28", - "indexmap", + "indexmap 2.0.2", "metrics", "nom", "ordered-float 3.7.0", @@ -9955,11 +9972,11 @@ dependencies = [ name = "vector-config" version = "0.1.0" dependencies = [ - "assert-json-diff", + "assert-json-diff 2.0.2", "chrono", "chrono-tz", "encoding_rs", - "indexmap", + "indexmap 2.0.2", "inventory", "no-proxy", "num-traits", @@ -9968,7 +9985,7 @@ dependencies = [ "serde_json", "serde_with 2.3.2", "snafu", - "toml 0.7.3", + "toml 0.7.8", "tracing 0.1.37", "url", "vector-config-common", @@ -9982,9 +9999,8 @@ version = "0.1.0" dependencies = [ "convert_case 0.6.0", "darling 0.13.4", - "indexmap", "once_cell", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "serde", "serde_json", @@ -9997,7 +10013,7 @@ name = "vector-config-macros" version = "0.1.0" dependencies = [ "darling 0.13.4", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "serde", "serde_derive_internals", @@ -10012,7 +10028,7 @@ version = "0.1.0" dependencies = [ "async-graphql", "async-trait", - "base64 0.21.0", + "base64 0.21.2", "bitmask-enum", "bytes 1.4.0", "chrono", @@ -10032,7 +10048,7 @@ dependencies = [ "http", "httptest", "hyper-proxy", - "indexmap", + "indexmap 2.0.2", "metrics", "metrics-tracing-context", "metrics-util", @@ -10074,7 +10090,7 @@ dependencies = [ "tokio-stream", "tokio-test", "tokio-util", - "toml 0.7.3", + "toml 0.7.8", "tonic", "tower", "tracing 0.1.37", @@ -10146,14 +10162,12 @@ name = "vector-vrl-web-playground" version = "0.1.0" dependencies = [ "enrichment", - "getrandom 0.2.9", + "getrandom 0.2.10", "gloo-utils", "serde", 
"serde-wasm-bindgen", - "value", "vector-vrl-functions", "vrl", - "vrl-stdlib", "wasm-bindgen", ] @@ -10171,206 +10185,83 @@ checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" [[package]] name = "vrl" -version = "0.4.2" -source = "git+ssh://git@github.com/answerbook/vrl.git?rev=v0.6.0#23ae815d2db5b1e4eab4e1450b5b7868f9277ed1" -dependencies = [ - "datadog-filter", - "datadog-grok", - "datadog-search-syntax", - "path", - "value", - "vrl-cli", - "vrl-compiler", - "vrl-core", - "vrl-diagnostic", - "vrl-parser", - "vrl-stdlib", - "vrl-tests", -] - -[[package]] -name = "vrl-cli" -version = "0.4.2" -source = "git+ssh://git@github.com/answerbook/vrl.git?rev=v0.6.0#23ae815d2db5b1e4eab4e1450b5b7868f9277ed1" -dependencies = [ - "clap 4.1.14", - "exitcode", - "indoc", - "once_cell", - "path", - "prettytable-rs", - "regex", - "rustyline", - "serde_json", - "thiserror", - "value", - "vrl-compiler", - "vrl-core", - "vrl-diagnostic", - "vrl-stdlib", - "webbrowser", -] - -[[package]] -name = "vrl-compiler" -version = "0.4.2" -source = "git+ssh://git@github.com/answerbook/vrl.git?rev=v0.6.0#23ae815d2db5b1e4eab4e1450b5b7868f9277ed1" -dependencies = [ - "anymap", - "bytes 1.4.0", - "chrono", - "chrono-tz", - "dyn-clone", - "getrandom 0.2.9", - "indoc", - "lalrpop-util", - "ordered-float 3.7.0", - "paste", - "path", - "regex", - "serde", - "snafu", - "thiserror", - "tracing 0.1.37", - "value", - "vrl-diagnostic", - "vrl-parser", -] - -[[package]] -name = "vrl-core" -version = "0.4.2" -source = "git+ssh://git@github.com/answerbook/vrl.git?rev=v0.6.0#23ae815d2db5b1e4eab4e1450b5b7868f9277ed1" -dependencies = [ - "bytes 1.4.0", - "chrono", - "chrono-tz", - "derivative", - "nom", - "ordered-float 3.7.0", - "path", - "serde", - "serde_json", - "snafu", - "value", - "vrl-diagnostic", -] - -[[package]] -name = "vrl-diagnostic" -version = "0.4.2" -source = "git+ssh://git@github.com/answerbook/vrl.git?rev=v0.6.0#23ae815d2db5b1e4eab4e1450b5b7868f9277ed1" -dependencies = [ - "codespan-reporting", - "termcolor", -] - -[[package]] -name = "vrl-parser" -version = "0.4.2" -source = "git+ssh://git@github.com/answerbook/vrl.git?rev=v0.6.0#23ae815d2db5b1e4eab4e1450b5b7868f9277ed1" -dependencies = [ - "lalrpop", - "lalrpop-util", - "ordered-float 3.7.0", - "paste", - "path", - "thiserror", - "vrl-diagnostic", -] - -[[package]] -name = "vrl-stdlib" -version = "0.4.2" -source = "git+ssh://git@github.com/answerbook/vrl.git?rev=v0.6.0#23ae815d2db5b1e4eab4e1450b5b7868f9277ed1" +version = "0.5.0" +source = "git+ssh://git@github.com/answerbook/vrl.git?rev=v0.7.0#2457db0764e0bac16e4da3e41b032507dc509d62" dependencies = [ "aes", + "ansi_term", + "anymap", + "arbitrary", "base16", - "base64 0.21.0", + "base64 0.21.2", "bytes 1.4.0", "cbc", "cfb-mode", + "cfg-if", "charset", "chrono", + "chrono-tz", "cidr-utils", + "clap 4.1.14", + "codespan-reporting", "csv", "ctr", "data-encoding", - "datadog-filter", - "datadog-grok", - "datadog-search-syntax", "dns-lookup", + "dyn-clone", + "exitcode", "flate2", "grok", "hex", "hmac", "hostname", - "indexmap", + "indexmap 2.0.2", "indoc", + "itertools 0.11.0", + "lalrpop", + "lalrpop-util", "md-5", "nom", "ofb", "once_cell", + "onig", "ordered-float 3.7.0", - "path", + "paste", + "peeking_take_while", "percent-encoding", + "pest", + "pest_derive", + "prettydiff", + "prettytable-rs", + "quickcheck", "quoted_printable", "rand 0.8.5", "regex", - "roxmltree", + "roxmltree 0.18.0", "rust_decimal", + "rustyline", "seahash", "serde", "serde_json", "sha-1", - "sha2 0.10.6", + "sha2 
0.10.7", "sha3", + "snafu", "strip-ansi-escapes", "substring", "syslog_loose", + "termcolor", + "thiserror", "tracing 0.1.37", "uaparser", "url", "utf8-width", "uuid", - "value", - "vrl-compiler", - "vrl-core", - "vrl-diagnostic", + "webbrowser", "woothee", - "zstd 0.12.3+zstd.1.5.2", -] - -[[package]] -name = "vrl-tests" -version = "0.4.2" -source = "git+ssh://git@github.com/answerbook/vrl.git?rev=v0.6.0#23ae815d2db5b1e4eab4e1450b5b7868f9277ed1" -dependencies = [ - "ansi_term", - "chrono", - "chrono-tz", - "clap 4.1.14", - "glob", - "path", - "prettydiff", - "regex", - "serde", - "serde_json", - "tikv-jemallocator", - "tracing-subscriber", - "value", - "vrl-compiler", - "vrl-core", - "vrl-diagnostic", - "vrl-stdlib", + "zstd 0.12.4", ] -[[package]] -name = "vsimd" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" - [[package]] name = "vte" version = "0.10.1" @@ -10388,7 +10279,7 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d257817081c7dffcdbab24b9e62d2def62e2ff7d00b1c20062551e6cccc145ff" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", ] @@ -10430,9 +10321,9 @@ dependencies = [ [[package]] name = "warp" -version = "0.3.6" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1e92e22e03ff1230c03a1a8ee37d2f89cd489e2e541b7550d6afad96faed169" +checksum = "ba431ef570df1287f7f8b07e376491ad54f84d26ac473489427231e1718e1f69" dependencies = [ "bytes 1.4.0", "futures-channel", @@ -10452,7 +10343,7 @@ dependencies = [ "serde_urlencoded", "tokio", "tokio-stream", - "tokio-tungstenite", + "tokio-tungstenite 0.18.0", "tokio-util", "tower-service", "tracing 0.1.37", @@ -10472,9 +10363,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.86" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73" +checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -10482,16 +10373,16 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.86" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b04bc93f9d6bdee709f6bd2118f57dd6679cf1176a1af464fca3ab0d66d8fb" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" dependencies = [ "bumpalo", "log", "once_cell", - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", - "syn 2.0.31", + "syn 2.0.41", "wasm-bindgen-shared", ] @@ -10509,9 +10400,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.86" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14d6b024f1a526bb0234f52840389927257beb670610081360e5a03c5df9c258" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" dependencies = [ "quote 1.0.33", "wasm-bindgen-macro-support", @@ -10519,22 +10410,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.86" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" 
dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", - "syn 2.0.31", + "syn 2.0.41", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.86" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" [[package]] name = "wasm-streams" @@ -10582,8 +10473,8 @@ version = "0.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8e38c0608262c46d4a56202ebabdeb094cef7e560ca7a226c6bf055188aa4ea" dependencies = [ - "ring 0.16.20", - "untrusted 0.7.1", + "ring", + "untrusted", ] [[package]] @@ -10592,8 +10483,8 @@ version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f095d78192e208183081cc07bc5515ef55216397af48b873e5edcd72637fa1bd" dependencies = [ - "ring 0.16.20", - "untrusted 0.7.1", + "ring", + "untrusted", ] [[package]] @@ -10834,9 +10725,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "winnow" -version = "0.3.5" +version = "0.5.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee7b2c67f962bf5042bfd8b6a916178df33a26eec343ae064cb8e069f638fa6f" +checksum = "6c830786f7720c2fd27a1a0e27a709dbd3c4d009b56d098fc742d4f4eab91fe2" dependencies = [ "memchr", ] @@ -10852,13 +10743,13 @@ dependencies = [ [[package]] name = "wiremock" -version = "0.5.18" +version = "0.5.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd7b0b5b253ebc0240d6aac6dd671c495c467420577bf634d3064ae7e6fa2b4c" +checksum = "c6f71803d3a1c80377a06221e0530be02035d5b3e854af56c6ece7ac20ac441d" dependencies = [ - "assert-json-diff", + "assert-json-diff 2.0.2", "async-trait", - "base64 0.21.0", + "base64 0.21.2", "deadpool", "futures 0.3.28", "futures-timer", @@ -10893,9 +10784,9 @@ dependencies = [ [[package]] name = "xml-rs" -version = "0.8.4" +version = "0.8.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2d7d3948613f75c98fd9328cfdcc45acc4d360655289d0a7d4ec931392200a3" +checksum = "52839dc911083a8ef63efa4d039d1f58b5e409f923e44c80828f206f66e5541c" [[package]] name = "xmlparser" @@ -10925,7 +10816,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "332f188cc1bcf1fe1064b8c58d150f497e697f49774aa846f2dc949d9a25f236" dependencies = [ "byteorder", - "zerocopy-derive", + "zerocopy-derive 0.3.2", +] + +[[package]] +name = "zerocopy" +version = "0.7.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c4061bedbb353041c12f413700357bec76df2c7e2ca8e4df8bac24c6bf68e3d" +dependencies = [ + "zerocopy-derive 0.7.31", ] [[package]] @@ -10934,16 +10834,27 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6505e6815af7de1746a08f69c69606bb45695a17149517680f3b2149713b19a3" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", ] +[[package]] +name = "zerocopy-derive" +version = "0.7.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a" +dependencies = [ + "proc-macro2 1.0.70", + "quote 1.0.33", + "syn 2.0.41", +] + [[package]] name = "zeroize" -version = "1.5.7" +version = "1.7.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c394b5bd0c6f669e7275d9c20aa90ae064cb22e75a1cad54e1b34088034b149f" +checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" dependencies = [ "zeroize_derive", ] @@ -10954,7 +10865,7 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f8f187641dad4f680d25c4bfc4225b418165984179f26ca76ec4fb6441d3a17" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2 1.0.70", "quote 1.0.33", "syn 1.0.109", "synstructure", @@ -10971,9 +10882,9 @@ dependencies = [ [[package]] name = "zstd" -version = "0.12.3+zstd.1.5.2" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806" +checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" dependencies = [ "zstd-safe 6.0.3+zstd.1.5.2", ] diff --git a/Cargo.toml b/Cargo.toml index b871f0e5fc058..84f8b21156d41 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "vector" -version = "0.30.0" +version = "0.31.0" authors = ["Vector Contributors "] edition = "2021" description = "A lightweight and ultra-fast tool for building observability pipelines" @@ -117,7 +117,12 @@ members = [ "vdev", ] +[workspace.dependencies] +vrl = { git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.7.0", features = ["cli", "test", "test_framework", "arbitrary"] } + [dependencies] +vrl.workspace = true + # Internal libs codecs = { path = "lib/codecs", default-features = false } dnsmsg-parser = { path = "lib/dnsmsg-parser", optional = true } @@ -143,7 +148,7 @@ loki-logproto = { path = "lib/loki-logproto", optional = true } async-stream = { version = "0.3.5", default-features = false } async-trait = { version = "0.1.68", default-features = false } futures = { version = "0.3.28", default-features = false, features = ["compat", "io-compat"], package = "futures" } -tokio = { version = "1.28.1", default-features = false, features = ["full"] } +tokio = { version = "1.29.0", default-features = false, features = ["full"] } tokio-openssl = { version = "0.6.3", default-features = false } tokio-stream = { version = "0.1.14", default-features = false, features = ["net", "sync", "time"] } tokio-util = { version = "0.7", default-features = false, features = ["io", "time"] } @@ -161,21 +166,23 @@ metrics = "0.21.0" metrics-tracing-context = { version = "0.14.0", default-features = false } # AWS - Official SDK -aws-sdk-s3 = { version = "0.21.0", default-features = false, features = ["native-tls"], optional = true } -aws-sdk-sqs = { version = "0.21.0", default-features = false, features = ["native-tls"], optional = true } -aws-sdk-cloudwatch = { version = "0.21.0", default-features = false, features = ["native-tls"], optional = true } -aws-sdk-cloudwatchlogs = { version = "0.21.0", default-features = false, features = ["native-tls"], optional = true } -aws-sdk-elasticsearch = {version = "0.21.0", default-features = false, features = ["native-tls"], optional = true } -aws-sdk-firehose = { version = "0.21.0", default-features = false, features = ["native-tls"], optional = true } -aws-sdk-kinesis = { version = "0.21.0", default-features = false, features = ["native-tls"], optional = true } -aws-types = { version = "0.51.0", default-features = false, features = ["hardcoded-credentials"], optional = true } -aws-sigv4 = { version = "0.55.1", default-features = false, features = ["sign-http"], optional = true } -aws-config = { 
version = "0.51.0", default-features = false, features = ["native-tls"], optional = true } -aws-smithy-async = { version = "0.51.0", default-features = false, optional = true } -aws-smithy-client = { version = "0.51.0", default-features = false, features = ["client-hyper"], optional = true} -aws-smithy-http = { version = "0.51.0", default-features = false, features = ["event-stream"], optional = true } -aws-smithy-http-tower = { version = "0.54.4", default-features = false, optional = true } -aws-smithy-types = { version = "0.51.0", default-features = false, optional = true } +# depending on a fork to circumvent https://github.com/awslabs/aws-sdk-rust/issues/749 +aws-sdk-s3 = { git = "https://github.com/vectordotdev/aws-sdk-rust", rev = "3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670", default-features = false, features = ["native-tls"], optional = true } +aws-sdk-sqs = { git = "https://github.com/vectordotdev/aws-sdk-rust", rev = "3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670", default-features = false, features = ["native-tls"], optional = true } +aws-sdk-cloudwatch = { git = "https://github.com/vectordotdev/aws-sdk-rust", rev = "3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670", default-features = false, features = ["native-tls"], optional = true } +aws-sdk-cloudwatchlogs = { git = "https://github.com/vectordotdev/aws-sdk-rust", rev = "3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670", default-features = false, features = ["native-tls"], optional = true } +aws-sdk-elasticsearch = { git = "https://github.com/vectordotdev/aws-sdk-rust", rev = "3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670", default-features = false, features = ["native-tls"], optional = true } +aws-sdk-firehose = { git = "https://github.com/vectordotdev/aws-sdk-rust", rev = "3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670", default-features = false, features = ["native-tls"], optional = true } +aws-sdk-kinesis = { git = "https://github.com/vectordotdev/aws-sdk-rust", rev = "3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670", default-features = false, features = ["native-tls"], optional = true } +aws-types = { git = "https://github.com/vectordotdev/aws-sdk-rust", rev = "3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670", default-features = false, optional = true } +aws-sigv4 = { git = "https://github.com/vectordotdev/aws-sdk-rust", rev = "3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670", default-features = false, features = ["sign-http"], optional = true } +aws-config = { git = "https://github.com/vectordotdev/aws-sdk-rust", rev = "3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670", default-features = false, features = ["native-tls"], optional = true } +aws-credential-types = { git = "https://github.com/vectordotdev/aws-sdk-rust", rev = "3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670", default-features = false, features = ["hardcoded-credentials"], optional = true } +aws-smithy-async = { git = "https://github.com/vectordotdev/aws-sdk-rust", rev = "3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670", default-features = false, optional = true } +aws-smithy-client = { git = "https://github.com/vectordotdev/aws-sdk-rust", rev = "3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670", default-features = false, features = ["client-hyper"], optional = true} +aws-smithy-http = { git = "https://github.com/vectordotdev/aws-sdk-rust", rev = "3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670", default-features = false, features = ["event-stream"], optional = true } +aws-smithy-http-tower = { git = "https://github.com/vectordotdev/aws-sdk-rust", rev = "3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670", default-features = false, optional = true } 
+aws-smithy-types = { git = "https://github.com/vectordotdev/aws-sdk-rust", rev = "3d6aefb7fcfced5fc2a7e761a87e4ddbda1ee670", default-features = false, optional = true } # Azure azure_core = { git = "https://github.com/Azure/azure-sdk-for-rust.git", rev = "b4544d4920fa3064eb921340054cd9cc130b7664", default-features = false, features = ["enable_reqwest"], optional = true } @@ -184,25 +191,26 @@ azure_storage = { git = "https://github.com/Azure/azure-sdk-for-rust.git", rev = azure_storage_blobs = { git = "https://github.com/Azure/azure-sdk-for-rust.git", rev = "b4544d4920fa3064eb921340054cd9cc130b7664", default-features = false, optional = true } # OpenDAL -opendal = {version = "0.34", default-features = false, features = ["native-tls", "services-webhdfs"], optional = true} +opendal = {version = "0.38", default-features = false, features = ["native-tls", "services-webhdfs"], optional = true} # Tower tower = { version = "0.4.13", default-features = false, features = ["buffer", "limit", "retry", "timeout", "util", "balance", "discover"] } -tower-http = { version = "0.4.0", default-features = false, features = ["decompression-gzip"]} +tower-http = { version = "0.4.1", default-features = false, features = ["decompression-gzip"]} # Serde -serde = { version = "1.0.163", default-features = false, features = ["derive"] } +serde = { version = "1.0.164", default-features = false, features = ["derive"] } serde-toml-merge = { version = "0.3.0", default-features = false } serde_bytes = { version = "0.11.9", default-features = false, features = ["std"], optional = true } -serde_json = { version = "1.0.96", default-features = false, features = ["raw_value"] } +serde_json = { version = "1.0.99", default-features = false, features = ["raw_value"] } serde_with = { version = "2.3.2", default-features = false, features = ["macros", "std"] } -serde_yaml = { version = "0.9.21", default-features = false } +serde_yaml = { version = "0.9.22", default-features = false } # Messagepack rmp-serde = { version = "1.1.1", default-features = false, optional = true } rmpv = { version = "1.0.0", default-features = false, features = ["with-serde"], optional = true } -# Prost +# Prost / Protocol Buffers prost = { version = "0.11", default-features = false, features = ["std"] } +prost-reflect = { version = "0.11", default-features = false, optional = true } prost-types = { version = "0.11", default-features = false, optional = true } # GCP @@ -214,9 +222,9 @@ smpl_jwt = { version = "0.7.1", default-features = false, optional = true } lapin = { version = "2.1.1", default-features = false, features = ["openssl"], optional = true } # API -async-graphql = { version = "5.0.8", default-features = false, optional = true, features = ["chrono"] } -async-graphql-warp = { version = "5.0.8", default-features = false, optional = true } -itertools = { version = "0.10.5", default-features = false, optional = true } +async-graphql = { version = "5.0.10", default-features = false, optional = true, features = ["chrono"] } +async-graphql-warp = { version = "5.0.10", default-features = false, optional = true } +itertools = { version = "0.11.0", default-features = false, optional = true } # API client crossterm = { version = "0.26.1", default-features = false, features = ["event-stream"], optional = true } @@ -225,26 +233,24 @@ number_prefix = { version = "0.4.0", default-features = false, features = ["std" tui = { version = "0.19.0", optional = true, default-features = false, features = ["crossterm"] } # Datadog Pipelines -datadog-filter = { 
package = "datadog-filter", git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0" } -datadog-search-syntax = { package = "datadog-search-syntax", git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0" } -hex = { version = "0.4.3", default-features = false } -sha2 = { version = "0.10.6", default-features = false } +# datadog-filter = { package = "datadog-filter", git = "ssh://git@github.com/answerbook/vrl.git", rev = "next" } +# datadog-search-syntax = { package = "datadog-search-syntax", git = "ssh://git@github.com/answerbook/vrl.git", rev = "next" } -# VRL Lang -vrl = { package = "vrl", git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0", features = ["cli"] } +hex = { version = "0.4.3", default-features = false} +sha2 = { version = "0.10.7", default-features = false} # External libs arc-swap = { version = "1.6", default-features = false, optional = true } async-compression = { version = "0.4.0", default-features = false, features = ["tokio", "gzip", "zstd"], optional = true } apache-avro = { version = "0.14.0", default-features = false, optional = true } axum = { version = "0.6.18", default-features = false } -base64 = { version = "0.21.0", default-features = false, optional = true } +base64 = { version = "0.21.2", default-features = false, optional = true } bloom = { version = "0.3.2", default-features = false, optional = true } bollard = { version = "0.14.0", default-features = false, features = ["ssl", "chrono"], optional = true } bytes = { version = "1.4.0", default-features = false, features = ["serde"] } blake2 = { version = "0.10.6", default-features = false, optional = true } bytesize = { version = "1.2.0", default-features = false } -chrono = { version = "0.4.24", default-features = false, features = ["serde"] } +chrono = { version = "0.4.26", default-features = false, features = ["serde"] } cidr-utils = { version = "0.5.10", default-features = false } clap = { version = "4.1.14", default-features = false, features = ["derive", "error-context", "env", "help", "std", "string", "usage", "wrap_help"] } colored = { version = "2.0.0", default-features = false } @@ -261,47 +267,47 @@ futures-util = { version = "0.3.28", default-features = false } glob = { version = "0.3.1", default-features = false } governor = { version = "0.5.1", default-features = false, features = ["dashmap", "jitter", "std"], optional = true } grok = { version = "2.0.0", default-features = false, optional = true } -h2 = { version = "0.3.19", default-features = false, optional = true } +h2 = { version = "0.3.20", default-features = false, optional = true } hash_hasher = { version = "2.0.0", default-features = false } -hashbrown = { version = "0.13.2", default-features = false, optional = true, features = ["ahash"] } +hashbrown = { version = "0.14.0", default-features = false, optional = true, features = ["ahash"] } headers = { version = "0.3.8", default-features = false } hostname = { version = "0.3.1", default-features = false } http = { version = "0.2.9", default-features = false } http-body = { version = "0.4.5", default-features = false } -hyper = { version = "0.14.26", default-features = false, features = ["client", "runtime", "http1", "http2", "server", "stream"] } +hyper = { version = "0.14.27", default-features = false, features = ["client", "runtime", "http1", "http2", "server", "stream"] } hyper-openssl = { version = "0.9.2", default-features = false } hyper-proxy = { version = "0.9.1", default-features = false, features = ["openssl-tls"] } -indexmap = { version = "~1.9.3", 
default-features = false, features = ["serde"] } -infer = { version = "0.13.0", default-features = false, optional = true} +indexmap = { version = "~2.0.0", default-features = false, features = ["serde", "std"] } +infer = { version = "0.14.0", default-features = false, optional = true} indoc = { version = "2.0.1", default-features = false } inventory = { version = "0.3.6", default-features = false } k8s-openapi = { version = "0.18.0", default-features = false, features = ["api", "v1_26"], optional = true } kube = { version = "0.82.0", default-features = false, features = ["client", "openssl-tls", "runtime"], optional = true } listenfd = { version = "1.0.1", default-features = false, optional = true } logfmt = { version = "0.0.2", default-features = false, optional = true } -lru = { version = "0.10.0", default-features = false, optional = true } +lru = { version = "0.10.1", default-features = false, optional = true } maxminddb = { version = "0.23.0", default-features = false, optional = true } md-5 = { version = "0.10", default-features = false, optional = true } -mongodb = { version = "2.5.0", default-features = false, features = ["tokio-runtime"], optional = true } +mongodb = { version = "2.6.0", default-features = false, features = ["tokio-runtime"], optional = true } nats = { version = "0.24.0", default-features = false, optional = true } nkeys = { version = "0.3.0", default-features = false, optional = true } nom = { version = "7.1.3", default-features = false, optional = true } -notify = { version = "6.0.0", default-features = false, features = ["macos_fsevent"] } -once_cell = { version = "1.17", default-features = false } +notify = { version = "6.0.1", default-features = false, features = ["macos_fsevent"] } +once_cell = { version = "1.18", default-features = false } openssl = { version = "0.10.55", default-features = false, features = ["vendored"] } openssl-probe = { version = "0.1.5", default-features = false } ordered-float = { version = "3.7.0", default-features = false } paste = "1.0.12" -percent-encoding = { version = "2.2.0", default-features = false } -pin-project = { version = "1.1.0", default-features = false } +percent-encoding = { version = "2.3.0", default-features = false } +pin-project = { version = "1.1.1", default-features = false } postgres-openssl = { version = "0.5.0", default-features = false, features = ["runtime"], optional = true } -pulsar = { version = "5.1.1", default-features = false, features = ["tokio-runtime", "auth-oauth2", "flate2", "lz4", "snap", "zstd"], optional = true } +pulsar = { version = "6.0.1", default-features = false, features = ["tokio-runtime", "auth-oauth2", "flate2", "lz4", "snap", "zstd"], optional = true } rand = { version = "0.8.5", default-features = false, features = ["small_rng"] } rand_distr = { version = "0.4.3", default-features = false } -rdkafka = { version = "0.34.0", default-features = false, features = ["tokio", "libz", "ssl", "zstd"], optional = true } -redis = { version = "0.23.0", default-features = false, features = ["connection-manager", "tokio-comp", "tokio-native-tls-comp"], optional = true } -regex = { version = "1.8.1", default-features = false, features = ["std", "perf"] } reqwest = { version = "0.11", features = ["json"] } +rdkafka = { version = "0.32.2", default-features = false, features = ["tokio", "libz", "ssl", "zstd"], optional = true } +redis = { version = "0.23.0", default-features = false, features = ["connection-manager", "tokio-comp", "tokio-native-tls-comp"], optional = true } +regex = { version = 
"1.8.4", default-features = false, features = ["std", "perf"] } roaring = { version = "0.10.1", default-features = false, optional = true } seahash = { version = "4.1.0", default-features = false } semver = { version = "1.0.17", default-features = false, features = ["serde", "std"], optional = true } @@ -312,15 +318,15 @@ socket2 = { version = "0.5.3", default-features = false } stream-cancel = { version = "0.8.1", default-features = false } strip-ansi-escapes = { version = "0.1.1", default-features = false } syslog = { version = "6.1.0", default-features = false, optional = true } -tikv-jemallocator = { version = "0.5.0", default-features = false, optional = true, features = ["profiling"] } -tokio-postgres = { version = "0.7.7", default-features = false, features = ["runtime", "with-chrono-0_4", "with-uuid-1"], optional = true } +urlencoding = { version = "2.1.0", default-features = false } +tikv-jemallocator = { version = "0.5.0", default-features = false, optional = true } +tokio-postgres = { version = "0.7.7", default-features = false, features = ["runtime", "with-chrono-0_4"], optional = true } tokio-tungstenite = {version = "0.20.1", default-features = false, features = ["connect"], optional = true} -toml = { version = "0.7.3", default-features = false, features = ["parse", "display"] } +toml = { version = "0.7.5", default-features = false, features = ["parse", "display"] } tonic = { version = "0.9", optional = true, default-features = false, features = ["transport", "codegen", "prost", "tls", "tls-roots", "gzip"] } trust-dns-proto = { version = "0.22.0", default-features = false, features = ["dnssec"], optional = true } typetag = { version = "0.2.8", default-features = false } -url = { version = "2.3.1", default-features = false, features = ["serde"] } -urlencoding = { version = "2.1.0", default-features = false } +url = { version = "2.4.0", default-features = false, features = ["serde"] } uuid = { version = "1", default-features = false, features = ["serde", "v4"] } warp = { version = "0.3.5", default-features = false } zstd = { version = "0.12.3", default-features = false } @@ -354,24 +360,24 @@ azure_core = { git = "https://github.com/Azure/azure-sdk-for-rust.git", rev = "b azure_identity = { git = "https://github.com/Azure/azure-sdk-for-rust.git", rev = "b4544d4920fa3064eb921340054cd9cc130b7664", default-features = false, features = ["enable_reqwest"] } azure_storage_blobs = { git = "https://github.com/Azure/azure-sdk-for-rust.git", rev = "b4544d4920fa3064eb921340054cd9cc130b7664", default-features = false, features = ["azurite_workaround"] } azure_storage = { git = "https://github.com/Azure/azure-sdk-for-rust.git", rev = "b4544d4920fa3064eb921340054cd9cc130b7664", default-features = false, features = ["azurite_workaround"] } -base64 = "0.21.0" -criterion = { version = "0.4.0", features = ["html_reports", "async_tokio"] } -httptest = "0.15.4" -itertools = { version = "0.10.5", default-features = false } -libc = "0.2.144" mockall = "0.11.4" +httptest = "0.15.4" +base64 = "0.21.2" +criterion = { version = "0.5.1", features = ["html_reports", "async_tokio"] } +itertools = { version = "0.11.0", default-features = false, features = ["use_alloc"] } +libc = "0.2.147" similar-asserts = "1.4.2" -proptest = "1.1" +proptest = "1.2" quickcheck = "1.0.3" serial_test = "1.0.0" -tempfile = "3.5.0" +tempfile = "3.6.0" test-generator = "0.3.1" tokio-test = "0.4.2" -tokio = { version = "1.28.1", features = ["test-util"] } +tokio = { version = "1.29.0", features = ["test-util"] } tower-test = 
"0.4.0" vector-core = { path = "lib/vector-core", default-features = false, features = ["vrl", "test"] } -wiremock = "0.5.18" snap = "1" +wiremock = "0.5.19" zstd = { version = "0.12.3", default-features = false } assay = "0.1.1" temp-env = "0.3.1" @@ -379,7 +385,7 @@ temp-env = "0.3.1" [patch.crates-io] # Removes dependency on `time` v0.1 # https://github.com/chronotope/chrono/pull/578 -chrono = { git = "https://github.com/vectordotdev/chrono.git", tag = "v0.4.24-no-default-time-1" } +chrono = { git = "https://github.com/vectordotdev/chrono.git", tag = "v0.4.26-no-default-time-1" } # The upgrade for `tokio-util` >= 0.6.9 is blocked on https://github.com/vectordotdev/vector/issues/11257. tokio-util = { git = "https://github.com/vectordotdev/tokio", branch = "tokio-util-0.7.4-framed-read-continue-on-error" } nix = { git = "https://github.com/vectordotdev/nix.git", branch = "memfd/gnu/musl" } @@ -450,6 +456,7 @@ api-client = [ aws-core = [ "aws-config", + "dep:aws-credential-types", "dep:aws-sigv4", "dep:aws-types", "dep:aws-smithy-async", @@ -684,7 +691,6 @@ sinks-logs = [ "sinks-clickhouse", "sinks-console", "sinks-databend", - "sinks-datadog_archives", "sinks-datadog_events", "sinks-datadog_logs", "sinks-datadog_traces", @@ -780,10 +786,9 @@ sinks-chronicle = [] sinks-clickhouse = [] sinks-console = [] sinks-databend = [] -sinks-datadog_archives = ["sinks-aws_s3", "sinks-azure_blob", "sinks-gcp"] sinks-datadog_events = [] sinks-datadog_logs = [] -sinks-datadog_metrics = ["protobuf-build"] +sinks-datadog_metrics = ["protobuf-build", "dep:prost-reflect"] sinks-datadog_traces = ["protobuf-build", "dep:rmpv", "dep:rmp-serde", "dep:serde_bytes"] sinks-elasticsearch = ["aws-core", "transforms-metric_to_log"] sinks-file = ["dep:async-compression"] diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index ef5c467a86c93..fd759a75ab82a 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -1,21 +1,25 @@ Component,Origin,License,Copyright Inflector,https://github.com/whatisinternet/inflector,BSD-2-Clause,Josh Teeter RustyXML,https://github.com/Florob/RustyXML,MIT OR Apache-2.0,Florian Zeitz +addr2line,https://github.com/gimli-rs/addr2line,Apache-2.0 OR MIT,The addr2line Authors adler,https://github.com/jonas-schievink/adler,0BSD OR MIT OR Apache-2.0,Jonas Schievink adler32,https://github.com/remram44/adler32-rs,Zlib,Remi Rampin aes,https://github.com/RustCrypto/block-ciphers,MIT OR Apache-2.0,RustCrypto Developers ahash,https://github.com/tkaitchuck/ahash,MIT OR Apache-2.0,Tom Kaitchuck aho-corasick,https://github.com/BurntSushi/aho-corasick,Unlicense OR MIT,Andrew Gallant amq-protocol,https://github.com/amqp-rs/amq-protocol,BSD-2-Clause,Marc-Antoine Perennou <%arc-Antoine@Perennou.com> +android-tzdata,https://github.com/RumovZ/android-tzdata,MIT OR Apache-2.0,RumovZ android_system_properties,https://github.com/nical/android_system_properties,MIT OR Apache-2.0,Nicolas Silva ansi_term,https://github.com/ogham/rust-ansi-term,MIT,"ogham@bsago.me, Ryan Scheel (Havvy) , Josh Triplett " anyhow,https://github.com/dtolnay/anyhow,MIT OR Apache-2.0,David Tolnay anymap,https://github.com/chris-morgan/anymap,BlueOak-1.0.0 OR MIT OR Apache-2.0,Chris Morgan apache-avro,https://github.com/apache/avro,Apache-2.0,Apache Avro team +arbitrary,https://github.com/rust-fuzz/arbitrary,MIT OR Apache-2.0,"The Rust-Fuzz Project Developers, Nick Fitzgerald , Manish Goregaokar , Simonas Kazlauskas , Brian L. 
Troutwine , Corey Farwell " arc-swap,https://github.com/vorner/arc-swap,MIT OR Apache-2.0,Michal 'vorner' Vaner arr_macro,https://github.com/JoshMcguigan/arr_macro,MIT OR Apache-2.0,Josh Mcguigan arrayvec,https://github.com/bluss/arrayvec,MIT OR Apache-2.0,bluss ascii,https://github.com/tomprogrammer/rust-ascii,Apache-2.0 OR MIT,"Thomas Bahn , Torbjørn Birch Moltu , Simon Sapin " +assert-json-diff,https://github.com/davidpdrsn/assert-json-diff,MIT,David Pedersen async-channel,https://github.com/smol-rs/async-channel,Apache-2.0 OR MIT,Stjepan Glavina async-compat,https://github.com/smol-rs/async-compat,Apache-2.0 OR MIT,Stjepan Glavina async-compression,https://github.com/Nemo157/async-compression,MIT OR Apache-2.0,"Wim Looman , Allen Bui " @@ -36,6 +40,7 @@ async-trait,https://github.com/dtolnay/async-trait,MIT OR Apache-2.0,David Tolna atomic-waker,https://github.com/stjepang/atomic-waker,Apache-2.0 OR MIT,Stjepan Glavina atty,https://github.com/softprops/atty,MIT,softprops aws-config,https://github.com/awslabs/smithy-rs,Apache-2.0,"AWS Rust SDK Team , Russell Cohen " +aws-credential-types,https://github.com/awslabs/smithy-rs,Apache-2.0,AWS Rust SDK Team aws-endpoint,https://github.com/awslabs/smithy-rs,Apache-2.0,"AWS Rust SDK Team , Russell Cohen " aws-http,https://github.com/awslabs/smithy-rs,Apache-2.0,"AWS Rust SDK Team , Russell Cohen " aws-sdk-cloudwatch,https://github.com/awslabs/aws-sdk-rust,Apache-2.0,"AWS Rust SDK Team , Russell Cohen " @@ -55,6 +60,7 @@ aws-smithy-eventstream,https://github.com/awslabs/smithy-rs,Apache-2.0,"AWS Rust aws-smithy-http,https://github.com/awslabs/smithy-rs,Apache-2.0,"AWS Rust SDK Team , Russell Cohen " aws-smithy-http-tower,https://github.com/awslabs/smithy-rs,Apache-2.0,"AWS Rust SDK Team , Russell Cohen " aws-smithy-json,https://github.com/awslabs/smithy-rs,Apache-2.0,"AWS Rust SDK Team , John DiSanti " +aws-smithy-protocol-test,https://github.com/awslabs/smithy-rs,Apache-2.0,"AWS Rust SDK Team , Russell Cohen " aws-smithy-query,https://github.com/awslabs/smithy-rs,Apache-2.0,"AWS Rust SDK Team , John DiSanti " aws-smithy-types,https://github.com/awslabs/smithy-rs,Apache-2.0,"AWS Rust SDK Team , Russell Cohen " aws-smithy-xml,https://github.com/awslabs/smithy-rs,Apache-2.0,"AWS Rust SDK Team , Russell Cohen " @@ -67,6 +73,7 @@ azure_storage,https://github.com/azure/azure-sdk-for-rust,MIT,Microsoft Corp. azure_storage_blobs,https://github.com/azure/azure-sdk-for-rust,MIT,Microsoft Corp. backoff,https://github.com/ihrwein/backoff,MIT OR Apache-2.0,Tibor Benke backon,https://github.com/Xuanwo/backon,Apache-2.0,Xuanwo +backtrace,https://github.com/rust-lang/backtrace-rs,MIT OR Apache-2.0,The Rust Project Developers base16,https://github.com/thomcc/rust-base16,CC0-1.0,Thom Chiovoloni base64,https://github.com/marshallpierce/rust-base64,MIT OR Apache-2.0,"Alice Maz , Marshall Pierce " base64-simd,https://github.com/Nugine/simd,MIT,The base64-simd Authors @@ -136,6 +143,7 @@ crossterm,https://github.com/crossterm-rs/crossterm,MIT,T. Post crossterm_winapi,https://github.com/crossterm-rs/crossterm-winapi,MIT,T. 
Post crypto-common,https://github.com/RustCrypto/traits,MIT OR Apache-2.0,RustCrypto Developers csv,https://github.com/BurntSushi/rust-csv,Unlicense OR MIT,Andrew Gallant +ctor,https://github.com/mmastrac/rust-ctor,Apache-2.0 OR MIT,Matt Mastracci ctr,https://github.com/RustCrypto/block-modes,MIT OR Apache-2.0,RustCrypto Developers cty,https://github.com/japaric/cty,MIT OR Apache-2.0,Jorge Aparicio curve25519-dalek,https://github.com/dalek-cryptography/curve25519-dalek,BSD-3-Clause,"Isis Lovecruft , Henry de Valence " @@ -147,7 +155,9 @@ data-url,https://github.com/servo/rust-url,MIT OR Apache-2.0,Simon Sapin der,https://github.com/RustCrypto/formats/tree/master/der,Apache-2.0 OR MIT,RustCrypto Developers derivative,https://github.com/mcarton/rust-derivative,MIT OR Apache-2.0,mcarton +derive_arbitrary,https://github.com/rust-fuzz/arbitrary,MIT OR Apache-2.0,"The Rust-Fuzz Project Developers, Nick Fitzgerald , Manish Goregaokar , Andre Bogus , Corey Farwell " derive_more,https://github.com/JelteF/derive_more,MIT,Jelte Fennema +diff,https://github.com/utkarshkukreti/diff.rs,MIT OR Apache-2.0,Utkarsh Kukreti digest,https://github.com/RustCrypto/traits,MIT OR Apache-2.0,RustCrypto Developers dirs,https://github.com/soc/dirs-rs,MIT OR Apache-2.0,Simon Ochsenreither dirs-next,https://github.com/xdg-rs/dirs,MIT OR Apache-2.0,The @xdg-rs members @@ -166,6 +176,7 @@ enum-as-inner,https://github.com/bluejekyll/enum-as-inner,MIT OR Apache-2.0,Benj enum_dispatch,https://gitlab.com/antonok/enum_dispatch,MIT OR Apache-2.0,Anton Lazarev enumflags2,https://github.com/meithecatte/enumflags2,MIT OR Apache-2.0,"maik klein , Maja Kądziołka " env_logger,https://github.com/env-logger-rs/env_logger,MIT OR Apache-2.0,The Rust Project Developers +equivalent,https://github.com/cuviper/equivalent,Apache-2.0 OR MIT,The equivalent Authors erased-serde,https://github.com/dtolnay/erased-serde,MIT OR Apache-2.0,David Tolnay errno,https://github.com/lambda-fairy/rust-errno,MIT OR Apache-2.0,Chris Wong errno-dragonfly,https://github.com/mneumann/errno-dragonfly-rs,MIT,Michael Neumann @@ -174,6 +185,7 @@ error-code,https://github.com/DoumanAsh/error-code,BSL-1.0,Douman executor-trait,https://github.com/amqp-rs/executor-trait,Apache-2.0 OR MIT,Marc-Antoine Perennou exitcode,https://github.com/benwilber/exitcode,Apache-2.0,Ben Wilber +extend,https://github.com/davidpdrsn/ext,MIT,David Pedersen fakedata_generator,https://github.com/kevingimbel/fakedata_generator,MIT,Kevin Gimbel fallible-iterator,https://github.com/sfackler/rust-fallible-iterator,MIT OR Apache-2.0,Steven Fackler fastrand,https://github.com/smol-rs/fastrand,Apache-2.0 OR MIT,Stjepan Glavina @@ -204,6 +216,7 @@ futures-util,https://github.com/rust-lang/futures-rs,MIT OR Apache-2.0,The futur generic-array,https://github.com/fizyk20/generic-array,MIT,"Bartłomiej Kamiński , Aaron Trent " getrandom,https://github.com/rust-random/getrandom,MIT OR Apache-2.0,The Rand Project Developers ghost,https://github.com/dtolnay/ghost,MIT OR Apache-2.0,David Tolnay +gimli,https://github.com/gimli-rs/gimli,MIT OR Apache-2.0,The gimli Authors glob,https://github.com/rust-lang/glob,MIT OR Apache-2.0,The Rust Project Developers goauth,https://github.com/durch/rust-goauth,MIT,Drazen Urch governor,https://github.com/antifuchs/governor,MIT,Andreas Fuchs @@ -234,6 +247,7 @@ hyper,https://github.com/hyperium/hyper,MIT,Sean McArthur hyper-openssl,https://github.com/sfackler/hyper-openssl,MIT OR Apache-2.0,Steven Fackler hyper-proxy,https://github.com/tafia/hyper-proxy,MIT,Johann Tuffe 
hyper-rustls,https://github.com/ctz/hyper-rustls,Apache-2.0 OR ISC OR MIT,Joseph Birr-Pixton +hyper-rustls,https://github.com/ctz/hyper-rustls,Apache-2.0 OR ISC OR MIT,The hyper-rustls Authors hyper-timeout,https://github.com/hjr3/hyper-timeout,MIT OR Apache-2.0,Herman J. Radtke III hyper-tls,https://github.com/hyperium/hyper-tls,MIT OR Apache-2.0,Sean McArthur hyperlocal,https://github.com/softprops/hyperlocal,MIT,softprops @@ -336,6 +350,7 @@ num_threads,https://github.com/jhpratt/num_threads,MIT OR Apache-2.0,Jacob Pratt number_prefix,https://github.com/ogham/rust-number-prefix,MIT,Benjamin Sago oauth2,https://github.com/ramosbugs/oauth2-rs,MIT OR Apache-2.0,"Alex Crichton , Florin Lipan , David A. Ramos " objc,http://github.com/SSheldon/rust-objc,MIT,Steven Sheldon +object,https://github.com/gimli-rs/object,Apache-2.0 OR MIT,The object Authors ofb,https://github.com/RustCrypto/block-modes,MIT OR Apache-2.0,RustCrypto Developers once_cell,https://github.com/matklad/once_cell,MIT OR Apache-2.0,Aleksey Kladov onig,http://github.com/iwillspeak/rust-onig,MIT,"Will Speak , Ivan Ivashchenko " @@ -347,6 +362,7 @@ openssl-macros,https://github.com/sfackler/rust-openssl,MIT OR Apache-2.0,The op openssl-probe,https://github.com/alexcrichton/openssl-probe,MIT OR Apache-2.0,Alex Crichton openssl-sys,https://github.com/sfackler/rust-openssl,MIT,"Alex Crichton , Steven Fackler " ordered-float,https://github.com/reem/rust-ordered-float,MIT,"Jonathan Reem , Matt Brubeck " +output_vt100,https://github.com/Phundrak/output-vt100-rs,MIT,Phuntsok Drak-pa outref,https://github.com/Nugine/outref,MIT,The outref Authors overload,https://github.com/danaugrs/overload,MIT,Daniel Salvadori pad,https://github.com/ogham/rust-pad,MIT,Ben S @@ -372,6 +388,7 @@ postgres-openssl,https://github.com/sfackler/rust-postgres,MIT OR Apache-2.0,Ste postgres-protocol,https://github.com/sfackler/rust-postgres,MIT OR Apache-2.0,Steven Fackler postgres-types,https://github.com/sfackler/rust-postgres,MIT OR Apache-2.0,Steven Fackler ppv-lite86,https://github.com/cryptocorrosion/cryptocorrosion,MIT OR Apache-2.0,The CryptoCorrosion Contributors +pretty_assertions,https://github.com/rust-pretty-assertions/rust-pretty-assertions,MIT OR Apache-2.0,"Colin Kiegel , Florent Fayolle , Tom Milligan " prettydiff,https://github.com/romankoblov/prettydiff,MIT,Roman Koblov prettytable-rs,https://github.com/phsym/prettytable-rs,BSD-3-Clause,Pierre-Henri Symoneaux proc-macro-crate,https://github.com/bkchr/proc-macro-crate,Apache-2.0 OR MIT,Bastian Köcher @@ -381,6 +398,7 @@ proc-macro2,https://github.com/dtolnay/proc-macro2,MIT OR Apache-2.0,"David Toln proptest,https://github.com/proptest-rs/proptest,MIT OR Apache-2.0,Jason Lingle prost,https://github.com/tokio-rs/prost,Apache-2.0,"Dan Burkert , Lucio Franco " prost-derive,https://github.com/tokio-rs/prost,Apache-2.0,"Dan Burkert , Lucio Franco , Tokio Contributors " +prost-reflect,https://github.com/andrewhickman/prost-reflect,MIT OR Apache-2.0,Andrew Hickman ptr_meta,https://github.com/djkoloski/ptr_meta,MIT,David Koloski pulsar,https://github.com/streamnative/pulsar-rs,MIT OR Apache-2.0,"Colin Stearns , Kevin Stenerson , Geoffroy Couprie " quad-rand,https://github.com/not-fl3/quad-rand,MIT,not-fl3 @@ -416,8 +434,10 @@ rmp,https://github.com/3Hren/msgpack-rust,MIT,Evgeny Safronov rmpv,https://github.com/3Hren/msgpack-rust,MIT,Evgeny Safronov roaring,https://github.com/RoaringBitmap/roaring-rs,MIT OR Apache-2.0,"Wim Looman , Kerollmops " +roxmltree,https://github.com/RazrFalcon/roxmltree,MIT OR 
Apache-2.0,Evgeniy Reizner roxmltree,https://github.com/RazrFalcon/roxmltree,MIT OR Apache-2.0,Yevhenii Reizner rust_decimal,https://github.com/paupino/rust-decimal,MIT,Paul Mason +rustc-demangle,https://github.com/alexcrichton/rustc-demangle,MIT OR Apache-2.0,Alex Crichton rustc-hash,https://github.com/rust-lang-nursery/rustc-hash,Apache-2.0 OR MIT,The Rust Project Developers rustc_version,https://github.com/Kimundi/rustc-version-rs,MIT OR Apache-2.0,Marvin Löbel rustc_version_runtime,https://github.com/seppo0010/rustc-version-runtime-rs,MIT,Sebastian Waisbrot @@ -469,6 +489,7 @@ signal-hook,https://github.com/vorner/signal-hook,Apache-2.0 OR MIT,"Michal 'vor signal-hook-registry,https://github.com/vorner/signal-hook,Apache-2.0 OR MIT,"Michal 'vorner' Vaner , Masaki Hara " signatory,https://github.com/iqlusioninc/crates/tree/main/signatory,Apache-2.0 OR MIT,Tony Arcieri signature,https://github.com/RustCrypto/traits/tree/master/signature,Apache-2.0 OR MIT,RustCrypto Developers +simd-abstraction,https://github.com/Nugine/simd,MIT,The simd-abstraction Authors simpl,https://github.com/durch/simplerr,MIT,Drazen Urch siphasher,https://github.com/jedisct1/rust-siphash,MIT OR Apache-2.0,Frank Denis sketches-ddsketch,https://github.com/mheffner/rust-sketches-ddsketch,Apache-2.0,Mike Heffner @@ -522,7 +543,7 @@ tokio-postgres,https://github.com/sfackler/rust-postgres,MIT OR Apache-2.0,Steve tokio-rustls,https://github.com/tokio-rs/tls,MIT OR Apache-2.0,quininer kel tokio-tungstenite,https://github.com/snapview/tokio-tungstenite,MIT,"Daniel Abramov , Alexey Galakhov " toml,https://github.com/toml-rs/toml,MIT OR Apache-2.0,Alex Crichton -toml_edit,https://github.com/ordian/toml_edit,MIT OR Apache-2.0,"Andronik Ordian , Ed Page " +toml_edit,https://github.com/toml-rs/toml,MIT OR Apache-2.0,"Andronik Ordian , Ed Page " tonic,https://github.com/hyperium/tonic,MIT,Lucio Franco tower,https://github.com/tower-rs/tower,MIT,Tower Maintainers tower-http,https://github.com/tower-rs/tower-http,MIT,Tower Maintainers @@ -564,11 +585,9 @@ utf8-width,https://github.com/magiclen/utf8-width,MIT,Magic Len , Christian Duerr " uuid,https://github.com/uuid-rs/uuid,Apache-2.0 OR MIT,"Ashley Mannix, Christopher Armstrong, Dylan DPC, Hunar Roop Kahlon" valuable,https://github.com/tokio-rs/valuable,MIT,The valuable Authors -value,https://github.com/vectordotdev/vrl,MPL-2.0,Vector Contributors vec_map,https://github.com/contain-rs/vec-map,MIT OR Apache-2.0,"Alex Crichton , Jorge Aparicio , Alexis Beingessner , Brian Anderson <>, tbu- <>, Manish Goregaokar <>, Aaron Turon , Adolfo Ochagavía <>, Niko Matsakis <>, Steven Fackler <>, Chase Southwood , Eduard Burtescu <>, Florian Wilkens <>, Félix Raimundo <>, Tibor Benke <>, Markus Siemens , Josh Branchaud , Huon Wilson , Corey Farwell , Aaron Liblong <>, Nick Cameron , Patrick Walton , Felix S Klock II <>, Andrew Paseltiner , Sean McArthur , Vadim Petrochenkov <>" void,https://github.com/reem/rust-void,MIT,Jonathan Reem vrl,https://github.com/vectordotdev/vrl,MPL-2.0,Vector Contributors -vsimd,https://github.com/Nugine/simd,MIT,The vsimd Authors vte,https://github.com/alacritty/vte,Apache-2.0 OR MIT,"Joe Wilm , Christian Duerr " vte_generate_state_changes,https://github.com/jwilm/vte,Apache-2.0 OR MIT,Christian Duerr wait-timeout,https://github.com/alexcrichton/wait-timeout,MIT OR Apache-2.0,Alex Crichton @@ -607,9 +626,10 @@ winnow,https://github.com/winnow-rs/winnow,MIT,The winnow Authors winreg,https://github.com/gentoo90/winreg-rs,MIT,Igor Shaula 
woothee,https://github.com/woothee/woothee-rust,Apache-2.0,hhatto
wyz,https://github.com/myrrlyn/wyz,MIT,myrrlyn
-xml-rs,https://github.com/netvl/xml-rs,MIT,Vladimir Matveev
+xml-rs,https://github.com/kornelski/xml-rs,MIT,Vladimir Matveev
xmlparser,https://github.com/RazrFalcon/xmlparser,MIT OR Apache-2.0,Evgeniy Reizner
yaml-rust,https://github.com/chyh1990/yaml-rust,MIT OR Apache-2.0,Yuheng Chen
+yansi,https://github.com/SergioBenitez/yansi,MIT OR Apache-2.0,Sergio Benitez
zerocopy,https://fuchsia.googlesource.com/fuchsia/+/HEAD/src/lib/zerocopy,BSD-2-Clause,Joshua Liebow-Feeser
zerocopy-derive,https://github.com/google/zerocopy,BSD-2-Clause,Joshua Liebow-Feeser
zeroize,https://github.com/RustCrypto/utils/tree/master/zeroize,Apache-2.0 OR MIT,The RustCrypto Project Developers
diff --git a/NOTICE b/NOTICE
index e085f4b436c94..1c1d999241f54 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,4 +1,7 @@
+Unless explicitly stated otherwise all files in this repository are licensed under the Mozilla
+Public License, version 2.0 (MPL-2.0).
+This product includes software developed at Datadog (https://www.datadoghq.com/)
Copyright (c) 2020 Vector Authors
diff --git a/README.md b/README.md
index cb6c089fe9084..1ab1fb21812a2 100644
--- a/README.md
+++ b/README.md
@@ -89,7 +89,6 @@ Vector**][docs.installation].
* [**Community**][urls.vector_community] - [chat][urls.vector_chat], [calendar][urls.vector_calendar], [@vectordotdev][urls.vector_twitter]
* [**Releases**][urls.vector_releases]
-* [**Roadmap**][urls.vector_roadmap] - [vote on new features][urls.vote_feature]
* **Policies** - [Code of Conduct][urls.vector_code_of_conduct], [Privacy][urls.vector_privacy_policy], [Releases][urls.vector_releases_policy], [Security][urls.vector_security_policy], [Versioning][urls.vector_versioning_policy]
## Comparisons
@@ -221,7 +220,6 @@ Vector is an end-to-end, unified, open data platform.
[urls.vector_release_policy]: https://github.com/vectordotdev/vector/blob/master/RELEASING.md
[urls.vector_releases]: https://vector.dev/releases/
[urls.vector_releases_policy]: https://github.com/vectordotdev/vector/blob/master/RELEASES.md
-[urls.vector_roadmap]: https://roadmap.vector.dev
[urls.vector_security_policy]: https://github.com/vectordotdev/vector/security/policy
[urls.vector_test_harness]: https://github.com/vectordotdev/vector-test-harness/
[urls.vector_twitter]: https://twitter.com/vectordotdev
diff --git a/STYLE.md b/STYLE.md
index a1403ecb7fe4b..4e3e70818f708 100644
--- a/STYLE.md
+++ b/STYLE.md
@@ -24,6 +24,18 @@ As an additional note, `rustfmt` sometimes can fail to format code within macros
to see such code that doesn't look like it's formatted correctly, you may need
to manually tweak it if `rustfmt` cannot be persuaded to format it correctly
for you. :)
+### Const strings
+
+Re-typing the same raw string literal more than once can lead to typo
+errors, especially when names are similar. In general, when reasonable, it is
+preferred to use [Compile-time constants](https://doc.rust-lang.org/std/keyword.const.html)
+when dealing with non-dynamic strings. For example, when working with field names
+for event metadata.
+
+As this has not always been a consistently enforced code style for the project,
+please take the opportunity to update existing raw strings to use constants
+when modifying existing code.
+
## Code Organization
Code is primarily split into two main directories: `lib/` and `src/`.
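A minimal sketch of the const-string guidance added to STYLE.md above; it is not taken from the Vector codebase, and the `MESSAGE_KEY` name and the `BTreeMap`-based event are hypothetical stand-ins for real event metadata fields:

```rust
use std::collections::BTreeMap;

// Hypothetical metadata field name, defined once so every use site shares it.
const MESSAGE_KEY: &str = "message";

fn main() {
    let mut event: BTreeMap<String, String> = BTreeMap::new();

    // Referencing the constant instead of re-typing the raw literal "message"
    // means a misspelled name fails to compile rather than silently creating
    // a second, slightly different field.
    event.insert(MESSAGE_KEY.to_owned(), "hello".to_owned());

    assert_eq!(event.get(MESSAGE_KEY).map(String::as_str), Some("hello"));
}
```

Defining such a constant next to the code that owns the field also keeps any later rename to a single edit.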
diff --git a/Tiltfile b/Tiltfile index 1766beef83e69..6c0c9246042b4 100644 --- a/Tiltfile +++ b/Tiltfile @@ -7,7 +7,7 @@ load('ext://helm_resource', 'helm_resource', 'helm_repo') docker_build( ref='timberio/vector', context='.', - build_args={'RUST_VERSION': '1.69.0'}, + build_args={'RUST_VERSION': '1.70.0'}, dockerfile='tilt/Dockerfile' ) diff --git a/build.rs b/build.rs index 20cfd23d52a88..42316c102da33 100644 --- a/build.rs +++ b/build.rs @@ -1,5 +1,8 @@ use std::{collections::HashSet, env, fs::File, io::Write, path::Path, process::Command}; +#[cfg(feature = "protobuf-build")] +use std::path::PathBuf; + struct TrackedEnv { tracked: HashSet, } @@ -124,8 +127,19 @@ fn main() { println!("cargo:rerun-if-changed=proto/google/rpc/status.proto"); println!("cargo:rerun-if-changed=proto/vector.proto"); + // Create and store the "file descriptor set" from the compiled Protocol Buffers packages. + // + // This allows us to use runtime reflection to manually build Protocol Buffers payloads + // in a type-safe way, which is necessary for incrementally building certain payloads, like + // the ones generated in the `datadog_metrics` sink. + let protobuf_fds_path = + PathBuf::from(std::env::var("OUT_DIR").expect("OUT_DIR environment variable not set")) + .join("protobuf-fds.bin"); + let mut prost_build = prost_build::Config::new(); - prost_build.btree_map(["."]); + prost_build + .btree_map(["."]) + .file_descriptor_set_path(protobuf_fds_path); tonic_build::configure() .protoc_arg("--experimental_allow_proto3_optional") diff --git a/distribution/docker/alpine/Dockerfile b/distribution/docker/alpine/Dockerfile index 3ca4900b4fe2d..c3479f1001191 100644 --- a/distribution/docker/alpine/Dockerfile +++ b/distribution/docker/alpine/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.io/alpine:3.14 AS builder +FROM docker.io/alpine:3.18 AS builder WORKDIR /vector @@ -7,7 +7,7 @@ RUN tar -xvf vector-0*-"$(cat /etc/apk/arch)"-unknown-linux-musl*.tar.gz --strip RUN mkdir -p /var/lib/vector -FROM docker.io/alpine:3.14 +FROM docker.io/alpine:3.18 RUN apk --no-cache add ca-certificates tzdata COPY --from=builder /vector/bin/* /usr/local/bin/ diff --git a/distribution/docker/distroless-static/Dockerfile b/distribution/docker/distroless-static/Dockerfile index dc5ef9cd955bd..874c165d64457 100644 --- a/distribution/docker/distroless-static/Dockerfile +++ b/distribution/docker/distroless-static/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.io/alpine:3.14 AS builder +FROM docker.io/alpine:3.18 AS builder WORKDIR /vector diff --git a/distribution/install.sh b/distribution/install.sh index b4f72e03bcccd..4fbfb8f10b059 100755 --- a/distribution/install.sh +++ b/distribution/install.sh @@ -12,7 +12,7 @@ set -u # If PACKAGE_ROOT is unset or empty, default it. 
PACKAGE_ROOT="${PACKAGE_ROOT:-"https://packages.timber.io/vector"}" -VECTOR_VERSION="0.30.0" +VECTOR_VERSION="0.31.0" _divider="--------------------------------------------------------------------------------" _prompt=">>>" _indent=" " @@ -76,9 +76,11 @@ main() { ;; --no-modify-path) modify_path=no + shift ;; -y) prompt=no + shift ;; *) ;; @@ -139,6 +141,7 @@ install_from_archive() { assert_nz "$_arch" "arch" local _archive_arch="" + case "$_arch" in x86_64-apple-darwin) _archive_arch=$_arch @@ -152,13 +155,13 @@ install_from_archive() { aarch64-*linux*) _archive_arch="aarch64-unknown-linux-musl" ;; - armv7-*linux*-gnu) + armv7-*linux*-gnueabihf) _archive_arch="armv7-unknown-linux-gnueabihf" ;; - armv7-*linux*-musl) + armv7-*linux*-musleabihf) _archive_arch="armv7-unknown-linux-musleabihf" ;; - *) + *) err "unsupported arch: $_arch" ;; esac diff --git a/distribution/kubernetes/vector-agent/README.md b/distribution/kubernetes/vector-agent/README.md index 4c23f60fef5e4..58bc8ff221193 100644 --- a/distribution/kubernetes/vector-agent/README.md +++ b/distribution/kubernetes/vector-agent/README.md @@ -1,6 +1,6 @@ The kubernetes manifests found in this directory have been automatically generated from the [helm chart `vector/vector`](https://github.com/vectordotdev/helm-charts/tree/master/charts/vector) -version 0.21.1 with the following `values.yaml`: +version 0.22.0 with the following `values.yaml`: ```yaml role: Agent diff --git a/distribution/kubernetes/vector-agent/configmap.yaml b/distribution/kubernetes/vector-agent/configmap.yaml index 444a9830b7cf9..d7c928b159f7c 100644 --- a/distribution/kubernetes/vector-agent/configmap.yaml +++ b/distribution/kubernetes/vector-agent/configmap.yaml @@ -8,7 +8,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Agent - app.kubernetes.io/version: "0.29.1-distroless-libc" + app.kubernetes.io/version: "0.30.0-distroless-libc" data: agent.yaml: | data_dir: /vector-data-dir diff --git a/distribution/kubernetes/vector-agent/daemonset.yaml b/distribution/kubernetes/vector-agent/daemonset.yaml index 7c41dffdd16fc..8508420956313 100644 --- a/distribution/kubernetes/vector-agent/daemonset.yaml +++ b/distribution/kubernetes/vector-agent/daemonset.yaml @@ -8,7 +8,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Agent - app.kubernetes.io/version: "0.29.1-distroless-libc" + app.kubernetes.io/version: "0.30.0-distroless-libc" annotations: {} spec: selector: @@ -29,7 +29,7 @@ spec: dnsPolicy: ClusterFirst containers: - name: vector - image: "timberio/vector:0.29.1-distroless-libc" + image: "timberio/vector:0.30.0-distroless-libc" imagePullPolicy: IfNotPresent args: - --config-dir diff --git a/distribution/kubernetes/vector-agent/rbac.yaml b/distribution/kubernetes/vector-agent/rbac.yaml index b02da7d71b441..2ad572473e7d7 100644 --- a/distribution/kubernetes/vector-agent/rbac.yaml +++ b/distribution/kubernetes/vector-agent/rbac.yaml @@ -10,7 +10,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Agent - app.kubernetes.io/version: "0.29.1-distroless-libc" + app.kubernetes.io/version: "0.30.0-distroless-libc" rules: - apiGroups: - "" @@ -31,7 +31,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Agent - app.kubernetes.io/version: "0.29.1-distroless-libc" + app.kubernetes.io/version: "0.30.0-distroless-libc" roleRef: apiGroup: 
rbac.authorization.k8s.io kind: ClusterRole diff --git a/distribution/kubernetes/vector-agent/service-headless.yaml b/distribution/kubernetes/vector-agent/service-headless.yaml index aa06b662acef9..18ea854b8f9e7 100644 --- a/distribution/kubernetes/vector-agent/service-headless.yaml +++ b/distribution/kubernetes/vector-agent/service-headless.yaml @@ -8,7 +8,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Agent - app.kubernetes.io/version: "0.29.1-distroless-libc" + app.kubernetes.io/version: "0.30.0-distroless-libc" annotations: spec: clusterIP: None diff --git a/distribution/kubernetes/vector-agent/serviceaccount.yaml b/distribution/kubernetes/vector-agent/serviceaccount.yaml index f64c260c94d3a..18d7093b583f8 100644 --- a/distribution/kubernetes/vector-agent/serviceaccount.yaml +++ b/distribution/kubernetes/vector-agent/serviceaccount.yaml @@ -8,5 +8,5 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Agent - app.kubernetes.io/version: "0.29.1-distroless-libc" + app.kubernetes.io/version: "0.30.0-distroless-libc" automountServiceAccountToken: true diff --git a/distribution/kubernetes/vector-aggregator/README.md b/distribution/kubernetes/vector-aggregator/README.md index 5f1fdd0160509..194d80cc3ebb1 100644 --- a/distribution/kubernetes/vector-aggregator/README.md +++ b/distribution/kubernetes/vector-aggregator/README.md @@ -1,6 +1,6 @@ The kubernetes manifests found in this directory have been automatically generated from the [helm chart `vector/vector`](https://github.com/vectordotdev/helm-charts/tree/master/charts/vector) -version 0.21.1 with the following `values.yaml`: +version 0.22.0 with the following `values.yaml`: ```yaml diff --git a/distribution/kubernetes/vector-aggregator/configmap.yaml b/distribution/kubernetes/vector-aggregator/configmap.yaml index 0fae6e2357dd9..028a2f273a075 100644 --- a/distribution/kubernetes/vector-aggregator/configmap.yaml +++ b/distribution/kubernetes/vector-aggregator/configmap.yaml @@ -8,7 +8,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Aggregator - app.kubernetes.io/version: "0.29.1-distroless-libc" + app.kubernetes.io/version: "0.30.0-distroless-libc" data: aggregator.yaml: | data_dir: /vector-data-dir diff --git a/distribution/kubernetes/vector-aggregator/service-headless.yaml b/distribution/kubernetes/vector-aggregator/service-headless.yaml index fb06b330de493..06cad3551b635 100644 --- a/distribution/kubernetes/vector-aggregator/service-headless.yaml +++ b/distribution/kubernetes/vector-aggregator/service-headless.yaml @@ -8,7 +8,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Aggregator - app.kubernetes.io/version: "0.29.1-distroless-libc" + app.kubernetes.io/version: "0.30.0-distroless-libc" annotations: spec: clusterIP: None diff --git a/distribution/kubernetes/vector-aggregator/service.yaml b/distribution/kubernetes/vector-aggregator/service.yaml index 897c1e1d59c24..449a24950bc1e 100644 --- a/distribution/kubernetes/vector-aggregator/service.yaml +++ b/distribution/kubernetes/vector-aggregator/service.yaml @@ -8,7 +8,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Aggregator - app.kubernetes.io/version: "0.29.1-distroless-libc" + app.kubernetes.io/version: "0.30.0-distroless-libc" annotations: spec: ports: diff --git 
a/distribution/kubernetes/vector-aggregator/serviceaccount.yaml b/distribution/kubernetes/vector-aggregator/serviceaccount.yaml index 0a45553520cf3..0bf2da2d58d3b 100644 --- a/distribution/kubernetes/vector-aggregator/serviceaccount.yaml +++ b/distribution/kubernetes/vector-aggregator/serviceaccount.yaml @@ -8,5 +8,5 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Aggregator - app.kubernetes.io/version: "0.29.1-distroless-libc" + app.kubernetes.io/version: "0.30.0-distroless-libc" automountServiceAccountToken: true diff --git a/distribution/kubernetes/vector-aggregator/statefulset.yaml b/distribution/kubernetes/vector-aggregator/statefulset.yaml index b1fceaa085f17..2eef56ffd5ad9 100644 --- a/distribution/kubernetes/vector-aggregator/statefulset.yaml +++ b/distribution/kubernetes/vector-aggregator/statefulset.yaml @@ -8,7 +8,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Aggregator - app.kubernetes.io/version: "0.29.1-distroless-libc" + app.kubernetes.io/version: "0.30.0-distroless-libc" annotations: {} spec: replicas: 1 @@ -32,7 +32,7 @@ spec: dnsPolicy: ClusterFirst containers: - name: vector - image: "timberio/vector:0.29.1-distroless-libc" + image: "timberio/vector:0.30.0-distroless-libc" imagePullPolicy: IfNotPresent args: - --config-dir diff --git a/distribution/kubernetes/vector-stateless-aggregator/README.md b/distribution/kubernetes/vector-stateless-aggregator/README.md index a921045ffdb63..2703746f5d435 100644 --- a/distribution/kubernetes/vector-stateless-aggregator/README.md +++ b/distribution/kubernetes/vector-stateless-aggregator/README.md @@ -1,6 +1,6 @@ The kubernetes manifests found in this directory have been automatically generated from the [helm chart `vector/vector`](https://github.com/vectordotdev/helm-charts/tree/master/charts/vector) -version 0.21.1 with the following `values.yaml`: +version 0.22.0 with the following `values.yaml`: ```yaml role: Stateless-Aggregator diff --git a/distribution/kubernetes/vector-stateless-aggregator/configmap.yaml b/distribution/kubernetes/vector-stateless-aggregator/configmap.yaml index 2d5a0971e3d77..766c693669f7b 100644 --- a/distribution/kubernetes/vector-stateless-aggregator/configmap.yaml +++ b/distribution/kubernetes/vector-stateless-aggregator/configmap.yaml @@ -8,7 +8,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Stateless-Aggregator - app.kubernetes.io/version: "0.29.1-distroless-libc" + app.kubernetes.io/version: "0.30.0-distroless-libc" data: aggregator.yaml: | data_dir: /vector-data-dir diff --git a/distribution/kubernetes/vector-stateless-aggregator/deployment.yaml b/distribution/kubernetes/vector-stateless-aggregator/deployment.yaml index ecd7b3bfd61e7..6ff20dc958816 100644 --- a/distribution/kubernetes/vector-stateless-aggregator/deployment.yaml +++ b/distribution/kubernetes/vector-stateless-aggregator/deployment.yaml @@ -8,7 +8,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Stateless-Aggregator - app.kubernetes.io/version: "0.29.1-distroless-libc" + app.kubernetes.io/version: "0.30.0-distroless-libc" annotations: {} spec: replicas: 1 @@ -30,7 +30,7 @@ spec: dnsPolicy: ClusterFirst containers: - name: vector - image: "timberio/vector:0.29.1-distroless-libc" + image: "timberio/vector:0.30.0-distroless-libc" imagePullPolicy: IfNotPresent args: - --config-dir diff --git 
a/distribution/kubernetes/vector-stateless-aggregator/service-headless.yaml b/distribution/kubernetes/vector-stateless-aggregator/service-headless.yaml index 3a9b4da3bc67e..3230af57fbd76 100644 --- a/distribution/kubernetes/vector-stateless-aggregator/service-headless.yaml +++ b/distribution/kubernetes/vector-stateless-aggregator/service-headless.yaml @@ -8,7 +8,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Stateless-Aggregator - app.kubernetes.io/version: "0.29.1-distroless-libc" + app.kubernetes.io/version: "0.30.0-distroless-libc" annotations: spec: clusterIP: None diff --git a/distribution/kubernetes/vector-stateless-aggregator/service.yaml b/distribution/kubernetes/vector-stateless-aggregator/service.yaml index 7096ca16a604a..a22d86ff0925f 100644 --- a/distribution/kubernetes/vector-stateless-aggregator/service.yaml +++ b/distribution/kubernetes/vector-stateless-aggregator/service.yaml @@ -8,7 +8,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Stateless-Aggregator - app.kubernetes.io/version: "0.29.1-distroless-libc" + app.kubernetes.io/version: "0.30.0-distroless-libc" annotations: spec: ports: diff --git a/distribution/kubernetes/vector-stateless-aggregator/serviceaccount.yaml b/distribution/kubernetes/vector-stateless-aggregator/serviceaccount.yaml index e41ed49a7442e..50bba163cf9f5 100644 --- a/distribution/kubernetes/vector-stateless-aggregator/serviceaccount.yaml +++ b/distribution/kubernetes/vector-stateless-aggregator/serviceaccount.yaml @@ -8,5 +8,5 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Stateless-Aggregator - app.kubernetes.io/version: "0.29.1-distroless-libc" + app.kubernetes.io/version: "0.30.0-distroless-libc" automountServiceAccountToken: true diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md deleted file mode 100644 index cebda9652551b..0000000000000 --- a/docs/CONTRIBUTING.md +++ /dev/null @@ -1,227 +0,0 @@ -# Contributing - -First, thank you for contributing to Vector! The goal of this document is to -provide everything you need to start contributing to Vector. The -following TOC is sorted progressively, starting with the basics and -expanding into more specifics. Everyone from a first time contributor to a -Vector team member will find this document useful. - -- [Introduction](#introduction) -- [Your First Contribution](#your-first-contribution) - - [New sources, sinks, and transforms](#new-sources-sinks-and-transforms) -- [Workflow](#workflow) - - [Git Branches](#git-branches) - - [Git Commits](#git-commits) - - [Style](#style) - - [GitHub Pull Requests](#github-pull-requests) - - [Title](#title) - - [Reviews & Approvals](#reviews--approvals) - - [Merge Style](#merge-style) - - [CI](#ci) - - [Releasing](#releasing) - - [Testing](#testing) - - [Skipping tests](#skipping-tests) - - [Daily tests](#daily-tests) - - [Flakey tests](#flakey-tests) - - [Test harness](#test-harness) - - [Deprecations](#deprecations) - - [Dependencies](#dependencies) -- [Next steps](#next-steps) -- [Legal](#legal) - - [Contributor License Agreement](#contributor-license-agreement) - - [Granted rights and copyright assignment](#granted-rights-and-copyright-assignment) - -## Introduction - -1. **You're familiar with [GitHub](https://github.com) and the pull request - workflow.** -2. **You've read Vector's [docs](https://vector.dev/docs/).** -3. 
**You know about the [Vector community](https://vector.dev/community/). - Please use this for help.** - -## Your First Contribution - -1. Ensure your change has an issue! Find an - [existing issue][urls.existing_issues] or [open a new issue][urls.new_issue]. - - This is where you can get a feel if the change will be accepted or not. - Changes that are questionable will have a `needs: approval` label. -2. Once approved, [fork the Vector repository][urls.fork_repo] in your own - GitHub account (only applicable to outside contributors). -3. [Create a new Git branch][urls.create_branch]. -4. Make your changes. -5. [Submit the branch as a pull request][urls.submit_pr] to the main Vector - repo. A Vector team member should comment and/or review your pull request - within a few days. Although, depending on the circumstances, it may take - longer. - -### New sources, sinks, and transforms - -If you're contributing a new source, sink, or transform to Vector, thank you that's way cool! There's a few steps you -need to think about if you want to make sure we can merge your contribution. We're here to help you along with these steps, -but they are a blocker to getting a new integration released. - -To merge a new source, sink, or transform, you need to: - -- [ ] Add tests, especially integration tests if your contribution connects to an external service. -- [ ] Add instrumentation so folks using your integration can get insight into how it's working and performing. You can -see some [example of instrumentation in existing integrations](https://github.com/vectordotdev/vector/tree/master/src/internal_events). -- [ ] Add documentation. You can see [examples in the `docs` directory](https://github.com/vectordotdev/vector/blob/master/docs). - -## Workflow - -### Git Branches - -_All_ changes must be made in a branch and submitted as [pull requests](#github-pull-requests). -Vector does not adopt any type of branch naming style, but please use something -descriptive of your changes. - -### Git Commits - -#### Style - -Please ensure your commits are small and focused; they should tell a story of -your change. This helps reviewers to follow your changes, especially for more -complex changes. - -### GitHub Pull Requests - -Once your changes are ready you must submit your branch as a [pull request](https://github.com/vectordotdev/vector/pulls). - -#### Title - -The pull request title must follow the format outlined in the [conventional commits spec](https://www.conventionalcommits.org). -[Conventional commits](https://www.conventionalcommits.org) is a standardized -format for commit messages. Vector only requires this format for commits on -the `master` branch. And because Vector squashes commits before merging -branches, this means that only the pull request title must conform to this -format. Vector performs a pull request check to verify the pull request title -in case you forget. - -A list of allowed sub-categories is defined -[here](https://github.com/vectordotdev/vector/tree/master/.github). 
- -The following are all good examples of pull request titles: - -```text -feat(new sink): new `xyz` sink -feat(tcp source): add foo bar baz feature -fix(tcp source): fix foo bar baz bug -chore: improve build process -docs: fix typos -``` - -#### Reviews & Approvals - -All pull requests should be reviewed by: - -- No review required for cosmetic changes like whitespace, typos, and spelling - by a maintainer -- One Vector team member for minor changes or trivial changes from contributors -- Two Vector team members for major changes -- Three Vector team members for RFCs - -If CODEOWNERS are assigned, a review from an individual from each of the sets of owners is required. - -#### Merge Style - -All pull requests are squashed and merged. We generally discourage large pull -requests that are over 300-500 lines of diff. If you would like to propose a -change that is larger we suggest coming onto our [Discord server](https://chat.vector.dev/) and discuss it -with one of our engineers. This way we can talk through the solution and -discuss if a change that large is even needed! This will produce a quicker -response to the change and likely produce code that aligns better with our -process. - -### CI - -Currently, Vector uses GitHub Actions to run tests. The workflows are defined in -`.github/workflows`. - -#### Releasing - -GitHub Actions is responsible for releasing updated versions of Vector through -various channels. - -#### Testing - -##### Skipping tests - -Tests are run for all changes except those that have the label: - -```text -ci-condition: skip -``` - -##### Daily tests - -Some long-running tests are only run daily, rather than on every pull request. -If needed, an administrator can kick off these tests manually via the button on -the [nightly build action -page](https://github.com/vectordotdev/vector/actions?query=workflow%3Anightly) - -#### Flakey tests - -Historically, we've had some trouble with tests being flakey. If your PR does -not have passing tests: - -- Ensure that the test failures are unrelated to your change - - Is it failing on master? - - Does it fail if you rerun CI? - - Can you reproduce locally? -- Find or open an issue for the test failure - ([example](https://github.com/vectordotdev/vector/issues/3781)) -- Link the PR in the issue for the failing test so that there are more examples - -##### Test harness - -You can invoke the [test harness][urls.vector_test_harness] by commenting on -any pull request with: - -```bash -/test -t -``` - -### Deprecations - -When deprecating functionality in Vector, see [DEPRECATION.md](DEPRECATION.md). - -### Dependencies - -When adding, modifying, or removing a dependency in Vector you may find that you need to update the -inventory of third-party licenses maintained in `LICENSE-3rdparty.csv`. This file is generated using -[rust-license-tool](https://github.com/DataDog/rust-license-tool.git) and can be updated using -`cargo vdev build licenses`. - -## Next steps - -As discussed in the [`README`](README.md), you should continue to the following -documents: - -1. **[DEVELOPING.md](DEVELOPING.md)** - Everything necessary to develop -2. **[DOCUMENTING.md](DOCUMENTING.md)** - Preparing your change for Vector users -3. **[DEPRECATION.md](DEPRECATION.md)** - Deprecating functionality in Vector - -## Legal - -To protect all users of Vector, the following legal requirements are made. -If you have additional questions, please [contact us]. 
- -### Contributor License Agreement - -Vector requires all contributors to sign the Contributor License Agreement -(CLA). This gives Vector the right to use your contribution as well as ensuring -that you own your contributions and can use them for other purposes. - -The full text of the CLA can be found at [https://cla.datadoghq.com/vectordotdev/vector](https://cla.datadoghq.com/vectordotdev/vector). - -### Granted rights and copyright assignment - -This is covered by the CLA. - -[contact us]: https://vector.dev/community -[urls.create_branch]: https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-and-deleting-branches-within-your-repository -[urls.existing_issues]: https://github.com/vectordotdev/vector/issues -[urls.fork_repo]: https://help.github.com/en/github/getting-started-with-github/fork-a-repo -[urls.new_issue]: https://github.com/vectordotdev/vector/issues/new -[urls.submit_pr]: https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request-from-a-fork -[urls.vector_test_harness]: https://github.com/vectordotdev/vector-test-harness/ diff --git a/docs/DEVELOPING.md b/docs/DEVELOPING.md index 39b802755e03c..d7512a980bd6c 100644 --- a/docs/DEVELOPING.md +++ b/docs/DEVELOPING.md @@ -8,6 +8,7 @@ - [Makefile](#makefile) - [Code style](#code-style) - [Logging style](#logging-style) + - [Panics](#panics) - [Feature flags](#feature-flags) - [Dependencies](#dependencies) - [Guidelines](#guidelines) @@ -122,6 +123,8 @@ Loosely, you'll need the following: - **To build Vector:** Have working Rustup, Protobuf tools, C++/C build tools (LLVM, GCC, or MSVC), Python, and Perl, `make` (the GNU one preferably), `bash`, `cmake`, `GNU coreutils`, and `autotools`. - **To run integration tests:** Have `docker` available, or a real live version of that service. (Use `AUTOSPAWN=false`) - **To run `make check-component-features`:** Have `remarshal` installed. +- **To run `make check-licenses` or `cargo vdev build licenses`:** Have `rust-license-tool` [installed](https://github.com/DataDog/rust-license-tool). +- **To run `cargo vdev build component-docs`:** Have `cue` [installed](https://cuelang.org/docs/install/). If you find yourself needing to run something inside the Docker environment described above, that's totally fine, they won't collide or hurt each other. In this case, you'd just run `make environment-generate`. @@ -155,6 +158,8 @@ cargo bench transforms::example # Format your code before pushing! make fmt cargo fmt +# Build component documentation for the website +cargo vdev build component-docs ``` If you run `make` you'll see a full list of all our tasks. Some of these will start Docker containers, sign commits, or even make releases. These are not common development commands and your mileage may vary. diff --git a/docs/DOCUMENTING.md b/docs/DOCUMENTING.md index 1b1dc230ba2f7..33546631e252b 100644 --- a/docs/DOCUMENTING.md +++ b/docs/DOCUMENTING.md @@ -9,12 +9,14 @@ documentation in tandem with code changes. 1. [Responsibilities](#responsibilities) 2. [Reference documentation](#reference-documentation) - 1. [Formatting](#formatting) - 2. [Validating](#validating) + 1. [Installing CUE](#installing-cue) + 2. [Generating from source code](#generating-from-source-code) + 3. [Formatting](#formatting) + 4. [Validating](#validating) 1. [Tips & tricks](#tips--tricks) 1. [Make small incremental changes](#make-small-incremental-changes) - 3. [Changelog](#changelog) - 4. [Release highlights](#release-highlights) + 5. 
[Changelog](#changelog) + 6. [Release highlights](#release-highlights) 1. [FAQ](#faq) 1. [What makes a release highlight noteworthy?](#what-makes-a-release-highlight-noteworthy) 2. [How is a release highlight different from a blog post?](#how-is-a-release-highlight-different-from-a-blog-post) @@ -53,6 +55,15 @@ version that Vector depends on. Currently Vector is using `v0.5.0`. Using a CUE version different than this may result in CUE check/build errors. We are aiming to improve the developer experience around external tool dependencies ([#15909](https://github.com/vectordotdev/vector/issues/15909)). +### Generating from source code + +Much of Vector's reference documentation is automatically compiled from source code (e.g., doc comments). +To regenerate this content, run: + +```bash +cargo vdev build component-docs +``` + ### Formatting Vector has some CUE-related CI checks that are run whenever changes are made to diff --git a/docs/specs/component.md b/docs/specs/component.md index eaa819c3f16b4..20e43b4bf5ec3 100644 --- a/docs/specs/component.md +++ b/docs/specs/component.md @@ -114,7 +114,8 @@ _All components_ MUST emit a `ComponentEventsReceived` event that represents the reception of Vector events from an upstream component. - Emission - - MUST emit immediately after creating or receiving Vector events. + - MUST emit immediately after creating or receiving Vector events, before modification or metadata + is added. - Properties - `count` - The count of Vector events. - `byte_size` - The estimated JSON byte size of all events received. @@ -130,9 +131,11 @@ the reception of Vector events from an upstream component. #### ComponentBytesReceived -*Sources* MUST emit a `ComponentBytesReceived` event immediately after receiving, decompressing -and filtering bytes from the upstream source and before the creation of a Vector event. +*Sources* MUST emit a `ComponentBytesReceived` event that represent the reception of bytes. +- Emission + - MUST emit immediately after receiving, decompressing and filtering bytes from the upstream + source and before the creation of a Vector event. - Properties - `byte_size` - For UDP, TCP, and Unix protocols, the total number of bytes received from @@ -155,13 +158,13 @@ and filtering bytes from the upstream source and before the creation of a Vector #### ComponentBytesSent -*Sinks* that send events downstream, MUST emit a `ComponentBytesSent` event immediately after -sending bytes to the downstream target, if the transmission was successful. The reported bytes MUST -be before compression. - -Note that for sinks that simply expose data, but don't delete the data after -sending it, like the `prometheus_exporter` sink, SHOULD NOT publish this metric. +*Sinks* MUST emit a `ComponentBytesSent` event that represent the transmission of bytes. +- Emission + - MUST emit a `ComponentBytesSent` event immediately after sending bytes to the downstream target, + if the transmission was successful. The reported bytes MUST be before compression. + - Note that sinks that simply expose data, but don't delete the data after sending it, like the + `prometheus_exporter` sink, SHOULD NOT emit this metric. - Properties - `byte_size` - For UDP, TCP, and Unix protocols, the total number of bytes placed on the diff --git a/docs/specs/configuration.md b/docs/specs/configuration.md index 104da9e2506ae..9e9a06e16760b 100644 --- a/docs/specs/configuration.md +++ b/docs/specs/configuration.md @@ -9,10 +9,12 @@ interpreted as described in [RFC 2119]. 
- [Introduction](#introduction) - [Scope](#scope) - [Terminology](#terminology) + - [Flag](#flag) - [Entity](#entity) - [Option](#option) - [Schema](#schema) - [Naming](#naming) + - [Flag naming](#flag-naming) - [Entity naming](#entity-naming) - [Option naming](#option-naming) - [Types](#types) @@ -37,6 +39,10 @@ relevant specifications, such as the [component specification]. ## Terminology +### Flag + +"Flag" refers to a CLI flag provided when running Vector. + ### Entity "Entity" refers to a Vector concept used to model Vector's processing graph. @@ -53,6 +59,16 @@ under entities and also used to define global Vector behavior. ### Naming +#### Flag naming + +- MUST only contain ASCII alphanumeric, lowercase, and hyphens +- MUST be in kebab-case format when multiple words are used (e.g., `config-dir`) +- For flags that take a value, but are also able to be "disabled", they SHOULD NOT use a sentinel + value. Instead they SHOULD have a second flag added prefixed with `no-` and SHOULD leave off any + unit suffixes. For example, to disable `--graceful-shutdown-limit-secs`, + a `--no-graceful-shutdown` flag was added. Vector MUST NOT allow both the flag and its negative to + be specified at the same time. + #### Entity naming - MUST only contain ASCII alphanumeric, lowercase, and underscores @@ -64,7 +80,8 @@ under entities and also used to define global Vector behavior. - MUST only contain ASCII alphanumeric, lowercase, and underscores - MUST be in snake case format when multiple words are used (e.g., `timeout_seconds`) - SHOULD use nouns, not verbs, as names (e.g., `fingerprint` instead of `fingerprinting`) -- MUST suffix options with their _full_ unit name (e.g., `_seconds`, `_bytes`, etc.) +- MUST suffix options with their _full_ unit name (e.g., `_megabytes` rather than `_mb`) or the + following abbreviations for time units: `_secs`, `_ms`, `_ns`. - SHOULD consistent with units within the same scope. (e.g., don't mix seconds and milliseconds) - MUST NOT repeat the name space in the option name (e.g., `fingerprint.bytes` instead of `fingerprint.fingerprint_bytes`) diff --git a/docs/tutorials/sinks/1_basic_sink.md b/docs/tutorials/sinks/1_basic_sink.md index 4e999d227e822..18194636eb469 100644 --- a/docs/tutorials/sinks/1_basic_sink.md +++ b/docs/tutorials/sinks/1_basic_sink.md @@ -22,16 +22,7 @@ Provide some module level comments to explain what the sink does. Let's setup all the imports we will need for the tutorial: ```rust -use super::Healthcheck; -use crate::config::{GenerateConfig, SinkConfig, SinkContext}; -use futures::{stream::BoxStream, StreamExt}; -use vector_common::finalization::{EventStatus, Finalizable}; -use vector_config::configurable_component; -use vector_core::{ - config::{AcknowledgementsConfig, Input}, - event::Event, - sink::{StreamSink, VectorSink}, -}; +use crate::prelude::*; ``` # Configuration @@ -42,7 +33,7 @@ is deserialized to the fields in this struct so the user can customise the sink's behaviour. ```rust -#[configurable_component(sink("basic"))] +#[configurable_component(sink("basic", "Basic sink."))] #[derive(Clone, Debug)] /// A basic sink that dumps its output to stdout. pub struct BasicConfig { @@ -84,10 +75,12 @@ configuration for the sink. # SinkConfig We need to implement the [`SinkConfig`][sink_config] trait. This is used by -Vector to generate the main Sink from the configuration. +Vector to generate the main Sink from the configuration. Note that type name +given to `typetag` below must match the name of the configurable component above. 
```rust #[async_trait::async_trait] +#[typetag::serde(name = "basic")] impl SinkConfig for BasicConfig { async fn build(&self, _cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let healthcheck = Box::pin(async move { Ok(()) }); @@ -207,59 +200,6 @@ sinks-logs = [ "sinks-chronicle", ``` -## Module - -Import this module into Vector. In `src/sinks/mod.rs` add the lines: - - -```diff - #[cfg(feature = "sinks-azure_monitor_logs")] - pub mod azure_monitor_logs; -+ #[cfg(feature = "sinks-basic")] -+ pub mod basic; - #[cfg(feature = "sinks-blackhole")] - pub mod blackhole; -``` - -All sinks are feature gated, this allows us to build custom versions of Vector -with only the components required. We will ignore the feature flag for now with -our new basic sink. - -Next, each sink needs to be added to the [`Sinks`][sinks_enum] enum. Find the -enum in `mod.rs` and add our new sink to it. - -```diff -#[configurable_component] -#[allow(clippy::large_enum_variant)] -#[derive(Clone, Debug)] -#[serde(tag = "type", rename_all = "snake_case")] -#[enum_dispatch(SinkConfig)] -pub enum Sinks { - ... - -+ /// Basic -+ #[cfg(feature = "sinks-basic")] -+ Basic(#[configurable(derived)] basic::BasicConfig), - - ... - -``` - -Then we need to add this to the `get_component_name` function defined below. - -```diff - - fn get_component_name(&self) -> &'static str { - match self { - ... - -+ #[cfg(feature = "sinks-basic")] -+ Self::Basic(config) => config.get_component_name(), - - ... - -``` - # Acknowledgements When our sink finishes processing the event, it needs to acknowledge this so diff --git a/docs/tutorials/sinks/2_http_sink.md b/docs/tutorials/sinks/2_http_sink.md index ed99ca4105d4b..66fcb2e4d6f97 100644 --- a/docs/tutorials/sinks/2_http_sink.md +++ b/docs/tutorials/sinks/2_http_sink.md @@ -12,32 +12,11 @@ To start, update our imports to the following: use std::task::Poll; use crate::{ - config::{GenerateConfig, SinkConfig, SinkContext}, + sinks::prelude::*, http::HttpClient, internal_events::SinkRequestBuildError, - sinks::util::{ - encoding::{write_all, Encoder}, - metadata::RequestMetadataBuilder, - request_builder::EncodeResult, - Compression, RequestBuilder, SinkBuilderExt, - }, - sinks::Healthcheck, }; use bytes::Bytes; -use futures::{future::BoxFuture, stream::BoxStream, StreamExt}; -use vector_common::{ - finalization::{EventFinalizers, EventStatus, Finalizable}, - internal_event::CountByteSize, - request_metadata::{MetaDescriptive, RequestMetadata}, -}; -use vector_config::configurable_component; -use vector_core::{ - config::{AcknowledgementsConfig, Input}, - event::Event, - sink::{StreamSink, VectorSink}, - stream::DriverResponse, - tls::TlsSettings, -}; ``` # Configuration @@ -387,9 +366,9 @@ impl DriverResponse for BasicResponse { EventStatus::Delivered } - fn events_sent(&self) -> CountByteSize { + fn events_sent(&self) -> RequestCountByteSize { // (events count, byte size) - CountByteSize(1, self.byte_size) + CountByteSize(1, self.byte_size).into() } } ``` diff --git a/lib/codecs/Cargo.toml b/lib/codecs/Cargo.toml index 661be7af6989d..612951fc488cc 100644 --- a/lib/codecs/Cargo.toml +++ b/lib/codecs/Cargo.toml @@ -15,11 +15,11 @@ dyn-clone = { version = "1", default-features = false } faster-hex = "0.8" lookup = { package = "vector-lookup", path = "../vector-lookup", default-features = false } memchr = { version = "2", default-features = false } -once_cell = { version = "1.17", default-features = false } +once_cell = { version = "1.18", default-features = false } ordered-float = { 
version = "3.7.0", default-features = false } prometheus-remote-write = { version = "1.1", tag = "v1.1.3", git = "ssh://git@github.com/answerbook/prometheus-remote-write-rs.git" } prost = { version = "0.11.8", default-features = false, features = ["std"] } -regex = { version = "1.8.1", default-features = false, features = ["std", "perf"] } +regex = { version = "1.8.4", default-features = false, features = ["std", "perf"] } serde = { version = "1", default-features = false, features = ["derive"] } serde_json = { version = "1", default-features = false } smallvec = { version = "1", default-features = false, features = ["union"] } @@ -28,7 +28,7 @@ snap = "1" syslog_loose = { version = "0.18", default-features = false, optional = true } tokio-util = { version = "0.7", default-features = false, features = ["codec"] } tracing = { version = "0.1", default-features = false } -vrl = { git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0", default-features = false, features = ["value"] } +vrl.workspace = true vector-common = { path = "../vector-common", default-features = false } vector-config = { path = "../vector-config", default-features = false } vector-config-common = { path = "../vector-config-common", default-features = false } diff --git a/lib/codecs/src/decoding/format/gelf.rs b/lib/codecs/src/decoding/format/gelf.rs index 050bcdfc21912..e5b7dbe96c315 100644 --- a/lib/codecs/src/decoding/format/gelf.rs +++ b/lib/codecs/src/decoding/format/gelf.rs @@ -1,9 +1,11 @@ use bytes::Bytes; use chrono::{DateTime, NaiveDateTime, Utc}; +use derivative::Derivative; use lookup::{event_path, owned_value_path, PathPrefix}; use serde::{Deserialize, Serialize}; use smallvec::{smallvec, SmallVec}; use std::collections::HashMap; +use vector_config::configurable_component; use vector_core::config::LogNamespace; use vector_core::{ config::{log_schema, DataType}, @@ -14,7 +16,7 @@ use vector_core::{ use vrl::value::kind::Collection; use vrl::value::{Kind, Value}; -use super::Deserializer; +use super::{default_lossy, Deserializer}; use crate::{gelf_fields::*, VALID_FIELD_REGEX}; /// On GELF decoding behavior: @@ -24,13 +26,28 @@ use crate::{gelf_fields::*, VALID_FIELD_REGEX}; /// of vector will still work with the new relaxed decoding. /// Config used to build a `GelfDeserializer`. -#[derive(Debug, Clone, Default, Deserialize, Serialize)] -pub struct GelfDeserializerConfig; +#[configurable_component] +#[derive(Debug, Clone, Default)] +pub struct GelfDeserializerConfig { + /// GELF-specific decoding options. + #[serde( + default, + skip_serializing_if = "vector_core::serde::skip_serializing_if_default" + )] + pub gelf: GelfDeserializerOptions, +} impl GelfDeserializerConfig { + /// Creates a new `GelfDeserializerConfig`. + pub fn new(options: GelfDeserializerOptions) -> Self { + Self { gelf: options } + } + /// Build the `GelfDeserializer` from this configuration. pub fn build(&self) -> GelfDeserializer { - GelfDeserializer::default() + GelfDeserializer { + lossy: self.gelf.lossy, + } } /// Return the type of event built by this deserializer. @@ -60,21 +77,36 @@ impl GelfDeserializerConfig { } } -/// Deserializer that builds an `Event` from a byte frame containing a GELF log -/// message. -#[derive(Debug, Clone)] -pub struct GelfDeserializer; +/// GELF-specific decoding options. +#[configurable_component] +#[derive(Debug, Clone, PartialEq, Eq, Derivative)] +#[derivative(Default)] +pub struct GelfDeserializerOptions { + /// Determines whether or not to replace invalid UTF-8 sequences instead of failing. 
+ /// + /// When true, invalid UTF-8 sequences are replaced with the [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]. + /// + /// [U+FFFD]: https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character + #[serde( + default = "default_lossy", + skip_serializing_if = "vector_core::serde::skip_serializing_if_default" + )] + #[derivative(Default(value = "default_lossy()"))] + pub lossy: bool, +} -impl Default for GelfDeserializer { - fn default() -> Self { - Self::new() - } +/// Deserializer that builds an `Event` from a byte frame containing a GELF log message. +#[derive(Debug, Clone, Derivative)] +#[derivative(Default)] +pub struct GelfDeserializer { + #[derivative(Default(value = "default_lossy()"))] + lossy: bool, } impl GelfDeserializer { - /// Create a new GelfDeserializer - pub fn new() -> GelfDeserializer { - GelfDeserializer + /// Create a new `GelfDeserializer`. + pub fn new(lossy: bool) -> GelfDeserializer { + GelfDeserializer { lossy } } /// Builds a LogEvent from the parsed GelfMessage. @@ -195,10 +227,10 @@ impl Deserializer for GelfDeserializer { bytes: Bytes, _log_namespace: LogNamespace, ) -> vector_common::Result> { - let line = std::str::from_utf8(&bytes)?; - let line = line.trim(); - - let parsed: GelfMessage = serde_json::from_str(line)?; + let parsed: GelfMessage = match self.lossy { + true => serde_json::from_str(&String::from_utf8_lossy(&bytes)), + false => serde_json::from_slice(&bytes), + }?; let event = self.message_to_event(&parsed)?; Ok(smallvec![event]) @@ -220,7 +252,7 @@ mod tests { fn deserialize_gelf_input( input: &serde_json::Value, ) -> vector_common::Result> { - let config = GelfDeserializerConfig; + let config = GelfDeserializerConfig::default(); let deserializer = config.build(); let buffer = Bytes::from(serde_json::to_vec(&input).unwrap()); deserializer.parse(buffer, LogNamespace::Legacy) diff --git a/lib/codecs/src/decoding/format/json.rs b/lib/codecs/src/decoding/format/json.rs index 32f28e5e58436..67e7bc624bbdf 100644 --- a/lib/codecs/src/decoding/format/json.rs +++ b/lib/codecs/src/decoding/format/json.rs @@ -2,9 +2,10 @@ use std::convert::TryInto; use bytes::Bytes; use chrono::Utc; +use derivative::Derivative; use lookup::PathPrefix; -use serde::{Deserialize, Serialize}; use smallvec::{smallvec, SmallVec}; +use vector_config::configurable_component; use vector_core::{ config::{log_schema, DataType, LogNamespace}, event::Event, @@ -12,13 +13,26 @@ use vector_core::{ }; use vrl::value::Kind; -use super::Deserializer; +use super::{default_lossy, Deserializer}; /// Config used to build a `JsonDeserializer`. -#[derive(Debug, Clone, Default, Deserialize, Serialize)] -pub struct JsonDeserializerConfig; +#[configurable_component] +#[derive(Debug, Clone, Default)] +pub struct JsonDeserializerConfig { + /// JSON-specific decoding options. + #[serde( + default, + skip_serializing_if = "vector_core::serde::skip_serializing_if_default" + )] + pub json: JsonDeserializerOptions, +} impl JsonDeserializerConfig { + /// Creates a new `JsonDeserializerConfig`. + pub fn new(options: JsonDeserializerOptions) -> Self { + Self { json: options } + } + /// Build the `JsonDeserializer` from this configuration. pub fn build(&self) -> JsonDeserializer { Into::::into(self) @@ -54,21 +68,36 @@ impl JsonDeserializerConfig { } } -impl JsonDeserializerConfig { - /// Creates a new `JsonDeserializerConfig`. - pub fn new() -> Self { - Default::default() - } +/// JSON-specific decoding options. 
+#[configurable_component] +#[derive(Debug, Clone, PartialEq, Eq, Derivative)] +#[derivative(Default)] +pub struct JsonDeserializerOptions { + /// Determines whether or not to replace invalid UTF-8 sequences instead of failing. + /// + /// When true, invalid UTF-8 sequences are replaced with the [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]. + /// + /// [U+FFFD]: https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character + #[serde( + default = "default_lossy", + skip_serializing_if = "vector_core::serde::skip_serializing_if_default" + )] + #[derivative(Default(value = "default_lossy()"))] + pub lossy: bool, } /// Deserializer that builds `Event`s from a byte frame containing JSON. -#[derive(Debug, Clone, Default)] -pub struct JsonDeserializer; +#[derive(Debug, Clone, Derivative)] +#[derivative(Default)] +pub struct JsonDeserializer { + #[derivative(Default(value = "default_lossy()"))] + lossy: bool, +} impl JsonDeserializer { /// Creates a new `JsonDeserializer`. - pub fn new() -> Self { - Default::default() + pub fn new(lossy: bool) -> Self { + Self { lossy } } } @@ -84,8 +113,11 @@ impl Deserializer for JsonDeserializer { return Ok(smallvec![]); } - let json: serde_json::Value = serde_json::from_slice(&bytes) - .map_err(|error| format!("Error parsing JSON: {:?}", error))?; + let json: serde_json::Value = match self.lossy { + true => serde_json::from_str(&String::from_utf8_lossy(&bytes)), + false => serde_json::from_slice(&bytes), + } + .map_err(|error| format!("Error parsing JSON: {:?}", error))?; // If the root is an Array, split it into multiple events let mut events = match json { @@ -119,8 +151,10 @@ impl Deserializer for JsonDeserializer { } impl From<&JsonDeserializerConfig> for JsonDeserializer { - fn from(_: &JsonDeserializerConfig) -> Self { - Self + fn from(config: &JsonDeserializerConfig) -> Self { + Self { + lossy: config.json.lossy, + } } } @@ -133,7 +167,7 @@ mod tests { #[test] fn deserialize_json() { let input = Bytes::from(r#"{ "foo": 123 }"#); - let deserializer = JsonDeserializer::new(); + let deserializer = JsonDeserializer::default(); for namespace in [LogNamespace::Legacy, LogNamespace::Vector] { let events = deserializer.parse(input.clone(), namespace).unwrap(); @@ -160,7 +194,7 @@ mod tests { #[test] fn deserialize_json_array() { let input = Bytes::from(r#"[{ "foo": 123 }, { "bar": 456 }]"#); - let deserializer = JsonDeserializer::new(); + let deserializer = JsonDeserializer::default(); for namespace in [LogNamespace::Legacy, LogNamespace::Vector] { let events = deserializer.parse(input.clone(), namespace).unwrap(); let mut events = events.into_iter(); @@ -197,7 +231,7 @@ mod tests { #[test] fn deserialize_skip_empty() { let input = Bytes::from(""); - let deserializer = JsonDeserializer::new(); + let deserializer = JsonDeserializer::default(); for namespace in [LogNamespace::Legacy, LogNamespace::Vector] { let events = deserializer.parse(input.clone(), namespace).unwrap(); @@ -208,7 +242,44 @@ mod tests { #[test] fn deserialize_error_invalid_json() { let input = Bytes::from("{ foo"); - let deserializer = JsonDeserializer::new(); + let deserializer = JsonDeserializer::default(); + + for namespace in [LogNamespace::Legacy, LogNamespace::Vector] { + assert!(deserializer.parse(input.clone(), namespace).is_err()); + } + } + + #[test] + fn deserialize_lossy_replace_invalid_utf8() { + let input = Bytes::from(b"{ \"foo\": \"Hello \xF0\x90\x80World\" }".as_slice()); + let deserializer = JsonDeserializer::new(true); + + for namespace in [LogNamespace::Legacy, 
LogNamespace::Vector] { + let events = deserializer.parse(input.clone(), namespace).unwrap(); + let mut events = events.into_iter(); + + { + let event = events.next().unwrap(); + let log = event.as_log(); + assert_eq!(log["foo"], b"Hello \xEF\xBF\xBDWorld".into()); + assert_eq!( + log.get(( + lookup::PathPrefix::Event, + log_schema().timestamp_key().unwrap() + )) + .is_some(), + namespace == LogNamespace::Legacy + ); + } + + assert_eq!(events.next(), None); + } + } + + #[test] + fn deserialize_non_lossy_error_invalid_utf8() { + let input = Bytes::from(b"{ \"foo\": \"Hello \xF0\x90\x80World\" }".as_slice()); + let deserializer = JsonDeserializer::new(false); for namespace in [LogNamespace::Legacy, LogNamespace::Vector] { assert!(deserializer.parse(input.clone(), namespace).is_err()); diff --git a/lib/codecs/src/decoding/format/mezmo/open_telemetry/mod.rs b/lib/codecs/src/decoding/format/mezmo/open_telemetry/mod.rs index bda49d34a8f6c..4195aa53136e2 100644 --- a/lib/codecs/src/decoding/format/mezmo/open_telemetry/mod.rs +++ b/lib/codecs/src/decoding/format/mezmo/open_telemetry/mod.rs @@ -64,9 +64,7 @@ impl OpenTelemetryMetricDeserializer { /// Default Stream Framing for the Deserializer pub fn default_stream_framing() -> FramingConfig { - FramingConfig::NewlineDelimited { - newline_delimited: Default::default(), - } + FramingConfig::NewlineDelimited(Default::default()) } /// Content Type expected by Deserializer @@ -114,9 +112,7 @@ impl OpenTelemetryLogDeserializer { /// Default Stream Framing for the Deserializer pub fn default_stream_framing() -> FramingConfig { - FramingConfig::NewlineDelimited { - newline_delimited: Default::default(), - } + FramingConfig::NewlineDelimited(Default::default()) } /// Content Type expected by Deserializer @@ -161,9 +157,7 @@ impl OpenTelemetryTraceDeserializer { /// Default Stream Framing for the Deserializer pub fn default_stream_framing() -> FramingConfig { - FramingConfig::NewlineDelimited { - newline_delimited: Default::default(), - } + FramingConfig::NewlineDelimited(Default::default()) } /// Content Type expected by Deserializer diff --git a/lib/codecs/src/decoding/format/mezmo/prometheus_remote_write/mod.rs b/lib/codecs/src/decoding/format/mezmo/prometheus_remote_write/mod.rs index 267d7ba817901..0e62da4c46c23 100644 --- a/lib/codecs/src/decoding/format/mezmo/prometheus_remote_write/mod.rs +++ b/lib/codecs/src/decoding/format/mezmo/prometheus_remote_write/mod.rs @@ -54,9 +54,7 @@ impl PrometheusRemoteWriteDeserializer { /// Default Stream Framing for the Deserializer pub fn default_stream_framing() -> FramingConfig { - FramingConfig::NewlineDelimited { - newline_delimited: Default::default(), - } + FramingConfig::NewlineDelimited(Default::default()) } /// Content Type expected by Deserializer diff --git a/lib/codecs/src/decoding/format/mod.rs b/lib/codecs/src/decoding/format/mod.rs index 3f9c541e01c0a..76942d49af244 100644 --- a/lib/codecs/src/decoding/format/mod.rs +++ b/lib/codecs/src/decoding/format/mod.rs @@ -15,21 +15,23 @@ mod mezmo; use ::bytes::Bytes; use dyn_clone::DynClone; -pub use gelf::{GelfDeserializer, GelfDeserializerConfig}; -pub use json::{JsonDeserializer, JsonDeserializerConfig}; +pub use gelf::{GelfDeserializer, GelfDeserializerConfig, GelfDeserializerOptions}; +pub use json::{JsonDeserializer, JsonDeserializerConfig, JsonDeserializerOptions}; pub use mezmo::{ open_telemetry::{DeserializerError, OpenTelemetryMetricDeserializer}, MezmoDeserializer, }; pub use native::{NativeDeserializer, NativeDeserializerConfig}; -pub use 
native_json::{NativeJsonDeserializer, NativeJsonDeserializerConfig}; +pub use native_json::{ + NativeJsonDeserializer, NativeJsonDeserializerConfig, NativeJsonDeserializerOptions, +}; use smallvec::SmallVec; +#[cfg(feature = "syslog")] +pub use syslog::{SyslogDeserializer, SyslogDeserializerConfig, SyslogDeserializerOptions}; use vector_core::config::LogNamespace; use vector_core::event::Event; pub use self::bytes::{BytesDeserializer, BytesDeserializerConfig}; -#[cfg(feature = "syslog")] -pub use self::syslog::{SyslogDeserializer, SyslogDeserializerConfig}; /// Parse structured events from bytes. pub trait Deserializer: DynClone + Send + Sync { @@ -50,3 +52,8 @@ dyn_clone::clone_trait_object!(Deserializer); /// A `Box` containing a `Deserializer`. pub type BoxedDeserializer = Box; + +/// Default value for the UTF-8 lossy option. +const fn default_lossy() -> bool { + true +} diff --git a/lib/codecs/src/decoding/format/native_json.rs b/lib/codecs/src/decoding/format/native_json.rs index 038a36c69fa28..43e09ec86f5f6 100644 --- a/lib/codecs/src/decoding/format/native_json.rs +++ b/lib/codecs/src/decoding/format/native_json.rs @@ -1,21 +1,39 @@ use bytes::Bytes; -use serde::{Deserialize, Serialize}; +use derivative::Derivative; use smallvec::{smallvec, SmallVec}; +use vector_config::configurable_component; use vector_core::{config::DataType, event::Event, schema}; use vrl::value::kind::Collection; use vrl::value::Kind; -use super::Deserializer; +use super::{default_lossy, Deserializer}; use vector_core::config::LogNamespace; /// Config used to build a `NativeJsonDeserializer`. -#[derive(Debug, Clone, Default, Deserialize, Serialize)] -pub struct NativeJsonDeserializerConfig; +#[configurable_component] +#[derive(Debug, Clone, Default)] +pub struct NativeJsonDeserializerConfig { + /// Vector's native JSON-specific decoding options. + #[serde( + default, + skip_serializing_if = "vector_core::serde::skip_serializing_if_default" + )] + pub native_json: NativeJsonDeserializerOptions, +} impl NativeJsonDeserializerConfig { + /// Creates a new `NativeJsonDeserializerConfig`. + pub fn new(options: NativeJsonDeserializerOptions) -> Self { + Self { + native_json: options, + } + } + /// Build the `NativeJsonDeserializer` from this configuration. - pub const fn build(&self) -> NativeJsonDeserializer { - NativeJsonDeserializer + pub fn build(&self) -> NativeJsonDeserializer { + NativeJsonDeserializer { + lossy: self.native_json.lossy, + } } /// Return the type of event build by this deserializer. @@ -37,10 +55,32 @@ impl NativeJsonDeserializerConfig { } } +/// Vector's native JSON-specific decoding options. +#[configurable_component] +#[derive(Debug, Clone, PartialEq, Eq, Derivative)] +#[derivative(Default)] +pub struct NativeJsonDeserializerOptions { + /// Determines whether or not to replace invalid UTF-8 sequences instead of failing. + /// + /// When true, invalid UTF-8 sequences are replaced with the [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]. + /// + /// [U+FFFD]: https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character + #[serde( + default = "default_lossy", + skip_serializing_if = "vector_core::serde::skip_serializing_if_default" + )] + #[derivative(Default(value = "default_lossy()"))] + pub lossy: bool, +} + /// Deserializer that builds `Event`s from a byte frame containing Vector's native JSON /// representation. 
-#[derive(Debug, Clone, Default)] -pub struct NativeJsonDeserializer; +#[derive(Debug, Clone, Derivative)] +#[derivative(Default)] +pub struct NativeJsonDeserializer { + #[derivative(Default(value = "default_lossy()"))] + lossy: bool, +} impl Deserializer for NativeJsonDeserializer { fn parse( @@ -56,8 +96,11 @@ impl Deserializer for NativeJsonDeserializer { return Ok(smallvec![]); } - let json: serde_json::Value = serde_json::from_slice(&bytes) - .map_err(|error| format!("Error parsing JSON: {:?}", error))?; + let json: serde_json::Value = match self.lossy { + true => serde_json::from_str(&String::from_utf8_lossy(&bytes)), + false => serde_json::from_slice(&bytes), + } + .map_err(|error| format!("Error parsing JSON: {:?}", error))?; let events = match json { serde_json::Value::Array(values) => values @@ -79,7 +122,7 @@ mod test { #[test] fn parses_top_level_arrays() { - let config = NativeJsonDeserializerConfig; + let config = NativeJsonDeserializerConfig::default(); let deserializer = config.build(); let json1 = json!({"a": "b", "c": "d"}); diff --git a/lib/codecs/src/decoding/format/syslog.rs b/lib/codecs/src/decoding/format/syslog.rs index 68824b0e923d5..336d7c1aa232f 100644 --- a/lib/codecs/src/decoding/format/syslog.rs +++ b/lib/codecs/src/decoding/format/syslog.rs @@ -1,11 +1,13 @@ use bytes::Bytes; use chrono::{DateTime, Datelike, Utc}; +use derivative::Derivative; use lookup::lookup_v2::parse_value_path; use lookup::{event_path, owned_value_path, OwnedTargetPath, OwnedValuePath, PathPrefix}; -use serde::{Deserialize, Serialize}; use smallvec::{smallvec, SmallVec}; +use std::borrow::Cow; use std::collections::BTreeMap; use syslog_loose::{IncompleteDate, Message, ProcId, Protocol}; +use vector_config::configurable_component; use vector_core::config::{LegacyKey, LogNamespace}; use vector_core::{ config::{log_schema, DataType}, @@ -14,25 +16,46 @@ use vector_core::{ }; use vrl::value::{kind::Collection, Kind}; -use super::Deserializer; +use super::{default_lossy, Deserializer}; /// Config used to build a `SyslogDeserializer`. -#[derive(Debug, Clone, Default, Deserialize, Serialize)] +#[configurable_component] +#[derive(Debug, Clone, Default)] pub struct SyslogDeserializerConfig { + #[serde(skip)] source: Option<&'static str>, + + /// Syslog-specific decoding options. + #[serde( + default, + skip_serializing_if = "vector_core::serde::skip_serializing_if_default" + )] + pub syslog: SyslogDeserializerOptions, } impl SyslogDeserializerConfig { + /// Creates a new `SyslogDeserializerConfig`. + pub fn new(options: SyslogDeserializerOptions) -> Self { + Self { + source: None, + syslog: options, + } + } + /// Create the `SyslogDeserializer` from the given source name. pub fn from_source(source: &'static str) -> Self { Self { source: Some(source), + ..Default::default() } } /// Build the `SyslogDeserializer` from this configuration. pub const fn build(&self) -> SyslogDeserializer { - SyslogDeserializer { source: None } + SyslogDeserializer { + source: self.source, + lossy: self.syslog.lossy, + } } /// Return the type of event build by this deserializer. 
@@ -71,7 +94,11 @@ impl SyslogDeserializerConfig { ) .optional_field(&owned_value_path!("facility"), Kind::bytes(), None) .optional_field(&owned_value_path!("version"), Kind::integer(), None) - .optional_field(&owned_value_path!("appname"), Kind::bytes(), None) + .optional_field( + &owned_value_path!("appname"), + Kind::bytes(), + Some("service"), + ) .optional_field(&owned_value_path!("msgid"), Kind::bytes(), None) .optional_field( &owned_value_path!("procid"), @@ -112,7 +139,11 @@ impl SyslogDeserializerConfig { ) .optional_field(&owned_value_path!("facility"), Kind::bytes(), None) .optional_field(&owned_value_path!("version"), Kind::integer(), None) - .optional_field(&owned_value_path!("appname"), Kind::bytes(), None) + .optional_field( + &owned_value_path!("appname"), + Kind::bytes(), + Some("service"), + ) .optional_field(&owned_value_path!("msgid"), Kind::bytes(), None) .optional_field( &owned_value_path!("procid"), @@ -172,7 +203,7 @@ impl SyslogDeserializerConfig { None, &owned_value_path!("appname"), Kind::bytes().or_undefined(), - None, + Some("service"), ) .with_source_metadata( source, @@ -210,14 +241,35 @@ impl SyslogDeserializerConfig { } } +/// Syslog-specific decoding options. +#[configurable_component] +#[derive(Debug, Clone, PartialEq, Eq, Derivative)] +#[derivative(Default)] +pub struct SyslogDeserializerOptions { + /// Determines whether or not to replace invalid UTF-8 sequences instead of failing. + /// + /// When true, invalid UTF-8 sequences are replaced with the [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]. + /// + /// [U+FFFD]: https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character + #[serde( + default = "default_lossy", + skip_serializing_if = "vector_core::serde::skip_serializing_if_default" + )] + #[derivative(Default(value = "default_lossy()"))] + pub lossy: bool, +} + /// Deserializer that builds an `Event` from a byte frame containing a syslog /// message. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Derivative)] +#[derivative(Default)] pub struct SyslogDeserializer { /// The syslog source needs it's own syslog deserializer separate from the /// syslog codec since it needs to handle the structured of the decoded data /// differently when using the Vector lognamespace. 
pub source: Option<&'static str>, + #[derivative(Default(value = "default_lossy()"))] + lossy: bool, } impl Deserializer for SyslogDeserializer { @@ -226,7 +278,10 @@ impl Deserializer for SyslogDeserializer { bytes: Bytes, log_namespace: LogNamespace, ) -> vector_common::Result> { - let line = std::str::from_utf8(&bytes)?; + let line: Cow = match self.lossy { + true => String::from_utf8_lossy(&bytes), + false => Cow::from(std::str::from_utf8(&bytes)?), + }; let line = line.trim(); let parsed = syslog_loose::parse_message_with_year_exact(line, resolve_year)?; @@ -441,7 +496,7 @@ mod tests { let input = Bytes::from("<34>1 2003-10-11T22:14:15.003Z mymachine.example.com su - ID47 - MSG"); - let deserializer = SyslogDeserializer { source: None }; + let deserializer = SyslogDeserializer::default(); let events = deserializer.parse(input, LogNamespace::Legacy).unwrap(); assert_eq!(events.len(), 1); @@ -457,7 +512,7 @@ mod tests { let input = Bytes::from("<34>1 2003-10-11T22:14:15.003Z mymachine.example.com su - ID47 - MSG"); - let deserializer = SyslogDeserializer { source: None }; + let deserializer = SyslogDeserializer::default(); let events = deserializer.parse(input, LogNamespace::Vector).unwrap(); assert_eq!(events.len(), 1); diff --git a/lib/codecs/src/decoding/framing/character_delimited.rs b/lib/codecs/src/decoding/framing/character_delimited.rs index ef7e49c63f759..2c4d0ef918125 100644 --- a/lib/codecs/src/decoding/framing/character_delimited.rs +++ b/lib/codecs/src/decoding/framing/character_delimited.rs @@ -1,6 +1,5 @@ use bytes::{Buf, Bytes, BytesMut}; use memchr::memchr; -use serde::{Deserialize, Serialize}; use tokio_util::codec::Decoder; use tracing::{trace, warn}; use vector_config::configurable_component; @@ -8,7 +7,8 @@ use vector_config::configurable_component; use super::BoxedFramingError; /// Config used to build a `CharacterDelimitedDecoder`. -#[derive(Debug, Clone, Deserialize, Serialize)] +#[configurable_component] +#[derive(Debug, Clone)] pub struct CharacterDelimitedDecoderConfig { /// Options for the character delimited decoder. pub character_delimited: CharacterDelimitedDecoderOptions, diff --git a/lib/codecs/src/decoding/framing/newline_delimited.rs b/lib/codecs/src/decoding/framing/newline_delimited.rs index cc96d08d7ab40..6a1f2c81caca1 100644 --- a/lib/codecs/src/decoding/framing/newline_delimited.rs +++ b/lib/codecs/src/decoding/framing/newline_delimited.rs @@ -1,19 +1,19 @@ use bytes::{Bytes, BytesMut}; use derivative::Derivative; -use serde::{Deserialize, Serialize}; use tokio_util::codec::Decoder; use vector_config::configurable_component; use super::{BoxedFramingError, CharacterDelimitedDecoder}; /// Config used to build a `NewlineDelimitedDecoder`. -#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)] +#[configurable_component] +#[derive(Debug, Clone, Default, PartialEq, Eq)] pub struct NewlineDelimitedDecoderConfig { + /// Options for the newline delimited decoder. #[serde( default, skip_serializing_if = "vector_core::serde::skip_serializing_if_default" )] - /// Options for the newline delimited decoder. 
pub newline_delimited: NewlineDelimitedDecoderOptions, } diff --git a/lib/codecs/src/decoding/framing/octet_counting.rs b/lib/codecs/src/decoding/framing/octet_counting.rs index 0e9f2d15a4b4d..281a8b64dba03 100644 --- a/lib/codecs/src/decoding/framing/octet_counting.rs +++ b/lib/codecs/src/decoding/framing/octet_counting.rs @@ -2,7 +2,6 @@ use std::io; use bytes::{Buf, Bytes, BytesMut}; use derivative::Derivative; -use serde::{Deserialize, Serialize}; use tokio_util::codec::{LinesCodec, LinesCodecError}; use tracing::trace; use vector_config::configurable_component; @@ -10,7 +9,8 @@ use vector_config::configurable_component; use super::BoxedFramingError; /// Config used to build a `OctetCountingDecoder`. -#[derive(Debug, Clone, Default, Deserialize, Serialize)] +#[configurable_component] +#[derive(Debug, Clone, Default)] pub struct OctetCountingDecoderConfig { #[serde( default, diff --git a/lib/codecs/src/decoding/mod.rs b/lib/codecs/src/decoding/mod.rs index 31237bd4af9f5..5af56aed89de6 100644 --- a/lib/codecs/src/decoding/mod.rs +++ b/lib/codecs/src/decoding/mod.rs @@ -9,12 +9,12 @@ use bytes::{Bytes, BytesMut}; pub use error::StreamDecodingError; pub use format::{ BoxedDeserializer, BytesDeserializer, BytesDeserializerConfig, GelfDeserializer, - GelfDeserializerConfig, JsonDeserializer, JsonDeserializerConfig, MezmoDeserializer, - NativeDeserializer, NativeDeserializerConfig, NativeJsonDeserializer, - NativeJsonDeserializerConfig, + GelfDeserializerConfig, GelfDeserializerOptions, JsonDeserializer, JsonDeserializerConfig, + JsonDeserializerOptions, MezmoDeserializer, NativeDeserializer, NativeDeserializerConfig, + NativeJsonDeserializer, NativeJsonDeserializerConfig, NativeJsonDeserializerOptions, }; #[cfg(feature = "syslog")] -pub use format::{SyslogDeserializer, SyslogDeserializerConfig}; +pub use format::{SyslogDeserializer, SyslogDeserializerConfig, SyslogDeserializerOptions}; pub use framing::{ BoxedFramer, BoxedFramingError, BytesDecoder, BytesDecoderConfig, CharacterDelimitedDecoder, CharacterDelimitedDecoderConfig, CharacterDelimitedDecoderOptions, FramingError, @@ -73,9 +73,6 @@ impl StreamDecodingError for Error { /// Framing handles how events are separated when encoded in a raw byte form, where each event is /// a frame that must be prefixed, or delimited, in a way that marks where an event begins and /// ends within the byte stream. -// Unfortunately, copying options of the nested enum variants is necessary -// since `serde` doesn't allow `flatten`ing these: -// https://github.com/serde-rs/serde/issues/1402. #[configurable_component] #[derive(Clone, Debug)] #[serde(tag = "method", rename_all = "snake_case")] @@ -85,35 +82,18 @@ pub enum FramingConfig { Bytes, /// Byte frames which are delimited by a chosen character. - CharacterDelimited { - /// Options for the character delimited decoder. - character_delimited: CharacterDelimitedDecoderOptions, - }, + CharacterDelimited(CharacterDelimitedDecoderConfig), /// Byte frames which are prefixed by an unsigned big-endian 32-bit integer indicating the length. LengthDelimited, /// Byte frames which are delimited by a newline character. - NewlineDelimited { - #[serde( - default, - skip_serializing_if = "vector_core::serde::skip_serializing_if_default" - )] - /// Options for the newline delimited decoder. - newline_delimited: NewlineDelimitedDecoderOptions, - }, + NewlineDelimited(NewlineDelimitedDecoderConfig), /// Byte frames according to the [octet counting][octet_counting] format. 
/// /// [octet_counting]: https://tools.ietf.org/html/rfc6587#section-3.4.1 - OctetCounting { - #[serde( - default, - skip_serializing_if = "vector_core::serde::skip_serializing_if_default" - )] - /// Options for the octet counting decoder. - octet_counting: OctetCountingDecoderOptions, - }, + OctetCounting(OctetCountingDecoderConfig), } impl From for FramingConfig { @@ -124,9 +104,7 @@ impl From for FramingConfig { impl From for FramingConfig { fn from(config: CharacterDelimitedDecoderConfig) -> Self { - Self::CharacterDelimited { - character_delimited: config.character_delimited, - } + Self::CharacterDelimited(config) } } @@ -138,17 +116,13 @@ impl From for FramingConfig { impl From for FramingConfig { fn from(config: NewlineDelimitedDecoderConfig) -> Self { - Self::NewlineDelimited { - newline_delimited: config.newline_delimited, - } + Self::NewlineDelimited(config) } } impl From for FramingConfig { fn from(config: OctetCountingDecoderConfig) -> Self { - Self::OctetCounting { - octet_counting: config.octet_counting, - } + Self::OctetCounting(config) } } @@ -157,29 +131,12 @@ impl FramingConfig { pub fn build(&self) -> Framer { match self { FramingConfig::Bytes => Framer::Bytes(BytesDecoderConfig.build()), - FramingConfig::CharacterDelimited { - character_delimited, - } => Framer::CharacterDelimited( - CharacterDelimitedDecoderConfig { - character_delimited: character_delimited.clone(), - } - .build(), - ), + FramingConfig::CharacterDelimited(config) => Framer::CharacterDelimited(config.build()), FramingConfig::LengthDelimited => { Framer::LengthDelimited(LengthDelimitedDecoderConfig.build()) } - FramingConfig::NewlineDelimited { newline_delimited } => Framer::NewlineDelimited( - NewlineDelimitedDecoderConfig { - newline_delimited: newline_delimited.clone(), - } - .build(), - ), - FramingConfig::OctetCounting { octet_counting } => Framer::OctetCounting( - OctetCountingDecoderConfig { - octet_counting: octet_counting.clone(), - } - .build(), - ), + FramingConfig::NewlineDelimited(config) => Framer::NewlineDelimited(config.build()), + FramingConfig::OctetCounting(config) => Framer::OctetCounting(config.build()), } } } @@ -229,9 +186,6 @@ impl tokio_util::codec::Decoder for Framer { } /// Deserializer configuration. -// Unfortunately, copying options of the nested enum variants is necessary -// since `serde` doesn't allow `flatten`ing these: -// https://github.com/serde-rs/serde/issues/1402. #[configurable_component] #[derive(Clone, Debug)] #[serde(tag = "codec", rename_all = "snake_case")] @@ -244,7 +198,7 @@ pub enum DeserializerConfig { /// Decodes the raw bytes as [JSON][json]. /// /// [json]: https://www.json.org/ - Json, + Json(JsonDeserializerConfig), #[cfg(feature = "syslog")] /// Decodes the raw bytes as a Syslog message. @@ -254,7 +208,7 @@ pub enum DeserializerConfig { /// /// [rfc3164]: https://www.ietf.org/rfc/rfc3164.txt /// [rfc5424]: https://www.ietf.org/rfc/rfc5424.txt - Syslog, + Syslog(SyslogDeserializerConfig), /// Decodes the raw bytes as Vector’s [native Protocol Buffers format][vector_native_protobuf]. /// @@ -270,12 +224,12 @@ pub enum DeserializerConfig { /// /// [vector_native_json]: https://github.com/vectordotdev/vector/blob/master/lib/codecs/tests/data/native_encoding/schema.cue /// [experimental]: https://vector.dev/highlights/2022-03-31-native-event-codecs - NativeJson, + NativeJson(NativeJsonDeserializerConfig), /// Decodes the raw bytes as a [GELF][gelf] message. 
/// /// [gelf]: https://docs.graylog.org/docs/gelf - Gelf, + Gelf(GelfDeserializerConfig), /// Decode the raw bytes using one of Mezmo's deserializers /// @@ -291,21 +245,33 @@ impl From for DeserializerConfig { } impl From for DeserializerConfig { - fn from(_: JsonDeserializerConfig) -> Self { - Self::Json + fn from(config: JsonDeserializerConfig) -> Self { + Self::Json(config) } } #[cfg(feature = "syslog")] impl From for DeserializerConfig { - fn from(_: SyslogDeserializerConfig) -> Self { - Self::Syslog + fn from(config: SyslogDeserializerConfig) -> Self { + Self::Syslog(config) } } impl From for DeserializerConfig { - fn from(_: GelfDeserializerConfig) -> Self { - Self::Gelf + fn from(config: GelfDeserializerConfig) -> Self { + Self::Gelf(config) + } +} + +impl From for DeserializerConfig { + fn from(_: NativeDeserializerConfig) -> Self { + Self::Native + } +} + +impl From for DeserializerConfig { + fn from(config: NativeJsonDeserializerConfig) -> Self { + Self::NativeJson(config) } } @@ -314,16 +280,12 @@ impl DeserializerConfig { pub fn build(&self) -> Deserializer { match self { DeserializerConfig::Bytes => Deserializer::Bytes(BytesDeserializerConfig.build()), - DeserializerConfig::Json => Deserializer::Json(JsonDeserializerConfig.build()), + DeserializerConfig::Json(config) => Deserializer::Json(config.build()), #[cfg(feature = "syslog")] - DeserializerConfig::Syslog => { - Deserializer::Syslog(SyslogDeserializerConfig::default().build()) - } + DeserializerConfig::Syslog(config) => Deserializer::Syslog(config.build()), DeserializerConfig::Native => Deserializer::Native(NativeDeserializerConfig.build()), - DeserializerConfig::NativeJson => { - Deserializer::NativeJson(NativeJsonDeserializerConfig.build()) - } - DeserializerConfig::Gelf => Deserializer::Gelf(GelfDeserializerConfig.build()), + DeserializerConfig::NativeJson(config) => Deserializer::NativeJson(config.build()), + DeserializerConfig::Gelf(config) => Deserializer::Gelf(config.build()), DeserializerConfig::Mezmo(config) => Deserializer::Boxed(config.build()), } } @@ -333,16 +295,14 @@ impl DeserializerConfig { match self { DeserializerConfig::Native => FramingConfig::LengthDelimited, DeserializerConfig::Bytes - | DeserializerConfig::Json - | DeserializerConfig::Gelf - | DeserializerConfig::NativeJson => FramingConfig::NewlineDelimited { - newline_delimited: Default::default(), - }, - DeserializerConfig::Mezmo(config) => config.default_stream_framing(), + | DeserializerConfig::Json(_) + | DeserializerConfig::Gelf(_) + | DeserializerConfig::NativeJson(_) => { + FramingConfig::NewlineDelimited(Default::default()) + } #[cfg(feature = "syslog")] - DeserializerConfig::Syslog => FramingConfig::NewlineDelimited { - newline_delimited: Default::default(), - }, + DeserializerConfig::Syslog(_) => FramingConfig::NewlineDelimited(Default::default()), + DeserializerConfig::Mezmo(config) => config.default_stream_framing(), } } @@ -350,12 +310,12 @@ impl DeserializerConfig { pub fn output_type(&self) -> DataType { match self { DeserializerConfig::Bytes => BytesDeserializerConfig.output_type(), - DeserializerConfig::Json => JsonDeserializerConfig.output_type(), + DeserializerConfig::Json(config) => config.output_type(), #[cfg(feature = "syslog")] - DeserializerConfig::Syslog => SyslogDeserializerConfig::default().output_type(), + DeserializerConfig::Syslog(config) => config.output_type(), DeserializerConfig::Native => NativeDeserializerConfig.output_type(), - DeserializerConfig::NativeJson => NativeJsonDeserializerConfig.output_type(), - 
DeserializerConfig::Gelf => GelfDeserializerConfig.output_type(), + DeserializerConfig::NativeJson(config) => config.output_type(), + DeserializerConfig::Gelf(config) => config.output_type(), DeserializerConfig::Mezmo(config) => config.output_type(), } } @@ -364,16 +324,12 @@ impl DeserializerConfig { pub fn schema_definition(&self, log_namespace: LogNamespace) -> schema::Definition { match self { DeserializerConfig::Bytes => BytesDeserializerConfig.schema_definition(log_namespace), - DeserializerConfig::Json => JsonDeserializerConfig.schema_definition(log_namespace), + DeserializerConfig::Json(config) => config.schema_definition(log_namespace), #[cfg(feature = "syslog")] - DeserializerConfig::Syslog => { - SyslogDeserializerConfig::default().schema_definition(log_namespace) - } + DeserializerConfig::Syslog(config) => config.schema_definition(log_namespace), DeserializerConfig::Native => NativeDeserializerConfig.schema_definition(log_namespace), - DeserializerConfig::NativeJson => { - NativeJsonDeserializerConfig.schema_definition(log_namespace) - } - DeserializerConfig::Gelf => GelfDeserializerConfig.schema_definition(log_namespace), + DeserializerConfig::NativeJson(config) => config.schema_definition(log_namespace), + DeserializerConfig::Gelf(config) => config.schema_definition(log_namespace), DeserializerConfig::Mezmo(config) => config.schema_definition(log_namespace), } } @@ -382,31 +338,31 @@ impl DeserializerConfig { pub const fn content_type(&self, framer: &FramingConfig) -> &'static str { match (&self, framer) { ( - DeserializerConfig::Json | DeserializerConfig::NativeJson, - FramingConfig::NewlineDelimited { .. }, + DeserializerConfig::Json(_) | DeserializerConfig::NativeJson(_), + FramingConfig::NewlineDelimited(_), ) => "application/x-ndjson", ( - DeserializerConfig::Gelf - | DeserializerConfig::Json - | DeserializerConfig::NativeJson, - FramingConfig::CharacterDelimited { + DeserializerConfig::Gelf(_) + | DeserializerConfig::Json(_) + | DeserializerConfig::NativeJson(_), + FramingConfig::CharacterDelimited(CharacterDelimitedDecoderConfig { character_delimited: CharacterDelimitedDecoderOptions { delimiter: b',', max_length: Some(usize::MAX), }, - }, + }), ) => "application/json", (DeserializerConfig::Native, _) => "application/octet-stream", ( - DeserializerConfig::Json - | DeserializerConfig::NativeJson + DeserializerConfig::Json(_) + | DeserializerConfig::NativeJson(_) | DeserializerConfig::Bytes - | DeserializerConfig::Gelf, + | DeserializerConfig::Gelf(_), _, ) => "text/plain", #[cfg(feature = "syslog")] - (DeserializerConfig::Syslog, _) => "text/plain", + (DeserializerConfig::Syslog(_), _) => "text/plain", (DeserializerConfig::Mezmo(config), framer) => config.content_type(framer), } } diff --git a/lib/codecs/src/encoding/format/avro.rs b/lib/codecs/src/encoding/format/avro.rs index 35bf45890043d..bfac175f18c21 100644 --- a/lib/codecs/src/encoding/format/avro.rs +++ b/lib/codecs/src/encoding/format/avro.rs @@ -47,6 +47,7 @@ pub struct AvroSerializerOptions { #[configurable(metadata( docs::examples = r#"{ "type": "record", "name": "log", "fields": [{ "name": "message", "type": "string" }] }"# ))] + #[configurable(metadata(docs::human_name = "Schema JSON"))] pub schema: String, } @@ -81,7 +82,7 @@ mod tests { use bytes::BytesMut; use indoc::indoc; use vector_core::event::{LogEvent, Value}; - use vrl::value::btreemap; + use vrl::btreemap; use super::*; diff --git a/lib/codecs/src/encoding/format/gelf.rs b/lib/codecs/src/encoding/format/gelf.rs index 799fe41adb526..3a0c6d0461991 
100644 --- a/lib/codecs/src/encoding/format/gelf.rs +++ b/lib/codecs/src/encoding/format/gelf.rs @@ -241,7 +241,7 @@ mod tests { use super::*; use chrono::{DateTime, NaiveDateTime, Utc}; use vector_core::event::{Event, EventMetadata}; - use vrl::value::btreemap; + use vrl::btreemap; use vrl::value::Value; fn do_serialize( diff --git a/lib/codecs/src/encoding/format/json.rs b/lib/codecs/src/encoding/format/json.rs index 73ab79c78c095..943a421c77556 100644 --- a/lib/codecs/src/encoding/format/json.rs +++ b/lib/codecs/src/encoding/format/json.rs @@ -91,7 +91,7 @@ mod tests { use chrono::{TimeZone, Timelike, Utc}; use vector_core::event::{LogEvent, Metric, MetricKind, MetricValue, StatisticKind, Value}; use vector_core::metric_tags; - use vrl::value::btreemap; + use vrl::btreemap; use super::*; diff --git a/lib/codecs/src/encoding/format/logfmt.rs b/lib/codecs/src/encoding/format/logfmt.rs index 37b0351b085ee..e656cc2c52164 100644 --- a/lib/codecs/src/encoding/format/logfmt.rs +++ b/lib/codecs/src/encoding/format/logfmt.rs @@ -60,7 +60,7 @@ mod tests { use super::*; use bytes::BytesMut; use vector_core::event::{LogEvent, Value}; - use vrl::value::btreemap; + use vrl::btreemap; #[test] fn serialize_logfmt() { diff --git a/lib/codecs/src/encoding/format/native_json.rs b/lib/codecs/src/encoding/format/native_json.rs index 6762c92737834..854bba9d97ec2 100644 --- a/lib/codecs/src/encoding/format/native_json.rs +++ b/lib/codecs/src/encoding/format/native_json.rs @@ -53,7 +53,7 @@ impl Encoder for NativeJsonSerializer { mod tests { use bytes::BytesMut; use vector_core::event::{LogEvent, Value}; - use vrl::value::btreemap; + use vrl::btreemap; use super::*; diff --git a/lib/codecs/src/encoding/framing/character_delimited.rs b/lib/codecs/src/encoding/framing/character_delimited.rs index f93c279d61cf8..2e536976618b8 100644 --- a/lib/codecs/src/encoding/framing/character_delimited.rs +++ b/lib/codecs/src/encoding/framing/character_delimited.rs @@ -1,12 +1,12 @@ use bytes::{BufMut, BytesMut}; -use serde::{Deserialize, Serialize}; use tokio_util::codec::Encoder; use vector_config::configurable_component; use super::BoxedFramingError; /// Config used to build a `CharacterDelimitedEncoder`. -#[derive(Debug, Clone, Deserialize, Serialize)] +#[configurable_component] +#[derive(Debug, Clone, Eq, PartialEq)] pub struct CharacterDelimitedEncoderConfig { /// Options for the character delimited encoder. pub character_delimited: CharacterDelimitedEncoderOptions, diff --git a/lib/codecs/src/encoding/mod.rs b/lib/codecs/src/encoding/mod.rs index d798e8d7bf9d9..f9516411720d1 100644 --- a/lib/codecs/src/encoding/mod.rs +++ b/lib/codecs/src/encoding/mod.rs @@ -61,10 +61,7 @@ pub enum FramingConfig { Bytes, /// Event data is delimited by a single ASCII (7-bit) character. - CharacterDelimited { - /// Options for the character delimited encoder. - character_delimited: CharacterDelimitedEncoderOptions, - }, + CharacterDelimited(CharacterDelimitedEncoderConfig), /// Event data is prefixed with its length in bytes. 
/// @@ -83,9 +80,7 @@ impl From for FramingConfig { impl From for FramingConfig { fn from(config: CharacterDelimitedEncoderConfig) -> Self { - Self::CharacterDelimited { - character_delimited: config.character_delimited, - } + Self::CharacterDelimited(config) } } @@ -106,14 +101,7 @@ impl FramingConfig { pub fn build(&self) -> Framer { match self { FramingConfig::Bytes => Framer::Bytes(BytesEncoderConfig.build()), - FramingConfig::CharacterDelimited { - character_delimited, - } => Framer::CharacterDelimited( - CharacterDelimitedEncoderConfig { - character_delimited: character_delimited.clone(), - } - .build(), - ), + FramingConfig::CharacterDelimited(config) => Framer::CharacterDelimited(config.build()), FramingConfig::LengthDelimited => { Framer::LengthDelimited(LengthDelimitedEncoderConfig.build()) } @@ -201,10 +189,7 @@ pub enum SerializerConfig { /// /// This codec must be configured with fields to encode. /// - Csv( - /// Options for the CSV encoder. - CsvSerializerConfig, - ), + Csv(CsvSerializerConfig), /// Encodes an event as a [GELF][gelf] message. /// @@ -214,10 +199,7 @@ pub enum SerializerConfig { /// Encodes an event as [JSON][json]. /// /// [json]: https://www.json.org/ - Json( - /// Encoding options specific to the text serializer. - JsonSerializerConfig, - ), + Json(JsonSerializerConfig), /// Encodes an event as a [logfmt][logfmt] message. /// @@ -257,10 +239,7 @@ pub enum SerializerConfig { /// Be careful if you are modifying your log events (for example, by using a `remap` /// transform) and removing the message field while doing additional parsing on it, as this /// could lead to the encoding emitting empty strings for the given event. - Text( - /// Encoding options specific to the text serializer. - TextSerializerConfig, - ), + Text(TextSerializerConfig), } impl From for SerializerConfig { diff --git a/lib/codecs/src/lib.rs b/lib/codecs/src/lib.rs index c2bd4ec1c1e7f..9ec6bcb767538 100644 --- a/lib/codecs/src/lib.rs +++ b/lib/codecs/src/lib.rs @@ -4,15 +4,9 @@ #![deny(missing_docs)] #![deny(warnings)] #![allow( - clippy::arc_with_non_send_sync, - clippy::default_constructed_unit_structs, clippy::explicit_iter_loop, - clippy::missing_fields_in_debug, clippy::missing_panics_doc, clippy::needless_lifetimes, - clippy::needless_pub_self, - clippy::needless_raw_string_hashes, - clippy::non_minimal_cfg, clippy::redundant_closure_call, clippy::redundant_pattern_matching, clippy::useless_conversion, diff --git a/lib/codecs/tests/native.rs b/lib/codecs/tests/native.rs index d4f9a139d35a4..3d2e9f0ea8a34 100644 --- a/lib/codecs/tests/native.rs +++ b/lib/codecs/tests/native.rs @@ -27,7 +27,7 @@ fn roundtrip_current_native_json_fixtures() { roundtrip_fixtures( "json", "", - &NativeJsonDeserializerConfig.build(), + &NativeJsonDeserializerConfig::default().build(), &mut NativeJsonSerializerConfig.build(), false, ); @@ -51,7 +51,7 @@ fn reserialize_pre_v24_native_json_fixtures() { roundtrip_fixtures( "json", "pre-v24", - &NativeJsonDeserializerConfig.build(), + &NativeJsonDeserializerConfig::default().build(), &mut NativeJsonSerializerConfig.build(), true, ); @@ -100,7 +100,7 @@ fn pre_v24_native_decoding_matches() { fn rebuild_json_fixtures() { rebuild_fixtures( "json", - &NativeJsonDeserializerConfig.build(), + &NativeJsonDeserializerConfig::default().build(), &mut NativeJsonSerializerConfig.build(), ); } @@ -134,7 +134,7 @@ fn fixtures_match(suffix: &str) { /// This test ensures we can load the serialized binaries binary and that they match across /// protocols. 
fn decoding_matches(suffix: &str) { - let json_deserializer = NativeJsonDeserializerConfig.build(); + let json_deserializer = NativeJsonDeserializerConfig::default().build(); let proto_deserializer = NativeDeserializerConfig.build(); let json_entries = list_fixtures("json", suffix); diff --git a/lib/dnsmsg-parser/Cargo.toml b/lib/dnsmsg-parser/Cargo.toml index eb1d8a270df35..e6ba28689e1aa 100644 --- a/lib/dnsmsg-parser/Cargo.toml +++ b/lib/dnsmsg-parser/Cargo.toml @@ -7,12 +7,12 @@ publish = false license = "MIT" [dependencies] -data-encoding = "2.3" +data-encoding = "2.4" thiserror = "1.0" trust-dns-proto = { version = "0.22", features = ["dnssec"] } [dev-dependencies] -criterion = "0.4" +criterion = "0.5" [lib] bench = false diff --git a/lib/enrichment/Cargo.toml b/lib/enrichment/Cargo.toml index 53d451951b1e6..1a8e2467f0fed 100644 --- a/lib/enrichment/Cargo.toml +++ b/lib/enrichment/Cargo.toml @@ -7,7 +7,6 @@ publish = false [dependencies] arc-swap = { version = "1.6.0", default-features = false } -dyn-clone = { version = "1.0.11", default-features = false } chrono = { version = "0.4.19", default-features = false } -vector-common = { path = "../vector-common", default-features = false, features = [ "btreemap", "conversion", "serde" ] } -vrl = { git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0", default-features = false, features = ["diagnostic"] } +dyn-clone = { version = "1.0.11", default-features = false } +vrl.workspace = true diff --git a/lib/enrichment/src/find_enrichment_table_records.rs b/lib/enrichment/src/find_enrichment_table_records.rs index f4fbfd03c3ea6..3ade47b7c55a6 100644 --- a/lib/enrichment/src/find_enrichment_table_records.rs +++ b/lib/enrichment/src/find_enrichment_table_records.rs @@ -191,9 +191,10 @@ impl FunctionExpression for FindEnrichmentTableRecordsFn { #[cfg(test)] mod tests { - use vector_common::TimeZone; use vrl::compiler::state::RuntimeState; use vrl::compiler::TargetValue; + use vrl::compiler::TimeZone; + use vrl::value; use vrl::value::Secrets; use super::*; diff --git a/lib/enrichment/src/get_enrichment_table_record.rs b/lib/enrichment/src/get_enrichment_table_record.rs index 93c432ccdd09f..3678d85e4fea0 100644 --- a/lib/enrichment/src/get_enrichment_table_record.rs +++ b/lib/enrichment/src/get_enrichment_table_record.rs @@ -183,9 +183,10 @@ impl FunctionExpression for GetEnrichmentTableRecordFn { #[cfg(test)] mod tests { - use vector_common::TimeZone; + use vrl::compiler::prelude::TimeZone; use vrl::compiler::state::RuntimeState; use vrl::compiler::TargetValue; + use vrl::value; use vrl::value::Secrets; use super::*; diff --git a/lib/file-source/Cargo.toml b/lib/file-source/Cargo.toml index 21ff50cea5b50..50631a6765175 100644 --- a/lib/file-source/Cargo.toml +++ b/lib/file-source/Cargo.toml @@ -19,7 +19,7 @@ vector-config-common = { path = "../vector-config-common", default-features = fa vector-config-macros = { path = "../vector-config-macros", default-features = false } [dependencies.bstr] -version = "1.4" +version = "1.5" default-features = false features = [] @@ -39,7 +39,7 @@ default-features = false features = [] [dependencies.indexmap] -version = "~1.9.3" +version = "~2.0.0" default-features = false features = ["serde"] @@ -69,14 +69,14 @@ default-features = false features = [] [dependencies.tokio] -version = "1.28.1" +version = "1.29.0" default-features = false features = ["full"] [dev-dependencies] -criterion = "0.4" +criterion = "0.5" quickcheck = "1" -tempfile = "3.5.0" +tempfile = "3.6.0" similar-asserts = "1.4.2" [[bench]] 
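The `vrl.workspace = true` entries above assume that the workspace root Cargo.toml, which this patch does not show, declares a shared `vrl` dependency under `[workspace.dependencies]`. A minimal sketch of what that entry might look like, reusing the git source and rev that the per-crate manifests previously pinned; the exact source, rev, and feature set in the real root manifest are an assumption here:

# Hypothetical sketch of the root manifest entry; not part of this patch.
[workspace.dependencies]
vrl = { git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0" }

With such an entry in place, each member crate inherits the same source and rev, so future version bumps only need to touch the root manifest.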
diff --git a/lib/k8s-e2e-tests/Cargo.toml b/lib/k8s-e2e-tests/Cargo.toml index 2489b6c769273..fd1a5aa7ab672 100644 --- a/lib/k8s-e2e-tests/Cargo.toml +++ b/lib/k8s-e2e-tests/Cargo.toml @@ -12,9 +12,9 @@ futures = "0.3" k8s-openapi = { version = "0.16.0", default-features = false, features = ["v1_19"] } k8s-test-framework = { version = "0.1", path = "../k8s-test-framework" } regex = "1" -reqwest = { version = "0.11.17", features = ["json"] } +reqwest = { version = "0.11.18", features = ["json"] } serde_json = "1" -tokio = { version = "1.28.1", features = ["full"] } +tokio = { version = "1.29.0", features = ["full"] } indoc = "2.0.1" env_logger = "0.10" tracing = { version = "0.1", features = ["log"] } diff --git a/lib/k8s-test-framework/Cargo.toml b/lib/k8s-test-framework/Cargo.toml index cdf8e55178a1a..7ab378add13ff 100644 --- a/lib/k8s-test-framework/Cargo.toml +++ b/lib/k8s-test-framework/Cargo.toml @@ -11,5 +11,5 @@ license = "MPL-2.0" k8s-openapi = { version = "0.16.0", default-features = false, features = ["v1_19"] } serde_json = "1" tempfile = "3" -tokio = { version = "1.28.1", features = ["full"] } +tokio = { version = "1.29.0", features = ["full"] } log = "0.4" diff --git a/lib/opentelemetry-proto/Cargo.toml b/lib/opentelemetry-proto/Cargo.toml index cb378f0be20a0..c2e3dbfac011a 100644 --- a/lib/opentelemetry-proto/Cargo.toml +++ b/lib/opentelemetry-proto/Cargo.toml @@ -17,5 +17,5 @@ lookup = { package = "vector-lookup", path = "../vector-lookup", default-feature ordered-float = { version = "3.7.0", default-features = false } prost = { version = "0.11", default-features = false, features = ["std"] } tonic = { version = "0.9", default-features = false, features = ["codegen", "gzip", "prost", "tls", "tls-roots", "transport"] } +vrl.workspace = true vector-core = { path = "../vector-core", default-features = false } -vrl = { git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0" } diff --git a/lib/prometheus-parser/Cargo.toml b/lib/prometheus-parser/Cargo.toml index 9fd225bbefba3..a52d1dd0ac22e 100644 --- a/lib/prometheus-parser/Cargo.toml +++ b/lib/prometheus-parser/Cargo.toml @@ -9,14 +9,14 @@ license = "MPL-2.0" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -indexmap = "~1.9.3" +indexmap = "~2.0.0" nom = "7.1.3" num_enum = "0.6.1" prost = "0.11" prost-types = "0.11" snafu = { version = "0.7" } vector-common = { path = "../vector-common", features = ["btreemap"] } -value = { package = "value", git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0" } +vrl.workspace = true [build-dependencies] prost-build = "0.11" diff --git a/lib/prometheus-parser/src/lib.rs b/lib/prometheus-parser/src/lib.rs index 9ce69f7ab5bf4..89b38100e206e 100644 --- a/lib/prometheus-parser/src/lib.rs +++ b/lib/prometheus-parser/src/lib.rs @@ -1,11 +1,5 @@ #![deny(warnings)] -#![allow( - clippy::default_constructed_unit_structs, - clippy::explicit_iter_loop, - clippy::needless_pub_self, - clippy::needless_raw_string_hashes, - clippy::useless_conversion -)] +#![allow(clippy::explicit_iter_loop, clippy::useless_conversion)] use std::{collections::BTreeMap, convert::TryFrom}; diff --git a/lib/tracing-limit/Cargo.toml b/lib/tracing-limit/Cargo.toml index ab5c634ff7ce3..48c0bf0762a98 100644 --- a/lib/tracing-limit/Cargo.toml +++ b/lib/tracing-limit/Cargo.toml @@ -12,7 +12,7 @@ tracing-subscriber = { version = "0.3", default-features = false, features = ["r dashmap = { version = "5.2.0", default-features = false } [dev-dependencies] 
-criterion = "0.4" +criterion = "0.5" tracing = "0.1.34" mock_instant = { version = "0.3" } tracing-subscriber = { version = "0.3.17", default-features = false, features = ["env-filter", "fmt"] } diff --git a/lib/vector-api-client/Cargo.toml b/lib/vector-api-client/Cargo.toml index 9d1b578ade2dd..ed243d3e368ec 100644 --- a/lib/vector-api-client/Cargo.toml +++ b/lib/vector-api-client/Cargo.toml @@ -9,8 +9,8 @@ license = "MPL-2.0" [dependencies] # Serde -serde = { version = "1.0.163", default-features = false, features = ["derive"] } -serde_json = { version = "1.0.96", default-features = false, features = ["raw_value"] } +serde = { version = "1.0.164", default-features = false, features = ["derive"] } +serde_json = { version = "1.0.99", default-features = false, features = ["raw_value"] } # Error handling anyhow = { version = "1.0.71", default-features = false, features = ["std"] } @@ -18,19 +18,19 @@ anyhow = { version = "1.0.71", default-features = false, features = ["std"] } # Tokio / Futures async-trait = { version = "0.1", default-features = false } futures = { version = "0.3", default-features = false, features = ["compat", "io-compat"] } -tokio = { version = "1.28.1", default-features = false, features = ["macros", "rt", "sync"] } +tokio = { version = "1.29.0", default-features = false, features = ["macros", "rt", "sync"] } tokio-stream = { version = "0.1.14", default-features = false, features = ["sync"] } # GraphQL -graphql_client = { version = "0.12.0", default-features = false, features = ["graphql_query_derive"] } +graphql_client = { version = "0.13.0", default-features = false, features = ["graphql_query_derive"] } # HTTP / WebSockets -reqwest = { version = "0.11.17", default-features = false, features = ["json"] } -tokio-tungstenite = { version = "0.20.1", default-features = false, features = ["connect", "rustls"] } +reqwest = { version = "0.11.18", default-features = false, features = ["json"] } +tokio-tungstenite = { version = "0.19.0", default-features = false, features = ["connect", "rustls"] } # External libs chrono = { version = "0.4.6", default-features = false, features = ["serde"] } clap = { version = "4.1.14", default-features = false, features = ["derive"] } -url = { version = "2.3.1", default-features = false } +url = { version = "2.4.0", default-features = false } uuid = { version = "1", default-features = false, features = ["serde", "v4"] } indoc = { version = "2.0.1", default-features = false } diff --git a/lib/vector-api-client/graphql/queries/components.graphql b/lib/vector-api-client/graphql/queries/components.graphql index 5be42459f8ef0..3a6f78fc27989 100644 --- a/lib/vector-api-client/graphql/queries/components.graphql +++ b/lib/vector-api-client/graphql/queries/components.graphql @@ -14,11 +14,8 @@ query ComponentsQuery($first: Int!) { } metrics { __typename - processedEventsTotal { - processedEventsTotal - } - processedBytesTotal { - processedBytesTotal + receivedBytesTotal { + receivedBytesTotal } receivedEventsTotal { receivedEventsTotal @@ -37,12 +34,6 @@ query ComponentsQuery($first: Int!) { } metrics { __typename - processedEventsTotal { - processedEventsTotal - } - processedBytesTotal { - processedBytesTotal - } receivedEventsTotal { receivedEventsTotal } @@ -54,15 +45,12 @@ query ComponentsQuery($first: Int!) { ... 
on Sink { metrics { __typename - processedEventsTotal { - processedEventsTotal - } - processedBytesTotal { - processedBytesTotal - } receivedEventsTotal { receivedEventsTotal } + sentBytesTotal { + sentBytesTotal + } sentEventsTotal { sentEventsTotal } diff --git a/lib/vector-api-client/graphql/schema.json b/lib/vector-api-client/graphql/schema.json index d9a413bd864ce..48c067d8ca850 100644 --- a/lib/vector-api-client/graphql/schema.json +++ b/lib/vector-api-client/graphql/schema.json @@ -399,12 +399,12 @@ }, { "kind": "OBJECT", - "name": "ComponentProcessedBytesThroughput", + "name": "ComponentReceivedBytesThroughput", "description": null, "fields": [ { "name": "componentId", - "description": "Component id", + "description": "Component ID.", "args": [], "type": { "kind": "NON_NULL", @@ -420,7 +420,7 @@ }, { "name": "throughput", - "description": "Bytes processed throughput", + "description": "Throughput of bytes sent.", "args": [], "type": { "kind": "NON_NULL", @@ -442,12 +442,12 @@ }, { "kind": "OBJECT", - "name": "ComponentProcessedBytesTotal", + "name": "ComponentReceivedBytesTotal", "description": null, "fields": [ { "name": "componentId", - "description": "Component id", + "description": "Component ID.", "args": [], "type": { "kind": "NON_NULL", @@ -463,14 +463,14 @@ }, { "name": "metric", - "description": "Bytes processed total metric", + "description": "Metric for total bytes received.", "args": [], "type": { "kind": "NON_NULL", "name": null, "ofType": { "kind": "OBJECT", - "name": "ProcessedBytesTotal", + "name": "ReceivedBytesTotal", "ofType": null } }, @@ -485,7 +485,7 @@ }, { "kind": "OBJECT", - "name": "ComponentProcessedEventsThroughput", + "name": "ComponentReceivedEventsThroughput", "description": null, "fields": [ { @@ -506,7 +506,7 @@ }, { "name": "throughput", - "description": "Events processed throughput", + "description": "Received events throughput", "args": [], "type": { "kind": "NON_NULL", @@ -528,7 +528,7 @@ }, { "kind": "OBJECT", - "name": "ComponentProcessedEventsTotal", + "name": "ComponentReceivedEventsTotal", "description": null, "fields": [ { @@ -549,14 +549,14 @@ }, { "name": "metric", - "description": "Events processed total metric", + "description": "Total received events metric", "args": [], "type": { "kind": "NON_NULL", "name": null, "ofType": { "kind": "OBJECT", - "name": "ProcessedEventsTotal", + "name": "ReceivedEventsTotal", "ofType": null } }, @@ -571,12 +571,12 @@ }, { "kind": "OBJECT", - "name": "ComponentReceivedEventsThroughput", + "name": "ComponentSentBytesThroughput", "description": null, "fields": [ { "name": "componentId", - "description": "Component id", + "description": "Component ID.", "args": [], "type": { "kind": "NON_NULL", @@ -592,7 +592,7 @@ }, { "name": "throughput", - "description": "Received events throughput", + "description": "Throughput of bytes sent.", "args": [], "type": { "kind": "NON_NULL", @@ -614,12 +614,12 @@ }, { "kind": "OBJECT", - "name": "ComponentReceivedEventsTotal", + "name": "ComponentSentBytesTotal", "description": null, "fields": [ { "name": "componentId", - "description": "Component id", + "description": "Component ID.", "args": [], "type": { "kind": "NON_NULL", @@ -635,14 +635,14 @@ }, { "name": "metric", - "description": "Total received events metric", + "description": "Metric for total bytes sent.", "args": [], "type": { "kind": "NON_NULL", "name": null, "ofType": { "kind": "OBJECT", - "name": "ReceivedEventsTotal", + "name": "SentBytesTotal", "ofType": null } }, @@ -1162,84 +1162,6 @@ "enumValues": null, 
"possibleTypes": null }, - { - "kind": "OBJECT", - "name": "EventsInTotal", - "description": null, - "fields": [ - { - "name": "timestamp", - "description": "Metric timestamp", - "args": [], - "type": { - "kind": "SCALAR", - "name": "DateTime", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "eventsInTotal", - "description": "Total incoming events", - "args": [], - "type": { - "kind": "NON_NULL", - "name": null, - "ofType": { - "kind": "SCALAR", - "name": "Float", - "ofType": null - } - }, - "isDeprecated": false, - "deprecationReason": null - } - ], - "inputFields": null, - "interfaces": [], - "enumValues": null, - "possibleTypes": null - }, - { - "kind": "OBJECT", - "name": "EventsOutTotal", - "description": null, - "fields": [ - { - "name": "timestamp", - "description": "Metric timestamp", - "args": [], - "type": { - "kind": "SCALAR", - "name": "DateTime", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "eventsOutTotal", - "description": "Total outgoing events", - "args": [], - "type": { - "kind": "NON_NULL", - "name": null, - "ofType": { - "kind": "SCALAR", - "name": "Float", - "ofType": null - } - }, - "isDeprecated": false, - "deprecationReason": null - } - ], - "inputFields": null, - "interfaces": [], - "enumValues": null, - "possibleTypes": null - }, { "kind": "OBJECT", "name": "FileSourceMetricFile", @@ -1262,36 +1184,12 @@ "deprecationReason": null }, { - "name": "processedEventsTotal", - "description": "Metric indicating events processed for the current file", - "args": [], - "type": { - "kind": "OBJECT", - "name": "ProcessedEventsTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "processedBytesTotal", - "description": "Metric indicating bytes processed for the current file", + "name": "receivedBytesTotal", + "description": "Metric indicating bytes received for the current file", "args": [], "type": { "kind": "OBJECT", - "name": "ProcessedBytesTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "eventsInTotal", - "description": "Metric indicating incoming events for the current file", - "args": [], - "type": { - "kind": "OBJECT", - "name": "EventsInTotal", + "name": "ReceivedBytesTotal", "ofType": null }, "isDeprecated": false, @@ -1309,18 +1207,6 @@ "isDeprecated": false, "deprecationReason": null }, - { - "name": "eventsOutTotal", - "description": "Metric indicating outgoing events for the current file", - "args": [], - "type": { - "kind": "OBJECT", - "name": "EventsOutTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, { "name": "sentEventsTotal", "description": "Metric indicating outgoing events for the current file", @@ -1527,13 +1413,7 @@ "deprecationReason": null }, { - "name": "PROCESSED_BYTES_TOTAL", - "description": null, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "PROCESSED_EVENTS_TOTAL", + "name": "RECEIVED_BYTES_TOTAL", "description": null, "isDeprecated": false, "deprecationReason": null @@ -1544,23 +1424,11 @@ "isDeprecated": false, "deprecationReason": null }, - { - "name": "EVENTS_IN_TOTAL", - "description": null, - "isDeprecated": false, - "deprecationReason": null - }, { "name": "SENT_EVENTS_TOTAL", "description": null, "isDeprecated": false, "deprecationReason": null - }, - { - "name": "EVENTS_OUT_TOTAL", - "description": null, - "isDeprecated": false, - "deprecationReason": null } ], "possibleTypes": 
null @@ -1656,36 +1524,12 @@ "deprecationReason": null }, { - "name": "processedEventsTotal", - "description": "Events processed for the current file source", - "args": [], - "type": { - "kind": "OBJECT", - "name": "ProcessedEventsTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "processedBytesTotal", - "description": "Bytes processed for the current file source", - "args": [], - "type": { - "kind": "OBJECT", - "name": "ProcessedBytesTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "eventsInTotal", - "description": "Total incoming events for the current file source", + "name": "receivedBytesTotal", + "description": "Total received bytes for the current file source", "args": [], "type": { "kind": "OBJECT", - "name": "EventsInTotal", + "name": "ReceivedBytesTotal", "ofType": null }, "isDeprecated": false, @@ -1703,21 +1547,9 @@ "isDeprecated": false, "deprecationReason": null }, - { - "name": "eventsOutTotal", - "description": "Total outgoing events for the current file source", - "args": [], - "type": { - "kind": "OBJECT", - "name": "EventsOutTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, { "name": "sentEventsTotal", - "description": "Total outgoing events for the current file source", + "description": "Total sent events for the current file source", "args": [], "type": { "kind": "OBJECT", @@ -1860,42 +1692,6 @@ "name": "GenericSinkMetrics", "description": null, "fields": [ - { - "name": "processedEventsTotal", - "description": "Events processed for the current sink", - "args": [], - "type": { - "kind": "OBJECT", - "name": "ProcessedEventsTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "processedBytesTotal", - "description": "Bytes processed for the current sink", - "args": [], - "type": { - "kind": "OBJECT", - "name": "ProcessedBytesTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "eventsInTotal", - "description": "Total incoming events for the current sink", - "args": [], - "type": { - "kind": "OBJECT", - "name": "EventsInTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, { "name": "receivedEventsTotal", "description": "Total received events for the current sink", @@ -1909,12 +1705,12 @@ "deprecationReason": null }, { - "name": "eventsOutTotal", - "description": "Total outgoing events for the current sink", + "name": "sentBytesTotal", + "description": "Total sent bytes for the current sink", "args": [], "type": { "kind": "OBJECT", - "name": "EventsOutTotal", + "name": "SentBytesTotal", "ofType": null }, "isDeprecated": false, @@ -1922,7 +1718,7 @@ }, { "name": "sentEventsTotal", - "description": "Total outgoing events for the current sink", + "description": "Total sent events for the current sink", "args": [], "type": { "kind": "OBJECT", @@ -1950,36 +1746,12 @@ "description": null, "fields": [ { - "name": "processedEventsTotal", - "description": "Events processed for the current source", - "args": [], - "type": { - "kind": "OBJECT", - "name": "ProcessedEventsTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "processedBytesTotal", - "description": "Bytes processed for the current source", - "args": [], - "type": { - "kind": "OBJECT", - "name": "ProcessedBytesTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - 
"name": "eventsInTotal", - "description": "Total incoming events for the current source", + "name": "receivedBytesTotal", + "description": "Total received bytes for the current source", "args": [], "type": { "kind": "OBJECT", - "name": "EventsInTotal", + "name": "ReceivedBytesTotal", "ofType": null }, "isDeprecated": false, @@ -1997,21 +1769,9 @@ "isDeprecated": false, "deprecationReason": null }, - { - "name": "eventsOutTotal", - "description": "Total outgoing events for the current source", - "args": [], - "type": { - "kind": "OBJECT", - "name": "EventsOutTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, { "name": "sentEventsTotal", - "description": "Total outgoing events for the current source", + "description": "Total sent events for the current source", "args": [], "type": { "kind": "OBJECT", @@ -2038,42 +1798,6 @@ "name": "GenericTransformMetrics", "description": null, "fields": [ - { - "name": "processedEventsTotal", - "description": "Events processed for the current transform", - "args": [], - "type": { - "kind": "OBJECT", - "name": "ProcessedEventsTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "processedBytesTotal", - "description": "Bytes processed for the current transform", - "args": [], - "type": { - "kind": "OBJECT", - "name": "ProcessedBytesTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "eventsInTotal", - "description": "Total incoming events for the current transform", - "args": [], - "type": { - "kind": "OBJECT", - "name": "EventsInTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, { "name": "receivedEventsTotal", "description": "Total received events for the current transform", @@ -2086,21 +1810,9 @@ "isDeprecated": false, "deprecationReason": null }, - { - "name": "eventsOutTotal", - "description": "Total outgoing events for the current transform", - "args": [], - "type": { - "kind": "OBJECT", - "name": "EventsOutTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, { "name": "sentEventsTotal", - "description": "Total outgoing events for the current transform", + "description": "Total sent events for the current transform", "args": [], "type": { "kind": "OBJECT", @@ -3050,16 +2762,6 @@ "kind": "OBJECT", "name": "Uptime", "ofType": null - }, - { - "kind": "OBJECT", - "name": "ProcessedEventsTotal", - "ofType": null - }, - { - "kind": "OBJECT", - "name": "ProcessedBytesTotal", - "ofType": null } ] }, @@ -3413,97 +3115,7 @@ }, { "kind": "OBJECT", - "name": "ProcessedBytesTotal", - "description": null, - "fields": [ - { - "name": "timestamp", - "description": "Metric timestamp", - "args": [], - "type": { - "kind": "SCALAR", - "name": "DateTime", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "processedBytesTotal", - "description": "Total number of bytes processed", - "args": [], - "type": { - "kind": "NON_NULL", - "name": null, - "ofType": { - "kind": "SCALAR", - "name": "Float", - "ofType": null - } - }, - "isDeprecated": false, - "deprecationReason": null - } - ], - "inputFields": null, - "interfaces": [ - { - "kind": "INTERFACE", - "name": "MetricType", - "ofType": null - } - ], - "enumValues": null, - "possibleTypes": null - }, - { - "kind": "OBJECT", - "name": "ProcessedEventsTotal", - "description": null, - "fields": [ - { - "name": "timestamp", - "description": "Metric timestamp", - "args": [], - "type": { 
- "kind": "SCALAR", - "name": "DateTime", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "processedEventsTotal", - "description": "Total number of events processed", - "args": [], - "type": { - "kind": "NON_NULL", - "name": null, - "ofType": { - "kind": "SCALAR", - "name": "Float", - "ofType": null - } - }, - "isDeprecated": false, - "deprecationReason": null - } - ], - "inputFields": null, - "interfaces": [ - { - "kind": "INTERFACE", - "name": "MetricType", - "ofType": null - } - ], - "enumValues": null, - "possibleTypes": null - }, - { - "kind": "OBJECT", - "name": "Query", + "name": "Query", "description": null, "fields": [ { @@ -3927,6 +3539,45 @@ "enumValues": null, "possibleTypes": null }, + { + "kind": "OBJECT", + "name": "ReceivedBytesTotal", + "description": null, + "fields": [ + { + "name": "timestamp", + "description": "Metric timestamp.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "DateTime", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "receivedBytesTotal", + "description": "Total number of bytes received.", + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, { "kind": "OBJECT", "name": "ReceivedEventsTotal", @@ -3966,6 +3617,45 @@ "enumValues": null, "possibleTypes": null }, + { + "kind": "OBJECT", + "name": "SentBytesTotal", + "description": null, + "fields": [ + { + "name": "timestamp", + "description": "Metric timestamp.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "DateTime", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "sentBytesTotal", + "description": "Total number of bytes sent.", + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, { "kind": "OBJECT", "name": "SentEventsTotal", @@ -4257,30 +3947,6 @@ "name": "SinkMetrics", "description": null, "fields": [ - { - "name": "processedEventsTotal", - "description": null, - "args": [], - "type": { - "kind": "OBJECT", - "name": "ProcessedEventsTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "processedBytesTotal", - "description": null, - "args": [], - "type": { - "kind": "OBJECT", - "name": "ProcessedBytesTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, { "name": "receivedEventsTotal", "description": null, @@ -4294,16 +3960,16 @@ "deprecationReason": null }, { - "name": "eventsInTotal", + "name": "sentBytesTotal", "description": null, "args": [], "type": { "kind": "OBJECT", - "name": "EventsInTotal", + "name": "SentBytesTotal", "ofType": null }, - "isDeprecated": true, - "deprecationReason": "Use received_events_total instead" + "isDeprecated": false, + "deprecationReason": null }, { "name": "sentEventsTotal", @@ -4316,18 +3982,6 @@ }, "isDeprecated": false, "deprecationReason": null - }, - { - "name": "eventsOutTotal", - "description": null, - "args": [], - "type": { - "kind": "OBJECT", - "name": "EventsOutTotal", - "ofType": null - }, - "isDeprecated": true, - "deprecationReason": 
"Use sent_events_total instead" } ], "inputFields": null, @@ -4769,24 +4423,12 @@ "description": null, "fields": [ { - "name": "processedEventsTotal", + "name": "receivedBytesTotal", "description": null, "args": [], "type": { "kind": "OBJECT", - "name": "ProcessedEventsTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "processedBytesTotal", - "description": null, - "args": [], - "type": { - "kind": "OBJECT", - "name": "ProcessedBytesTotal", + "name": "ReceivedBytesTotal", "ofType": null }, "isDeprecated": false, @@ -4804,18 +4446,6 @@ "isDeprecated": false, "deprecationReason": null }, - { - "name": "eventsInTotal", - "description": null, - "args": [], - "type": { - "kind": "OBJECT", - "name": "EventsInTotal", - "ofType": null - }, - "isDeprecated": true, - "deprecationReason": "Use received_events_total instead" - }, { "name": "sentEventsTotal", "description": null, @@ -4827,18 +4457,6 @@ }, "isDeprecated": false, "deprecationReason": null - }, - { - "name": "eventsOutTotal", - "description": null, - "args": [], - "type": { - "kind": "OBJECT", - "name": "EventsOutTotal", - "ofType": null - }, - "isDeprecated": true, - "deprecationReason": "Use sent_events_total instead" } ], "inputFields": null, @@ -5252,146 +4870,6 @@ "isDeprecated": false, "deprecationReason": null }, - { - "name": "processedEventsTotal", - "description": "Event processing metrics.", - "args": [ - { - "name": "interval", - "description": null, - "type": { - "kind": "NON_NULL", - "name": null, - "ofType": { - "kind": "SCALAR", - "name": "Int", - "ofType": null - } - }, - "defaultValue": "1000" - } - ], - "type": { - "kind": "NON_NULL", - "name": null, - "ofType": { - "kind": "OBJECT", - "name": "ProcessedEventsTotal", - "ofType": null - } - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "processedEventsThroughput", - "description": "Event processing throughput sampled over the provided millisecond `interval`.", - "args": [ - { - "name": "interval", - "description": null, - "type": { - "kind": "NON_NULL", - "name": null, - "ofType": { - "kind": "SCALAR", - "name": "Int", - "ofType": null - } - }, - "defaultValue": "1000" - } - ], - "type": { - "kind": "NON_NULL", - "name": null, - "ofType": { - "kind": "SCALAR", - "name": "Int", - "ofType": null - } - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "componentProcessedEventsThroughputs", - "description": "Component event processing throughput metrics over `interval`.", - "args": [ - { - "name": "interval", - "description": null, - "type": { - "kind": "NON_NULL", - "name": null, - "ofType": { - "kind": "SCALAR", - "name": "Int", - "ofType": null - } - }, - "defaultValue": "1000" - } - ], - "type": { - "kind": "NON_NULL", - "name": null, - "ofType": { - "kind": "LIST", - "name": null, - "ofType": { - "kind": "NON_NULL", - "name": null, - "ofType": { - "kind": "OBJECT", - "name": "ComponentProcessedEventsThroughput", - "ofType": null - } - } - } - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "componentProcessedEventsTotals", - "description": "Component event processing metrics over `interval`.", - "args": [ - { - "name": "interval", - "description": null, - "type": { - "kind": "NON_NULL", - "name": null, - "ofType": { - "kind": "SCALAR", - "name": "Int", - "ofType": null - } - }, - "defaultValue": "1000" - } - ], - "type": { - "kind": "NON_NULL", - "name": null, - "ofType": { - "kind": "LIST", - "name": null, - "ofType": { - "kind": 
"NON_NULL", - "name": null, - "ofType": { - "kind": "OBJECT", - "name": "ComponentProcessedEventsTotal", - "ofType": null - } - } - } - }, - "isDeprecated": false, - "deprecationReason": null - }, { "name": "receivedEventsTotal", "description": "Total received events metrics", @@ -5673,8 +5151,8 @@ "deprecationReason": null }, { - "name": "processedBytesTotal", - "description": "Byte processing metrics.", + "name": "componentReceivedBytesTotals", + "description": "Component bytes received metrics over `interval`.", "args": [ { "name": "interval", @@ -5695,17 +5173,25 @@ "kind": "NON_NULL", "name": null, "ofType": { - "kind": "OBJECT", - "name": "ProcessedBytesTotal", - "ofType": null + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "ComponentReceivedBytesTotal", + "ofType": null + } + } } }, "isDeprecated": false, "deprecationReason": null }, { - "name": "processedBytesThroughput", - "description": "Byte processing throughput sampled over a provided millisecond `interval`.", + "name": "componentReceivedBytesThroughputs", + "description": "Component bytes received throughput over `interval`", "args": [ { "name": "interval", @@ -5726,17 +5212,25 @@ "kind": "NON_NULL", "name": null, "ofType": { - "kind": "SCALAR", - "name": "Int", - "ofType": null + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "ComponentReceivedBytesThroughput", + "ofType": null + } + } } }, "isDeprecated": false, "deprecationReason": null }, { - "name": "componentProcessedBytesTotals", - "description": "Component byte processing metrics over `interval`.", + "name": "componentSentBytesTotals", + "description": "Component bytes sent metrics over `interval`.", "args": [ { "name": "interval", @@ -5764,7 +5258,7 @@ "name": null, "ofType": { "kind": "OBJECT", - "name": "ComponentProcessedBytesTotal", + "name": "ComponentSentBytesTotal", "ofType": null } } @@ -5774,8 +5268,8 @@ "deprecationReason": null }, { - "name": "componentProcessedBytesThroughputs", - "description": "Component byte processing throughput over `interval`", + "name": "componentSentBytesThroughputs", + "description": "Component bytes sent throughput over `interval`", "args": [ { "name": "interval", @@ -5803,7 +5297,7 @@ "name": null, "ofType": { "kind": "OBJECT", - "name": "ComponentProcessedBytesThroughput", + "name": "ComponentSentBytesThroughput", "ofType": null } } @@ -6614,30 +6108,6 @@ "name": "TransformMetrics", "description": null, "fields": [ - { - "name": "processedEventsTotal", - "description": null, - "args": [], - "type": { - "kind": "OBJECT", - "name": "ProcessedEventsTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, - { - "name": "processedBytesTotal", - "description": null, - "args": [], - "type": { - "kind": "OBJECT", - "name": "ProcessedBytesTotal", - "ofType": null - }, - "isDeprecated": false, - "deprecationReason": null - }, { "name": "receivedEventsTotal", "description": null, @@ -6650,18 +6120,6 @@ "isDeprecated": false, "deprecationReason": null }, - { - "name": "eventsInTotal", - "description": null, - "args": [], - "type": { - "kind": "OBJECT", - "name": "EventsInTotal", - "ofType": null - }, - "isDeprecated": true, - "deprecationReason": "Use received_events_total instead" - }, { "name": "sentEventsTotal", "description": null, @@ -6673,18 +6131,6 @@ }, "isDeprecated": false, "deprecationReason": null - }, - { - "name": "eventsOutTotal", - 
"description": null, - "args": [], - "type": { - "kind": "OBJECT", - "name": "EventsOutTotal", - "ofType": null - }, - "isDeprecated": true, - "deprecationReason": "Use sent_events_total instead" } ], "inputFields": null, diff --git a/lib/vector-api-client/graphql/subscriptions/component_processed_bytes_throughputs.graphql b/lib/vector-api-client/graphql/subscriptions/component_processed_bytes_throughputs.graphql deleted file mode 100644 index 4c9229b173927..0000000000000 --- a/lib/vector-api-client/graphql/subscriptions/component_processed_bytes_throughputs.graphql +++ /dev/null @@ -1,6 +0,0 @@ -subscription ComponentProcessedBytesThroughputsSubscription($interval: Int!) { - componentProcessedBytesThroughputs(interval: $interval) { - componentId - throughput - } -} diff --git a/lib/vector-api-client/graphql/subscriptions/component_processed_bytes_totals.graphql b/lib/vector-api-client/graphql/subscriptions/component_processed_bytes_totals.graphql deleted file mode 100644 index 9b0862c31b997..0000000000000 --- a/lib/vector-api-client/graphql/subscriptions/component_processed_bytes_totals.graphql +++ /dev/null @@ -1,8 +0,0 @@ -subscription ComponentProcessedBytesTotalsSubscription($interval: Int!) { - componentProcessedBytesTotals(interval: $interval) { - componentId - metric { - processedBytesTotal - } - } -} diff --git a/lib/vector-api-client/graphql/subscriptions/component_processed_events_throughputs.graphql b/lib/vector-api-client/graphql/subscriptions/component_processed_events_throughputs.graphql deleted file mode 100644 index 727c3440a9144..0000000000000 --- a/lib/vector-api-client/graphql/subscriptions/component_processed_events_throughputs.graphql +++ /dev/null @@ -1,6 +0,0 @@ -subscription ComponentProcessedEventsThroughputsSubscription($interval: Int!) { - componentProcessedEventsThroughputs(interval: $interval) { - componentId - throughput - } -} diff --git a/lib/vector-api-client/graphql/subscriptions/component_processed_events_totals.graphql b/lib/vector-api-client/graphql/subscriptions/component_processed_events_totals.graphql deleted file mode 100644 index bbfe1c20b573b..0000000000000 --- a/lib/vector-api-client/graphql/subscriptions/component_processed_events_totals.graphql +++ /dev/null @@ -1,8 +0,0 @@ -subscription ComponentProcessedEventsTotalsSubscription($interval: Int!) { - componentProcessedEventsTotals(interval: $interval) { - componentId - metric { - processedEventsTotal - } - } -} diff --git a/lib/vector-api-client/graphql/subscriptions/component_received_bytes_throughputs.graphql b/lib/vector-api-client/graphql/subscriptions/component_received_bytes_throughputs.graphql new file mode 100644 index 0000000000000..ec1b018d27107 --- /dev/null +++ b/lib/vector-api-client/graphql/subscriptions/component_received_bytes_throughputs.graphql @@ -0,0 +1,6 @@ +subscription ComponentReceivedBytesThroughputsSubscription($interval: Int!) { + componentReceivedBytesThroughputs(interval: $interval) { + componentId + throughput + } +} diff --git a/lib/vector-api-client/graphql/subscriptions/component_received_bytes_totals.graphql b/lib/vector-api-client/graphql/subscriptions/component_received_bytes_totals.graphql new file mode 100644 index 0000000000000..748b8930fb087 --- /dev/null +++ b/lib/vector-api-client/graphql/subscriptions/component_received_bytes_totals.graphql @@ -0,0 +1,8 @@ +subscription ComponentReceivedBytesTotalsSubscription($interval: Int!) 
{ + componentReceivedBytesTotals(interval: $interval) { + componentId + metric { + receivedBytesTotal + } + } +} diff --git a/lib/vector-api-client/graphql/subscriptions/component_sent_bytes_throughputs.graphql b/lib/vector-api-client/graphql/subscriptions/component_sent_bytes_throughputs.graphql new file mode 100644 index 0000000000000..a1b4f54c1a300 --- /dev/null +++ b/lib/vector-api-client/graphql/subscriptions/component_sent_bytes_throughputs.graphql @@ -0,0 +1,6 @@ +subscription ComponentSentBytesThroughputsSubscription($interval: Int!) { + componentSentBytesThroughputs(interval: $interval) { + componentId + throughput + } +} diff --git a/lib/vector-api-client/graphql/subscriptions/component_sent_bytes_totals.graphql b/lib/vector-api-client/graphql/subscriptions/component_sent_bytes_totals.graphql new file mode 100644 index 0000000000000..aa2c2f6dd0832 --- /dev/null +++ b/lib/vector-api-client/graphql/subscriptions/component_sent_bytes_totals.graphql @@ -0,0 +1,8 @@ +subscription ComponentSentBytesTotalsSubscription($interval: Int!) { + componentSentBytesTotals(interval: $interval) { + componentId + metric { + sentBytesTotal + } + } +} diff --git a/lib/vector-api-client/graphql/subscriptions/processed_events_throughput.graphql b/lib/vector-api-client/graphql/subscriptions/processed_events_throughput.graphql deleted file mode 100644 index f6b5b37a29d91..0000000000000 --- a/lib/vector-api-client/graphql/subscriptions/processed_events_throughput.graphql +++ /dev/null @@ -1,3 +0,0 @@ -subscription ProcessedEventsThroughputSubscription($interval: Int!) { - processedEventsThroughput(interval: $interval) -} diff --git a/lib/vector-api-client/graphql/subscriptions/processed_events_total.graphql b/lib/vector-api-client/graphql/subscriptions/processed_events_total.graphql deleted file mode 100644 index d10dde89a6451..0000000000000 --- a/lib/vector-api-client/graphql/subscriptions/processed_events_total.graphql +++ /dev/null @@ -1,5 +0,0 @@ -subscription ProcessedEventsTotalSubscription($interval: Int!) { - processedEventsTotal(interval: $interval) { - processedEventsTotal - } -} diff --git a/lib/vector-api-client/src/gql/components.rs b/lib/vector-api-client/src/gql/components.rs index 4efb95c47316f..00dcb2f8a72e5 100644 --- a/lib/vector-api-client/src/gql/components.rs +++ b/lib/vector-api-client/src/gql/components.rs @@ -70,71 +70,53 @@ impl ComponentsSubscriptionExt for crate::SubscriptionClient { } impl components_query::ComponentsQueryComponentsEdgesNodeOn { - pub fn processed_events_total(&self) -> i64 { + pub fn received_bytes_total(&self) -> i64 { + // This is network bytes received, and only sources can receive events. 
match self { components_query::ComponentsQueryComponentsEdgesNodeOn::Source(s) => s .metrics - .processed_events_total + .received_bytes_total .as_ref() - .map(|p| p.processed_events_total as i64) - .unwrap_or(0), - components_query::ComponentsQueryComponentsEdgesNodeOn::Transform(t) => t - .metrics - .processed_events_total - .as_ref() - .map(|p| p.processed_events_total as i64) - .unwrap_or(0), - components_query::ComponentsQueryComponentsEdgesNodeOn::Sink(s) => s - .metrics - .processed_events_total - .as_ref() - .map(|p| p.processed_events_total as i64) + .map(|p| p.received_bytes_total as i64) .unwrap_or(0), + components_query::ComponentsQueryComponentsEdgesNodeOn::Transform(_) => 0, + components_query::ComponentsQueryComponentsEdgesNodeOn::Sink(_) => 0, } } - pub fn processed_bytes_total(&self) -> i64 { + pub fn received_events_total(&self) -> i64 { match self { components_query::ComponentsQueryComponentsEdgesNodeOn::Source(s) => s .metrics - .processed_bytes_total + .received_events_total .as_ref() - .map(|p| p.processed_bytes_total as i64) + .map(|p| p.received_events_total as i64) .unwrap_or(0), components_query::ComponentsQueryComponentsEdgesNodeOn::Transform(t) => t .metrics - .processed_bytes_total + .received_events_total .as_ref() - .map(|p| p.processed_bytes_total as i64) + .map(|p| p.received_events_total as i64) .unwrap_or(0), components_query::ComponentsQueryComponentsEdgesNodeOn::Sink(s) => s .metrics - .processed_bytes_total + .received_events_total .as_ref() - .map(|p| p.processed_bytes_total as i64) + .map(|p| p.received_events_total as i64) .unwrap_or(0), } } - pub fn received_events_total(&self) -> i64 { + pub fn sent_bytes_total(&self) -> i64 { + // This is network bytes sent, and only sinks can send out events. match self { - components_query::ComponentsQueryComponentsEdgesNodeOn::Source(s) => s - .metrics - .received_events_total - .as_ref() - .map(|p| p.received_events_total as i64) - .unwrap_or(0), - components_query::ComponentsQueryComponentsEdgesNodeOn::Transform(t) => t - .metrics - .received_events_total - .as_ref() - .map(|p| p.received_events_total as i64) - .unwrap_or(0), + components_query::ComponentsQueryComponentsEdgesNodeOn::Source(_) => 0, + components_query::ComponentsQueryComponentsEdgesNodeOn::Transform(_) => 0, components_query::ComponentsQueryComponentsEdgesNodeOn::Sink(s) => s .metrics - .received_events_total + .sent_bytes_total .as_ref() - .map(|p| p.received_events_total as i64) + .map(|p| p.sent_bytes_total as i64) .unwrap_or(0), } } diff --git a/lib/vector-api-client/src/gql/metrics.rs b/lib/vector-api-client/src/gql/metrics.rs index 516de83a95313..a9df90f7c0bd7 100644 --- a/lib/vector-api-client/src/gql/metrics.rs +++ b/lib/vector-api-client/src/gql/metrics.rs @@ -14,56 +14,6 @@ use crate::BoxedSubscription; )] pub struct UptimeSubscription; -/// ProcessedEventsTotalSubscription contains metrics on the number of events -/// that have been processed by a Vector instance. -#[derive(GraphQLQuery, Debug, Copy, Clone)] -#[graphql( - schema_path = "graphql/schema.json", - query_path = "graphql/subscriptions/processed_events_total.graphql", - response_derives = "Debug" -)] -pub struct ProcessedEventsTotalSubscription; - -/// ProcessedEventsThroughputSubscription contains metrics on the number of events -/// that have been processed between `interval` samples. 
-#[derive(GraphQLQuery, Debug, Copy, Clone)] -#[graphql( - schema_path = "graphql/schema.json", - query_path = "graphql/subscriptions/processed_events_throughput.graphql", - response_derives = "Debug" -)] -pub struct ProcessedEventsThroughputSubscription; - -/// ProcessedBytesThroughputSubscription contains metrics on the number of bytes -/// that have been processed between `interval` samples. -#[derive(GraphQLQuery, Debug, Copy, Clone)] -#[graphql( - schema_path = "graphql/schema.json", - query_path = "graphql/subscriptions/processed_bytes_throughput.graphql", - response_derives = "Debug" -)] -pub struct ProcessedBytesThroughputSubscription; - -/// ComponentProcessedEventsThroughputsSubscription contains metrics on the number of events -/// that have been processed between `interval` samples, against specific components. -#[derive(GraphQLQuery, Debug, Copy, Clone)] -#[graphql( - schema_path = "graphql/schema.json", - query_path = "graphql/subscriptions/component_processed_events_throughputs.graphql", - response_derives = "Debug" -)] -pub struct ComponentProcessedEventsThroughputsSubscription; - -/// ComponentProcessedEventsTotalsSubscription contains metrics on the number of events -/// that have been processed by a Vector instance, against specific components. -#[derive(GraphQLQuery, Debug, Copy, Clone)] -#[graphql( - schema_path = "graphql/schema.json", - query_path = "graphql/subscriptions/component_processed_events_totals.graphql", - response_derives = "Debug" -)] -pub struct ComponentProcessedEventsTotalsSubscription; - /// ComponentAllocatedBytesSubscription contains metrics on the number of allocated bytes /// that have been processed by a Vector instance, against specific components. #[derive(GraphQLQuery, Debug, Copy, Clone)] @@ -74,25 +24,25 @@ pub struct ComponentProcessedEventsTotalsSubscription; )] pub struct ComponentAllocatedBytesSubscription; -/// ComponentProcessedBytesThroughputsSubscription contains metrics on the number of bytes -/// that have been processed between `interval` samples, against specific components. +/// ComponentReceivedBytesThroughputsSubscription contains metrics on the number of bytes +/// that have been received between `interval` samples, against specific components. #[derive(GraphQLQuery, Debug, Copy, Clone)] #[graphql( schema_path = "graphql/schema.json", - query_path = "graphql/subscriptions/component_processed_bytes_throughputs.graphql", + query_path = "graphql/subscriptions/component_received_bytes_throughputs.graphql", response_derives = "Debug" )] -pub struct ComponentProcessedBytesThroughputsSubscription; +pub struct ComponentReceivedBytesThroughputsSubscription; -/// ComponentProcessedBytesTotalsSubscription contains metrics on the number of bytes -/// that have been processed by a Vector instance, against a specific component. +/// ComponentReceivedBytesTotalsSubscription contains metrics on the number of bytes +/// that have been received by a Vector instance, against a specific component. 
#[derive(GraphQLQuery, Debug, Copy, Clone)] #[graphql( schema_path = "graphql/schema.json", - query_path = "graphql/subscriptions/component_processed_bytes_totals.graphql", + query_path = "graphql/subscriptions/component_received_bytes_totals.graphql", response_derives = "Debug" )] -pub struct ComponentProcessedBytesTotalsSubscription; +pub struct ComponentReceivedBytesTotalsSubscription; /// ComponentReceivedEventsThroughputsSubscription contains metrics on the number of events /// that have been accepted for processing between `interval` samples, against specific components. @@ -114,6 +64,26 @@ pub struct ComponentReceivedEventsThroughputsSubscription; )] pub struct ComponentReceivedEventsTotalsSubscription; +/// ComponentSentBytesThroughputsSubscription contains metrics on the number of bytes +/// that have been sent between `interval` samples, against specific components. +#[derive(GraphQLQuery, Debug, Copy, Clone)] +#[graphql( + schema_path = "graphql/schema.json", + query_path = "graphql/subscriptions/component_sent_bytes_throughputs.graphql", + response_derives = "Debug" +)] +pub struct ComponentSentBytesThroughputsSubscription; + +/// ComponentSentBytesTotalsSubscription contains metrics on the number of bytes +/// that have been sent by a Vector instance, against a specific component. +#[derive(GraphQLQuery, Debug, Copy, Clone)] +#[graphql( + schema_path = "graphql/schema.json", + query_path = "graphql/subscriptions/component_sent_bytes_totals.graphql", + response_derives = "Debug" +)] +pub struct ComponentSentBytesTotalsSubscription; + /// ComponentSentEventsThroughputsSubscription contains metrics on the number of events /// that have been emitted between `interval` samples, against specific components. #[derive(GraphQLQuery, Debug, Copy, Clone)] #[graphql( @@ -188,47 +158,17 @@ pub trait MetricsSubscriptionExt { interval: i64, ) -> BoxedSubscription; - /// Executes an events processed metrics subscription. - fn processed_events_total_subscription( + /// Executes a component bytes received totals subscription. + fn component_received_bytes_totals_subscription( &self, interval: i64, - ) -> crate::BoxedSubscription; + ) -> crate::BoxedSubscription; - /// Executes an events processed throughput subscription. - fn processed_events_throughput_subscription( + /// Executes a component bytes received throughput subscription. + fn component_received_bytes_throughputs_subscription( &self, interval: i64, - ) -> crate::BoxedSubscription; - - /// Executes a bytes processed throughput subscription. - fn processed_bytes_throughput_subscription( - &self, - interval: i64, - ) -> crate::BoxedSubscription; - - /// Executes a component events processed totals subscription - fn component_processed_events_totals_subscription( - &self, - interval: i64, - ) -> crate::BoxedSubscription; - - /// Executes a component events processed throughputs subscription. - fn component_processed_events_throughputs_subscription( - &self, - interval: i64, - ) -> crate::BoxedSubscription; - - /// Executes a component bytes processed totals subscription. - fn component_processed_bytes_totals_subscription( - &self, - interval: i64, - ) -> crate::BoxedSubscription; - - /// Executes a component bytes processed throughputs subscription. - fn component_processed_bytes_throughputs_subscription( - &self, - interval: i64, - ) -> crate::BoxedSubscription; + ) -> crate::BoxedSubscription; /// Executes a component received events totals subscription.
fn component_received_events_totals_subscription( @@ -242,6 +182,18 @@ pub trait MetricsSubscriptionExt { interval: i64, ) -> crate::BoxedSubscription; + /// Executes a component bytes sent totals subscription. + fn component_sent_bytes_totals_subscription( + &self, + interval: i64, + ) -> crate::BoxedSubscription; + + /// Executes a component bytes sent throughput subscription. + fn component_sent_bytes_throughputs_subscription( + &self, + interval: i64, + ) -> crate::BoxedSubscription; + /// Executes a component events totals subscription. fn component_sent_events_totals_subscription( &self, @@ -268,101 +220,42 @@ impl MetricsSubscriptionExt for crate::SubscriptionClient { self.start::(&request_body) } - /// Executes an events processed metrics subscription. - fn processed_events_total_subscription( - &self, - interval: i64, - ) -> BoxedSubscription { - let request_body = ProcessedEventsTotalSubscription::build_query( - processed_events_total_subscription::Variables { interval }, - ); - - self.start::(&request_body) - } - - /// Executes an events processed throughput subscription. - fn processed_events_throughput_subscription( - &self, - interval: i64, - ) -> BoxedSubscription { - let request_body = ProcessedEventsThroughputSubscription::build_query( - processed_events_throughput_subscription::Variables { interval }, - ); - - self.start::(&request_body) - } - - /// Executes a bytes processed throughput subscription. - fn processed_bytes_throughput_subscription( - &self, - interval: i64, - ) -> BoxedSubscription { - let request_body = ProcessedBytesThroughputSubscription::build_query( - processed_bytes_throughput_subscription::Variables { interval }, - ); - - self.start::(&request_body) - } - - /// Executes an all component events processed totals subscription. - fn component_processed_events_totals_subscription( - &self, - interval: i64, - ) -> BoxedSubscription { - let request_body = ComponentProcessedEventsTotalsSubscription::build_query( - component_processed_events_totals_subscription::Variables { interval }, - ); - - self.start::(&request_body) - } - - /// Executes an all component events processed throughputs subscription. - fn component_processed_events_throughputs_subscription( + /// Executes an all component allocated bytes subscription. + fn component_allocated_bytes_subscription( &self, interval: i64, - ) -> BoxedSubscription { - let request_body = ComponentProcessedEventsThroughputsSubscription::build_query( - component_processed_events_throughputs_subscription::Variables { interval }, + ) -> BoxedSubscription { + let request_body = ComponentAllocatedBytesSubscription::build_query( + component_allocated_bytes_subscription::Variables { interval }, ); - self.start::(&request_body) + self.start::(&request_body) } - /// Executes an all component bytes processed totals subscription. - fn component_processed_bytes_totals_subscription( + /// Executes an all component bytes received totals subscription. + fn component_received_bytes_totals_subscription( &self, interval: i64, - ) -> BoxedSubscription { - let request_body = ComponentProcessedBytesTotalsSubscription::build_query( - component_processed_bytes_totals_subscription::Variables { interval }, + ) -> BoxedSubscription { + let request_body = ComponentReceivedBytesTotalsSubscription::build_query( + component_received_bytes_totals_subscription::Variables { interval }, ); - self.start::(&request_body) + self.start::(&request_body) } - /// Executes an all component bytes processed throughputs subscription. 
- fn component_processed_bytes_throughputs_subscription( + /// Executes a component bytes received throughput subscription. + fn component_received_bytes_throughputs_subscription( &self, interval: i64, - ) -> BoxedSubscription { - let request_body = ComponentProcessedBytesThroughputsSubscription::build_query( - component_processed_bytes_throughputs_subscription::Variables { interval }, + ) -> BoxedSubscription { + let request_body = ComponentReceivedBytesThroughputsSubscription::build_query( + component_received_bytes_throughputs_subscription::Variables { interval }, ); - self.start::(&request_body) + self.start::(&request_body) } - /// Executes an all component allocated bytes subscription. - fn component_allocated_bytes_subscription( - &self, - interval: i64, - ) -> BoxedSubscription { - let request_body = ComponentAllocatedBytesSubscription::build_query( - component_allocated_bytes_subscription::Variables { interval }, - ); - - self.start::(&request_body) - } /// Executes an all component received events totals subscription. fn component_received_events_totals_subscription( &self, @@ -387,6 +280,30 @@ impl MetricsSubscriptionExt for crate::SubscriptionClient { self.start::(&request_body) } + /// Executes an all component bytes sent totals subscription. + fn component_sent_bytes_totals_subscription( + &self, + interval: i64, + ) -> BoxedSubscription { + let request_body = ComponentSentBytesTotalsSubscription::build_query( + component_sent_bytes_totals_subscription::Variables { interval }, + ); + + self.start::(&request_body) + } + + /// Executes a component bytes sent throughput subscription. + fn component_sent_bytes_throughputs_subscription( + &self, + interval: i64, + ) -> BoxedSubscription { + let request_body = ComponentSentBytesThroughputsSubscription::build_query( + component_sent_bytes_throughputs_subscription::Variables { interval }, + ); + + self.start::(&request_body) + } + /// Executes a component sent events totals subscription. 
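For orientation, the two new sent-bytes methods are driven exactly like the existing ones: build the GraphQL request for the matching subscription struct, hand it to `SubscriptionClient::start`, and keep the returned subscription alive for as long as updates are wanted. A minimal usage sketch follows; it is editorial, not part of the patch, and the helper name, the exact import path, and the availability of a connected `client` are assumptions.

```rust
use vector_api_client::gql::MetricsSubscriptionExt;

// Sketch only: start the two new sent-bytes subscriptions added above. `client`
// is assumed to be an already-connected vector_api_client::SubscriptionClient,
// and `interval` is the sampling interval forwarded to the GraphQL server.
fn watch_sent_bytes(client: &vector_api_client::SubscriptionClient, interval: i64) {
    let _sent_totals = client.component_sent_bytes_totals_subscription(interval);
    let _sent_throughputs = client.component_sent_bytes_throughputs_subscription(interval);
    // Each returned BoxedSubscription yields a stream of GraphQL responses that a
    // consumer (for example the `vector top` dashboard) polls for as long as it
    // needs updates; dropping the handle is assumed to end the subscription.
}
```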
fn component_sent_events_totals_subscription( &self, diff --git a/lib/vector-api-client/tests/queries/file_source_metrics.graphql b/lib/vector-api-client/tests/queries/file_source_metrics.graphql index 2a4cb2aae2fe6..7b9d2afb0f10e 100644 --- a/lib/vector-api-client/tests/queries/file_source_metrics.graphql +++ b/lib/vector-api-client/tests/queries/file_source_metrics.graphql @@ -11,9 +11,6 @@ query FileSourceMetricsQuery($after: String, $before: String, $first: Int, $last edges { node { name - processedEventsTotal { - processedEventsTotal - } receivedEventsTotal { receivedEventsTotal } diff --git a/lib/vector-buffers/Cargo.toml b/lib/vector-buffers/Cargo.toml index 0084fc048d9d7..09de0d0ac6153 100644 --- a/lib/vector-buffers/Cargo.toml +++ b/lib/vector-buffers/Cargo.toml @@ -13,18 +13,18 @@ bytecheck = { version = "0.6.9", default-features = false, features = ["std"] } bytes = { version = "1.4.0", default-features = false } crc32fast = { version = "1.3.2", default-features = false } crossbeam-queue = { version = "0.3.8", default-features = false, features = ["std"] } -crossbeam-utils = { version = "0.8.15", default-features = false } +crossbeam-utils = { version = "0.8.16", default-features = false } fslock = { version = "0.2.1", default-features = false, features = ["std"] } futures = { version = "0.3.28", default-features = false, features = ["std"] } -memmap2 = { version = "0.6.1", default-features = false } +memmap2 = { version = "0.7.1", default-features = false } metrics = "0.21.0" num-traits = { version = "0.2.15", default-features = false } -pin-project = { version = "1.1.0", default-features = false } +pin-project = { version = "1.1.1", default-features = false } rkyv = { version = "0.7.40", default-features = false, features = ["size_32", "std", "strict", "validation"] } -serde = { version = "1.0.163", default-features = false, features = ["derive"] } +serde = { version = "1.0.164", default-features = false, features = ["derive"] } snafu = { version = "0.7.4", default-features = false, features = ["std"] } tokio-util = { version = "0.7.0", default-features = false } -tokio = { version = "1.28.1", default-features = false, features = ["rt", "macros", "rt-multi-thread", "sync", "fs", "io-util", "time"] } +tokio = { version = "1.29.0", default-features = false, features = ["rt", "macros", "rt-multi-thread", "sync", "fs", "io-util", "time"] } tracing = { version = "0.1.34", default-features = false, features = ["attributes"] } vector-config = { path = "../vector-config", default-features = false } vector-config-common = { path = "../vector-config-common", default-features = false } @@ -33,13 +33,13 @@ vector-common = { path = "../vector-common", default-features = false, features [dev-dependencies] clap = "4.1.14" -criterion = { version = "0.4", features = ["html_reports", "async_tokio"] } +criterion = { version = "0.5", features = ["html_reports", "async_tokio"] } crossbeam-queue = "0.3.8" hdrhistogram = "7.5.2" metrics-tracing-context = { version = "0.14.0", default-features = false } metrics-util = { version = "0.15.0", default-features = false, features = ["debugging"] } -once_cell = "1.17" -proptest = "1.1" +once_cell = "1.18" +proptest = "1.2" quickcheck = "1.0" rand = "0.8.5" serde_yaml = { version = "0.9", default-features = false } diff --git a/lib/vector-buffers/src/lib.rs b/lib/vector-buffers/src/lib.rs index 152cb3f9795f0..e478b2285e8db 100644 --- a/lib/vector-buffers/src/lib.rs +++ b/lib/vector-buffers/src/lib.rs @@ -10,7 +10,6 @@ #![allow(clippy::type_complexity)] // 
long-types happen, especially in async code #![allow(clippy::must_use_candidate)] #![allow( - clippy::missing_fields_in_debug, clippy::missing_panics_doc, clippy::redundant_async_block, clippy::redundant_closure_call diff --git a/lib/vector-buffers/src/variants/disk_v2/reader.rs b/lib/vector-buffers/src/variants/disk_v2/reader.rs index 42bdcefdab59f..e2ad6a81a941c 100644 --- a/lib/vector-buffers/src/variants/disk_v2/reader.rs +++ b/lib/vector-buffers/src/variants/disk_v2/reader.rs @@ -901,12 +901,8 @@ where while self.last_reader_record_id < ledger_last { match self.next().await { Ok(maybe_record) => { - if maybe_record.is_none() && self.last_reader_record_id == 0 { - // We've hit a point where there's no more data to read. If our "last reader record - // ID" hasn't moved at all, that means the buffer was already empty and we're caught - // up, so we just pin ourselves to where the ledger says we left off, and we're good - // to go. - self.last_reader_record_id = ledger_last; + if maybe_record.is_none() { + // We've hit the end of the current data file so we've gone as far as we can. break; } } diff --git a/lib/vector-buffers/src/variants/disk_v2/tests/initialization.rs b/lib/vector-buffers/src/variants/disk_v2/tests/initialization.rs index 8c44937563170..217057d3e0c7e 100644 --- a/lib/vector-buffers/src/variants/disk_v2/tests/initialization.rs +++ b/lib/vector-buffers/src/variants/disk_v2/tests/initialization.rs @@ -87,3 +87,98 @@ async fn reader_doesnt_block_from_partial_write_on_last_record() { let parent = trace_span!("reader_doesnt_block_from_partial_write_on_last_record"); fut.instrument(parent.or_current()).await; } + +#[tokio::test] +async fn reader_doesnt_block_when_ahead_of_last_record_in_current_data_file() { + // When initializing, the reader will be catching up to the last record it read, which involves + // reading individual records in the current reader data file until a record is returned whose + // record ID matches the "last record ID read" field from the ledger. + // + // If the current data file contains a valid last record when we initialize, but that last + // record is _behind_ the last record read as tracked by the ledger, then we need to ensure we + // can break out of the catch-up loop when we get to the end of the current data file. + // + // Our existing logic for corrupted event detection, and the writer's own initialization logic, + // will emit an error message when we realize that data is missing based on record ID gaps. + let _a = install_tracing_helpers(); + + let fut = with_temp_dir(|dir| { + let data_dir = dir.to_path_buf(); + + async move { + // Create a regular buffer, no customizations required. + let (mut writer, mut reader, ledger) = create_default_buffer_v2(data_dir.clone()).await; + + // Write two records, and then read and acknowledge both. + // + // This puts the buffer into a state where there's data in the current data file, and + // the ledger has a non-zero record ID for where it thinks the reader needs to be. This + // ensures that the reader actually does at least two calls to `Reader::next` during + // `Reader::seek_to_next_record`, which is necessary to ensure that the reader leaves + // the default state of `self.last_reader_record_id == 0`. 
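+            //
+            // Concretely: after the writes and reads below the ledger's last-read record ID is 2,
+            // but the data file is then truncated back to just the first record, so on reopen the
+            // catch-up loop reads record 1, hits EOF, and must break out rather than block waiting
+            // for data that will never arrive.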
+ let first_bytes_written = writer + .write_record(SizedRecord::new(64)) + .await + .expect("should not fail to write"); + writer.flush().await.expect("flush should not fail"); + + let second_bytes_written = writer + .write_record(SizedRecord::new(68)) + .await + .expect("should not fail to write"); + writer.flush().await.expect("flush should not fail"); + + writer.close(); + + let first_read = reader + .next() + .await + .expect("should not fail to read record") + .expect("should contain first record"); + assert_eq!(SizedRecord::new(64), first_read); + acknowledge(first_read).await; + + let second_read = reader + .next() + .await + .expect("should not fail to read record") + .expect("should contain first record"); + assert_eq!(SizedRecord::new(68), second_read); + acknowledge(second_read).await; + + let third_read = reader.next().await.expect("should not fail to read record"); + assert!(third_read.is_none()); + + ledger.flush().expect("should not fail to flush ledger"); + + // Grab the current writer data file path before dropping the buffer. + let data_file_path = ledger.get_current_writer_data_file_path(); + drop(reader); + drop(writer); + drop(ledger); + + // Open the data file and truncate the second record. This will ensure that the reader + // hits EOF after the first read, which we need to do in order to exercise the logic + // that breaks out of the loop. + let initial_len = first_bytes_written as u64 + second_bytes_written as u64; + let target_len = first_bytes_written as u64; + set_file_length(&data_file_path, initial_len, target_len) + .await + .expect("should not fail to truncate data file"); + + // Now reopen the buffer, which should complete in a timely fashion without an immediate error. + let reopen = timeout( + Duration::from_millis(500), + create_default_buffer_v2::<_, SizedRecord>(data_dir), + ) + .await; + assert!( + reopen.is_ok(), + "failed to reopen buffer in a timely fashion; likely deadlock" + ); + } + }); + + let parent = trace_span!("reader_doesnt_block_when_ahead_of_last_record_in_current_data_file"); + fut.instrument(parent.or_current()).await; +} diff --git a/lib/vector-buffers/src/variants/disk_v2/tests/invariants.rs b/lib/vector-buffers/src/variants/disk_v2/tests/invariants.rs index ef56bb19d3f4e..29adb49f17243 100644 --- a/lib/vector-buffers/src/variants/disk_v2/tests/invariants.rs +++ b/lib/vector-buffers/src/variants/disk_v2/tests/invariants.rs @@ -5,7 +5,8 @@ use tracing::Instrument; use super::{create_buffer_v2_with_max_data_file_size, read_next, read_next_some}; use crate::{ assert_buffer_is_empty, assert_buffer_records, assert_buffer_size, assert_enough_bytes_written, - assert_reader_writer_v2_file_positions, await_timeout, set_data_file_length, + assert_reader_last_writer_next_positions, assert_reader_writer_v2_file_positions, + await_timeout, set_data_file_length, test::{acknowledge, install_tracing_helpers, with_temp_dir, MultiEventRecord, SizedRecord}, variants::disk_v2::{ common::{DEFAULT_FLUSH_INTERVAL, MAX_FILE_ID}, @@ -820,3 +821,110 @@ async fn writer_updates_ledger_when_buffered_writer_reports_implicit_flush() { }) .await; } + +#[tokio::test] +async fn reader_writer_positions_aligned_through_multiple_files_and_records() { + // This test ensures that the reader/writer position stay aligned through multiple records and + // data files. This is to say, that, if we write 5 records, each with 10 events, and then read + // and acknowledge all of those events... 
the writer's next record ID should be 51 (the 50th + // event would correspond to ID 50, so next ID would be 51) and the reader's last read record ID + // should be 50. + // + // Testing this across multiple data files isn't super germane to the position logic, but it + // just ensures we're also testing that aspect. + + let _a = install_tracing_helpers(); + let fut = with_temp_dir(|dir| { + let data_dir = dir.to_path_buf(); + + async move { + // Create our buffer with an arbitrarily low maximum data file size. We'll use this to + // control how many records make it into a given data file. Just another way to ensure + // we're testing the position logic with multiple writes to one data file, one write to + // a data file, etc. + let (mut writer, mut reader, ledger) = + create_buffer_v2_with_max_data_file_size(data_dir, 256).await; + + // We'll write multi-event records with N events based on these sizes, and as we do so, + // we'll assert that our writer position moves as expected after the write, and that + // after reading and acknowledging, the reader position also moves as expected. + let record_sizes = &[176, 52, 91, 137, 54, 87]; + + let mut expected_writer_position = ledger.state().get_next_writer_record_id(); + let mut expected_reader_position = ledger.state().get_last_reader_record_id(); + let mut trailing_reader_position_delta = 0; + + for record_size in record_sizes { + // Initial check before writing/reading the next record. + assert_reader_last_writer_next_positions!( + ledger, + expected_reader_position, + expected_writer_position + ); + + let record = MultiEventRecord::new(*record_size); + assert_eq!( + record.event_count(), + usize::try_from(*record_size).unwrap_or(usize::MAX) + ); + + writer + .write_record(record) + .await + .expect("write should not fail"); + writer.flush().await.expect("flush should not fail"); + + expected_writer_position += u64::from(*record_size); + + // Make sure the writer position advanced after flushing. + assert_reader_last_writer_next_positions!( + ledger, + expected_reader_position, + expected_writer_position + ); + + let record_via_read = read_next_some(&mut reader).await; + assert_eq!(record_via_read, MultiEventRecord::new(*record_size)); + acknowledge(record_via_read).await; + + // Increment the expected reader position by the trailing reader position delta, and + // then now that we've done a read, we should be able to have seen actually move + // forward. + expected_reader_position += trailing_reader_position_delta; + assert_reader_last_writer_next_positions!( + ledger, + expected_reader_position, + expected_writer_position + ); + + // Set the trailing reader position delta to the record we just read. + // + // We do it this way because reads themselves have to drive acknowledgement logic to + // then drive updates to the ledger, so we will only see the change in the reader's + // position the _next_ time we do a read. + trailing_reader_position_delta = u64::from(*record_size); + } + + // Close the writer and do a final read, thus driving the acknowledgement logic, and + // position update logic, before we do our final position check. + writer.close(); + assert_eq!(reader.next().await, Ok(None)); + + // Calculate the absolute reader/writer positions we would expect based on all of the + // records/events written and read. This is to double check our work and make sure that + // the "expected" positions didn't hide any bugs from us. 
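+            //
+            // With the sizes above that works out to 176 + 52 + 91 + 137 + 54 + 87 = 597 events in
+            // total, so the reader's last read record ID should finish at 597 and the writer's next
+            // record ID at 598, which is exactly what the final assertion checks.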
+ let expected_final_reader_position = + record_sizes.iter().copied().map(u64::from).sum::(); + let expected_final_writer_position = expected_final_reader_position + 1; + + assert_reader_last_writer_next_positions!( + ledger, + expected_final_reader_position, + expected_final_writer_position + ); + } + }); + + let parent = trace_span!("reader_writer_positions_aligned_through_multiple_files_and_records"); + fut.instrument(parent.or_current()).await; +} diff --git a/lib/vector-buffers/src/variants/disk_v2/tests/mod.rs b/lib/vector-buffers/src/variants/disk_v2/tests/mod.rs index e123a39c228d6..8c479827355fd 100644 --- a/lib/vector-buffers/src/variants/disk_v2/tests/mod.rs +++ b/lib/vector-buffers/src/variants/disk_v2/tests/mod.rs @@ -134,6 +134,24 @@ macro_rules! assert_reader_writer_v2_file_positions { }}; } +#[macro_export] +macro_rules! assert_reader_last_writer_next_positions { + ($ledger:expr, $reader_expected:expr, $writer_expected:expr) => {{ + let reader_actual = $ledger.state().get_last_reader_record_id(); + let writer_actual = $ledger.state().get_next_writer_record_id(); + assert_eq!( + $reader_expected, reader_actual, + "expected reader last read record ID of {}, got {} instead", + $reader_expected, reader_actual, + ); + assert_eq!( + $writer_expected, writer_actual, + "expected writer next record ID of {}, got {} instead", + $writer_expected, writer_actual, + ); + }}; +} + #[macro_export] macro_rules! assert_enough_bytes_written { ($written:expr, $record_type:ty, $record_payload_size:expr) => { diff --git a/lib/vector-buffers/src/variants/disk_v2/tests/model/sequencer.rs b/lib/vector-buffers/src/variants/disk_v2/tests/model/sequencer.rs index d00e95ea1e406..270cf7c84e9d6 100644 --- a/lib/vector-buffers/src/variants/disk_v2/tests/model/sequencer.rs +++ b/lib/vector-buffers/src/variants/disk_v2/tests/model/sequencer.rs @@ -25,11 +25,9 @@ impl TrackedFuture { where F: Future + Send + 'static, { - let wrapped = async move { fut.await }; - Self { polled_once: false, - fut: spawn(wrapped.boxed()), + fut: spawn(fut.boxed()), } } diff --git a/lib/vector-common/Cargo.toml b/lib/vector-common/Cargo.toml index 1097df9f0122b..24023729e5104 100644 --- a/lib/vector-common/Cargo.toml +++ b/lib/vector-common/Cargo.toml @@ -43,32 +43,32 @@ tokenize = [ [dependencies] async-stream = "0.3.5" bytes = { version = "1.4.0", default-features = false, optional = true } -chrono-tz = { version = "0.8.2", default-features = false, features = ["serde"] } +chrono-tz = { version = "0.8.3", default-features = false, features = ["serde"] } chrono = { version = "0.4", default-features = false, optional = true, features = ["clock"] } -crossbeam-utils = { version = "0.8.15", default-features = false } -derivative = "2.1.3" +crossbeam-utils = { version = "0.8.16", default-features = false } +derivative = { version = "2.2.0", default-features = false } futures = { version = "0.3.28", default-features = false, features = ["std"] } -indexmap = { version = "~1.9.3", default-features = false } +indexmap = { version = "~2.0.0", default-features = false, features = ["std"] } metrics = "0.21.0" nom = { version = "7", optional = true } ordered-float = { version = "3.7.0", default-features = false } paste = "1.0.12" -pin-project = { version = "1.1.0", default-features = false } +pin-project = { version = "1.1.1", default-features = false } ryu = { version = "1", default-features = false } -serde_json = { version = "1.0.96", default-features = false, features = ["std", "raw_value"] } -serde = { version = "1.0.163", optional = 
true, features = ["derive"] } +serde_json = { version = "1.0.99", default-features = false, features = ["std", "raw_value"] } +serde = { version = "1.0.164", optional = true, features = ["derive"] } smallvec = { version = "1", default-features = false } snafu = { version = "0.7", optional = true } stream-cancel = { version = "0.8.1", default-features = false } -tokio = { version = "1.28.1", default-features = false, features = ["macros", "time"] } +tokio = { version = "1.29.0", default-features = false, features = ["macros", "time"] } tracing = { version = "0.1.34", default-features = false } -vrl = { git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0" } +vrl.workspace = true vector-config = { path = "../vector-config" } vector-config-common = { path = "../vector-config-common" } vector-config-macros = { path = "../vector-config-macros" } [dev-dependencies] futures = { version = "0.3.28", default-features = false, features = ["async-await", "std"] } -tokio = { version = "1.28.1", default-features = false, features = ["rt", "time"] } +tokio = { version = "1.29.0", default-features = false, features = ["rt", "time"] } quickcheck = "1" quickcheck_macros = "1" diff --git a/lib/vector-common/src/internal_event/cached_event.rs b/lib/vector-common/src/internal_event/cached_event.rs new file mode 100644 index 0000000000000..e672848c93584 --- /dev/null +++ b/lib/vector-common/src/internal_event/cached_event.rs @@ -0,0 +1,69 @@ +use std::{ + collections::BTreeMap, + sync::{Arc, RwLock}, +}; + +use derivative::Derivative; + +use super::{InternalEventHandle, RegisterInternalEvent}; + +/// Metrics (eg. `component_sent_event_bytes_total`) may need to emit tags based on +/// values contained within the events. These tags can't be determined in advance. +/// +/// Metrics need to be registered and the handle needs to be held onto in order to +/// prevent them from expiring and being dropped (this would result in the counter +/// resetting to zero). +/// `CachedEvent` is used to maintain a store of these registered metrics. When a +/// new event is emitted for a previously unseen set of tags an event is registered +/// and stored in the cache. +#[derive(Derivative)] +#[derivative(Clone(bound = ""), Default(bound = ""))] +pub struct RegisteredEventCache { + cache: Arc< + RwLock< + BTreeMap< + ::Tags, + ::Handle, + >, + >, + >, +} + +/// This trait must be implemented by events that emit dynamic tags. `register` must +/// be implemented to register an event based on the set of tags passed. +pub trait RegisterTaggedInternalEvent: RegisterInternalEvent { + /// The type that will contain the data necessary to extract the tags + /// that will be used when registering the event. + type Tags; + + fn register(tags: Self::Tags) -> ::Handle; +} + +impl RegisteredEventCache +where + Data: Sized, + EventHandle: InternalEventHandle, + Tags: Ord + Clone, + Event: RegisterInternalEvent + RegisterTaggedInternalEvent, +{ + /// Emits the event with the given tags. + /// It will register the event and store in the cache if this has not already + /// been done. + /// + /// # Panics + /// + /// This will panic if the lock is poisoned. + pub fn emit(&self, tags: &Tags, value: Data) { + let read = self.cache.read().unwrap(); + if let Some(event) = read.get(tags) { + event.emit(value); + } else { + let event = ::register(tags.clone()); + event.emit(value); + + // Ensure the read lock is dropped so we can write. 
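+            // Note: once the read guard is released, another thread may insert a handle for the
+            // same tags before we take the write lock; the insert below then simply overwrites
+            // that entry with an equivalent handle, so the race should only cost a redundant
+            // registration of the same metric.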
+ drop(read); + self.cache.write().unwrap().insert(tags.clone(), event); + } + } +} diff --git a/lib/vector-common/src/internal_event/events_received.rs b/lib/vector-common/src/internal_event/events_received.rs index ee9ffa3ef0ee8..c25cc228c9fd1 100644 --- a/lib/vector-common/src/internal_event/events_received.rs +++ b/lib/vector-common/src/internal_event/events_received.rs @@ -7,7 +7,6 @@ crate::registered_event!( EventsReceived => { events_count: Histogram = register_histogram!("component_received_events_count"), events: Counter = register_counter!("component_received_events_total"), - events_in: Counter = register_counter!("events_in_total"), event_bytes: Counter = register_counter!("component_received_event_bytes_total"), } @@ -19,7 +18,6 @@ crate::registered_event!( #[allow(clippy::cast_precision_loss)] self.events_count.record(count as f64); self.events.increment(count as u64); - self.events_in.increment(count as u64); - self.event_bytes.increment(byte_size as u64); + self.event_bytes.increment(byte_size.get() as u64); } ); diff --git a/lib/vector-common/src/internal_event/events_sent.rs b/lib/vector-common/src/internal_event/events_sent.rs index 061d11b6527af..d12a22bf17e8a 100644 --- a/lib/vector-common/src/internal_event/events_sent.rs +++ b/lib/vector-common/src/internal_event/events_sent.rs @@ -1,7 +1,11 @@ +use std::sync::Arc; + use metrics::{register_counter, Counter}; use tracing::trace; -use super::{CountByteSize, Output, SharedString}; +use crate::{config::ComponentKey, request_metadata::EventCountTags}; + +use super::{CountByteSize, OptionalTag, Output, SharedString}; pub const DEFAULT_OUTPUT: &str = "_default"; @@ -14,11 +18,6 @@ crate::registered_event!( } else { register_counter!("component_sent_events_total") }, - events_out: Counter = if let Some(output) = &self.output { - register_counter!("events_out_total", "output" => output.clone()) - } else { - register_counter!("events_out_total") - }, event_bytes: Counter = if let Some(output) = &self.output { register_counter!("component_sent_event_bytes_total", "output" => output.clone()) } else { @@ -32,16 +31,15 @@ crate::registered_event!( match &self.output { Some(output) => { - trace!(message = "Events sent.", count = %count, byte_size = %byte_size, output = %output); + trace!(message = "Events sent.", count = %count, byte_size = %byte_size.get(), output = %output); } None => { - trace!(message = "Events sent.", count = %count, byte_size = %byte_size); + trace!(message = "Events sent.", count = %count, byte_size = %byte_size.get()); } } self.events.increment(count as u64); - self.events_out.increment(count as u64); - self.event_bytes.increment(byte_size as u64); + self.event_bytes.increment(byte_size.get() as u64); } ); @@ -50,3 +48,62 @@ impl From for EventsSent { Self { output: output.0 } } } + +/// Makes a list of the tags to use with the events sent event. 
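+/// A tag the user has chosen not to emit (`OptionalTag::Ignored`) is omitted entirely, while a
+/// tag that is requested but missing on the event is emitted with the placeholder value `-`.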
+fn make_tags( + source: &OptionalTag>, + service: &OptionalTag, +) -> Vec<(&'static str, String)> { + let mut tags = Vec::new(); + if let OptionalTag::Specified(tag) = source { + tags.push(( + "source", + tag.as_ref() + .map_or_else(|| "-".to_string(), |tag| tag.id().to_string()), + )); + } + + if let OptionalTag::Specified(tag) = service { + tags.push(("service", tag.clone().unwrap_or("-".to_string()))); + } + + tags +} + +crate::registered_event!( + TaggedEventsSent { + source: OptionalTag>, + service: OptionalTag, + } => { + events: Counter = { + register_counter!("component_sent_events_total", &make_tags(&self.source, &self.service)) + }, + event_bytes: Counter = { + register_counter!("component_sent_event_bytes_total", &make_tags(&self.source, &self.service)) + }, + } + + fn emit(&self, data: CountByteSize) { + let CountByteSize(count, byte_size) = data; + trace!(message = "Events sent.", %count, %byte_size); + + self.events.increment(count as u64); + self.event_bytes.increment(byte_size.get() as u64); + } + + fn register(tags: EventCountTags) { + super::register(TaggedEventsSent::new( + tags, + )) + } +); + +impl TaggedEventsSent { + #[must_use] + pub fn new(tags: EventCountTags) -> Self { + Self { + source: tags.source, + service: tags.service, + } + } +} diff --git a/lib/vector-common/src/internal_event/mod.rs b/lib/vector-common/src/internal_event/mod.rs index d7d2db1a174b2..2915c88797bf8 100644 --- a/lib/vector-common/src/internal_event/mod.rs +++ b/lib/vector-common/src/internal_event/mod.rs @@ -1,22 +1,31 @@ mod bytes_received; mod bytes_sent; +mod cached_event; pub mod component_events_dropped; mod events_received; mod events_sent; +mod optional_tag; mod prelude; pub mod service; pub mod usage_metrics; +use std::ops::{Add, AddAssign}; + pub use metrics::SharedString; pub use bytes_received::BytesReceived; pub use bytes_sent::BytesSent; +#[allow(clippy::module_name_repetitions)] +pub use cached_event::{RegisterTaggedInternalEvent, RegisteredEventCache}; pub use component_events_dropped::{ComponentEventsDropped, INTENTIONAL, UNINTENTIONAL}; pub use events_received::EventsReceived; -pub use events_sent::{EventsSent, DEFAULT_OUTPUT}; +pub use events_sent::{EventsSent, TaggedEventsSent, DEFAULT_OUTPUT}; +pub use optional_tag::OptionalTag; pub use prelude::{error_stage, error_type}; pub use service::{CallError, PollReadyError}; +use crate::json_size::JsonSize; + pub trait InternalEvent: Sized { fn emit(self); @@ -107,9 +116,24 @@ pub struct ByteSize(pub usize); #[derive(Clone, Copy)] pub struct Count(pub usize); -/// Holds the tuple `(count_of_events, size_of_events_in_bytes)`. -#[derive(Clone, Copy)] -pub struct CountByteSize(pub usize, pub usize); +/// Holds the tuple `(count_of_events, estimated_json_size_of_events)`. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct CountByteSize(pub usize, pub JsonSize); + +impl AddAssign for CountByteSize { + fn add_assign(&mut self, rhs: Self) { + self.0 += rhs.0; + self.1 += rhs.1; + } +} + +impl Add for CountByteSize { + type Output = CountByteSize; + + fn add(self, rhs: CountByteSize) -> Self::Output { + CountByteSize(self.0 + rhs.0, self.1 + rhs.1) + } +} // Wrapper types used to hold parameters for registering events @@ -132,6 +156,12 @@ impl From<&'static str> for Protocol { } } +impl From for SharedString { + fn from(value: Protocol) -> Self { + value.0 + } +} + /// Macro to take care of some of the repetitive boilerplate in implementing a registered event. 
See /// the other events in this module for examples of how to use this. /// @@ -189,6 +219,9 @@ macro_rules! registered_event { fn emit(&$slf:ident, $data_name:ident: $data:ident) $emit_body:block + + $(fn register($tags_name:ident: $tags:ty) + $register_body:block)? ) => { paste::paste!{ #[derive(Clone)] @@ -216,6 +249,17 @@ macro_rules! registered_event { fn emit(&$slf, $data_name: $data) $emit_body } + + $(impl $crate::internal_event::cached_event::RegisterTaggedInternalEvent for $event { + type Tags = $tags; + + fn register( + $tags_name: $tags, + ) -> ::Handle { + $register_body + } + })? + } }; } diff --git a/lib/vector-common/src/internal_event/optional_tag.rs b/lib/vector-common/src/internal_event/optional_tag.rs new file mode 100644 index 0000000000000..400bc554630d1 --- /dev/null +++ b/lib/vector-common/src/internal_event/optional_tag.rs @@ -0,0 +1,14 @@ +/// The user can configure whether a tag should be emitted. If they configure it to +/// be emitted, but the value doesn't exist - we should emit the tag but with a value +/// of `-`. +#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Ord, Hash)] +pub enum OptionalTag { + Ignored, + Specified(Option), +} + +impl From> for OptionalTag { + fn from(value: Option) -> Self { + Self::Specified(value) + } +} diff --git a/lib/vector-common/src/json_size.rs b/lib/vector-common/src/json_size.rs new file mode 100644 index 0000000000000..746b6335716d1 --- /dev/null +++ b/lib/vector-common/src/json_size.rs @@ -0,0 +1,105 @@ +use std::{ + fmt, + iter::Sum, + ops::{Add, AddAssign, Sub}, +}; + +/// A newtype for the JSON size of an event. +/// Used to emit the `component_received_event_bytes_total` and +/// `component_sent_event_bytes_total` metrics. +#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct JsonSize(usize); + +impl fmt::Display for JsonSize { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl Sub for JsonSize { + type Output = JsonSize; + + #[inline] + fn sub(mut self, rhs: Self) -> Self::Output { + self.0 -= rhs.0; + self + } +} + +impl Add for JsonSize { + type Output = JsonSize; + + #[inline] + fn add(mut self, rhs: Self) -> Self::Output { + self.0 += rhs.0; + self + } +} + +impl AddAssign for JsonSize { + #[inline] + fn add_assign(&mut self, rhs: Self) { + self.0 += rhs.0; + } +} + +impl Sum for JsonSize { + #[inline] + fn sum>(iter: I) -> Self { + let mut accum = 0; + for val in iter { + accum += val.get(); + } + + JsonSize::new(accum) + } +} + +impl From for JsonSize { + #[inline] + fn from(value: usize) -> Self { + Self(value) + } +} + +impl JsonSize { + /// Create a new instance with the specified size. + #[must_use] + #[inline] + pub const fn new(size: usize) -> Self { + Self(size) + } + + /// Create a new instance with size 0. + #[must_use] + #[inline] + pub const fn zero() -> Self { + Self(0) + } + + /// Returns the contained size. 
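+    /// This is the estimated JSON-encoded size in bytes, i.e. the value fed into the
+    /// `component_received_event_bytes_total` / `component_sent_event_bytes_total` metrics.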
+ #[must_use] + #[inline] + pub fn get(&self) -> usize { + self.0 + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +#[allow(clippy::module_name_repetitions)] +pub struct NonZeroJsonSize(JsonSize); + +impl NonZeroJsonSize { + #[must_use] + #[inline] + pub fn new(size: JsonSize) -> Option { + (size.0 > 0).then_some(NonZeroJsonSize(size)) + } +} + +impl From for JsonSize { + #[inline] + fn from(value: NonZeroJsonSize) -> Self { + value.0 + } +} diff --git a/lib/vector-common/src/lib.rs b/lib/vector-common/src/lib.rs index 7e092982712e4..49a4ffd9151be 100644 --- a/lib/vector-common/src/lib.rs +++ b/lib/vector-common/src/lib.rs @@ -11,20 +11,16 @@ #![deny(unused_extern_crates)] #![deny(unused_assignments)] #![deny(unused_comparisons)] -#![allow(clippy::default_constructed_unit_structs)] -#![allow( - clippy::default_constructed_unit_structs, - clippy::explicit_iter_loop, - clippy::needless_pub_self, - clippy::useless_conversion -)] +#![allow(clippy::explicit_iter_loop, clippy::useless_conversion)] #[cfg(feature = "btreemap")] -pub use vrl::value::btreemap; +pub use vrl::btreemap; #[cfg(feature = "byte_size_of")] pub mod byte_size_of; +pub mod json_size; + pub mod config; #[cfg(feature = "conversion")] diff --git a/lib/vector-common/src/request_metadata.rs b/lib/vector-common/src/request_metadata.rs index be68c319dcadf..9b93a63df7626 100644 --- a/lib/vector-common/src/request_metadata.rs +++ b/lib/vector-common/src/request_metadata.rs @@ -1,14 +1,215 @@ use std::ops::Add; +use std::{collections::HashMap, sync::Arc}; + +use crate::{ + config::ComponentKey, + internal_event::{ + CountByteSize, InternalEventHandle, OptionalTag, RegisterTaggedInternalEvent, + RegisteredEventCache, + }, + json_size::JsonSize, +}; + +/// Tags that are used to group the events within a batch for emitting telemetry. +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct EventCountTags { + pub source: OptionalTag>, + pub service: OptionalTag, +} + +impl EventCountTags { + #[must_use] + pub fn new_empty() -> Self { + Self { + source: OptionalTag::Specified(None), + service: OptionalTag::Specified(None), + } + } + + #[must_use] + pub fn new_unspecified() -> Self { + Self { + source: OptionalTag::Ignored, + service: OptionalTag::Ignored, + } + } +} + +/// Must be implemented by events to get the tags that will be attached to +/// the `component_sent_event_*` emitted metrics. +pub trait GetEventCountTags { + fn get_tags(&self) -> EventCountTags; +} + +/// Keeps track of the estimated json size of a given batch of events by +/// source and service. +#[derive(Clone, Debug)] +pub enum GroupedCountByteSize { + /// When we need to keep track of the events by certain tags we use this + /// variant. + Tagged { + sizes: HashMap, + }, + /// If we don't need to track the events by certain tags we can use + /// this variant to avoid allocating a `HashMap`, + Untagged { size: CountByteSize }, +} + +impl Default for GroupedCountByteSize { + fn default() -> Self { + Self::Untagged { + size: CountByteSize(0, JsonSize::zero()), + } + } +} + +impl GroupedCountByteSize { + /// Creates a new Tagged variant for when we need to track events by + /// certain tags. + #[must_use] + pub fn new_tagged() -> Self { + Self::Tagged { + sizes: HashMap::new(), + } + } + + /// Creates a new Tagged variant for when we do not need to track events by + /// tags. 
+ #[must_use] + pub fn new_untagged() -> Self { + Self::Untagged { + size: CountByteSize(0, JsonSize::zero()), + } + } + + /// Returns a `HashMap` of tags => event counts for when we are tracking by tags. + /// Returns `None` if we are not tracking by tags. + #[must_use] + #[cfg(test)] + pub fn sizes(&self) -> Option<&HashMap> { + match self { + Self::Tagged { sizes } => Some(sizes), + Self::Untagged { .. } => None, + } + } + + /// Returns a single count for when we are not tracking by tags. + #[must_use] + #[cfg(test)] + fn size(&self) -> Option { + match self { + Self::Tagged { .. } => None, + Self::Untagged { size } => Some(*size), + } + } + + /// Adds the given estimated json size of the event to current count. + pub fn add_event(&mut self, event: &E, json_size: JsonSize) + where + E: GetEventCountTags, + { + match self { + Self::Tagged { sizes } => { + let size = CountByteSize(1, json_size); + let tags = event.get_tags(); + + match sizes.get_mut(&tags) { + Some(current) => { + *current += size; + } + None => { + sizes.insert(tags, size); + } + } + } + Self::Untagged { size } => { + *size += CountByteSize(1, json_size); + } + } + } + + /// Emits our counts to a `RegisteredEvent` cached event. + pub fn emit_event(&self, event_cache: &RegisteredEventCache) + where + T: RegisterTaggedInternalEvent, + H: InternalEventHandle, + { + match self { + GroupedCountByteSize::Tagged { sizes } => { + for (tags, size) in sizes { + event_cache.emit(tags, *size); + } + } + GroupedCountByteSize::Untagged { size } => { + event_cache.emit(&EventCountTags::new_unspecified(), *size); + } + } + } +} + +impl From for GroupedCountByteSize { + fn from(value: CountByteSize) -> Self { + Self::Untagged { size: value } + } +} + +impl<'a> Add<&'a GroupedCountByteSize> for GroupedCountByteSize { + type Output = GroupedCountByteSize; + + fn add(self, other: &'a Self::Output) -> Self::Output { + match (self, other) { + (Self::Tagged { sizes: mut us }, Self::Tagged { sizes: them }) => { + for (key, value) in them { + match us.get_mut(key) { + Some(size) => *size += *value, + None => { + us.insert(key.clone(), *value); + } + } + } + + Self::Tagged { sizes: us } + } + + (Self::Untagged { size: us }, Self::Untagged { size: them }) => { + Self::Untagged { size: us + *them } + } + + // The following two scenarios shouldn't really occur in practice, but are provided for completeness. + (Self::Tagged { mut sizes }, Self::Untagged { size }) => { + match sizes.get_mut(&EventCountTags::new_empty()) { + Some(empty_size) => *empty_size += *size, + None => { + sizes.insert(EventCountTags::new_empty(), *size); + } + } + + Self::Tagged { sizes } + } + (Self::Untagged { size }, Self::Tagged { sizes }) => { + let mut sizes = sizes.clone(); + match sizes.get_mut(&EventCountTags::new_empty()) { + Some(empty_size) => *empty_size += size, + None => { + sizes.insert(EventCountTags::new_empty(), size); + } + } + + Self::Tagged { sizes } + } + } + } +} /// Metadata for batch requests. -#[derive(Clone, Copy, Debug, Default)] +#[derive(Clone, Debug, Default)] pub struct RequestMetadata { /// Number of events represented by this batch request. event_count: usize, /// Size, in bytes, of the in-memory representation of all events in this batch request. events_byte_size: usize, /// Size, in bytes, of the estimated JSON-encoded representation of all events in this batch request. 
- events_estimated_json_encoded_byte_size: usize, + events_estimated_json_encoded_byte_size: GroupedCountByteSize, /// Uncompressed size, in bytes, of the encoded events in this batch request. request_encoded_size: usize, /// On-the-wire size, in bytes, of the batch request itself after compression, etc. @@ -17,7 +218,6 @@ pub struct RequestMetadata { request_wire_size: usize, } -// TODO: Make this struct the object which emits the actual internal telemetry i.e. events sent, bytes sent, etc. impl RequestMetadata { #[must_use] pub fn new( @@ -25,7 +225,7 @@ impl RequestMetadata { events_byte_size: usize, request_encoded_size: usize, request_wire_size: usize, - events_estimated_json_encoded_byte_size: usize, + events_estimated_json_encoded_byte_size: GroupedCountByteSize, ) -> Self { Self { event_count, @@ -47,7 +247,14 @@ impl RequestMetadata { } #[must_use] - pub const fn events_estimated_json_encoded_byte_size(&self) -> usize { + pub fn events_estimated_json_encoded_byte_size(&self) -> &GroupedCountByteSize { + &self.events_estimated_json_encoded_byte_size + } + + /// Consumes the object and returns the byte size of the request grouped by + /// the tags (source and service). + #[must_use] + pub fn into_events_estimated_json_encoded_byte_size(self) -> GroupedCountByteSize { self.events_estimated_json_encoded_byte_size } @@ -64,7 +271,7 @@ impl RequestMetadata { /// Constructs a `RequestMetadata` by summation of the "batch" of `RequestMetadata` provided. #[must_use] pub fn from_batch>(metadata_iter: T) -> Self { - let mut metadata_sum = RequestMetadata::new(0, 0, 0, 0, 0); + let mut metadata_sum = RequestMetadata::new(0, 0, 0, 0, GroupedCountByteSize::default()); for metadata in metadata_iter { metadata_sum = metadata_sum + &metadata; @@ -82,7 +289,7 @@ impl<'a> Add<&'a RequestMetadata> for RequestMetadata { event_count: self.event_count + other.event_count, events_byte_size: self.events_byte_size + other.events_byte_size, events_estimated_json_encoded_byte_size: self.events_estimated_json_encoded_byte_size - + other.events_estimated_json_encoded_byte_size, + + &other.events_estimated_json_encoded_byte_size, request_encoded_size: self.request_encoded_size + other.request_encoded_size, request_wire_size: self.request_wire_size + other.request_wire_size, } @@ -92,5 +299,102 @@ impl<'a> Add<&'a RequestMetadata> for RequestMetadata { /// Objects implementing this trait have metadata that describes the request. pub trait MetaDescriptive { /// Returns the `RequestMetadata` associated with this object. - fn get_metadata(&self) -> RequestMetadata; + fn get_metadata(&self) -> &RequestMetadata; + + // Returns a mutable reference to the `RequestMetadata` associated with this object. 
+ fn metadata_mut(&mut self) -> &mut RequestMetadata; +} + +#[cfg(test)] +mod tests { + use super::*; + + struct DummyEvent { + source: OptionalTag>, + service: OptionalTag, + } + + impl GetEventCountTags for DummyEvent { + fn get_tags(&self) -> EventCountTags { + EventCountTags { + source: self.source.clone(), + service: self.service.clone(), + } + } + } + + #[test] + fn add_request_count_bytesize_event_untagged() { + let mut bytesize = GroupedCountByteSize::new_untagged(); + let event = DummyEvent { + source: Some(Arc::new(ComponentKey::from("carrot"))).into(), + service: Some("cabbage".to_string()).into(), + }; + + bytesize.add_event(&event, JsonSize::new(42)); + + let event = DummyEvent { + source: Some(Arc::new(ComponentKey::from("pea"))).into(), + service: Some("potato".to_string()).into(), + }; + + bytesize.add_event(&event, JsonSize::new(36)); + + assert_eq!(Some(CountByteSize(2, JsonSize::new(78))), bytesize.size()); + assert_eq!(None, bytesize.sizes()); + } + + #[test] + fn add_request_count_bytesize_event_tagged() { + let mut bytesize = GroupedCountByteSize::new_tagged(); + let event = DummyEvent { + source: OptionalTag::Ignored, + service: Some("cabbage".to_string()).into(), + }; + + bytesize.add_event(&event, JsonSize::new(42)); + + let event = DummyEvent { + source: OptionalTag::Ignored, + service: Some("cabbage".to_string()).into(), + }; + + bytesize.add_event(&event, JsonSize::new(36)); + + let event = DummyEvent { + source: OptionalTag::Ignored, + service: Some("tomato".to_string()).into(), + }; + + bytesize.add_event(&event, JsonSize::new(23)); + + assert_eq!(None, bytesize.size()); + let mut sizes = bytesize + .sizes() + .unwrap() + .clone() + .into_iter() + .collect::>(); + sizes.sort(); + + assert_eq!( + vec![ + ( + EventCountTags { + source: OptionalTag::Ignored, + service: Some("cabbage".to_string()).into() + }, + CountByteSize(2, JsonSize::new(78)) + ), + ( + EventCountTags { + source: OptionalTag::Ignored, + service: Some("tomato".to_string()).into() + }, + CountByteSize(1, JsonSize::new(23)) + ), + ], + sizes + ); + } } diff --git a/lib/vector-common/src/shutdown.rs b/lib/vector-common/src/shutdown.rs index 0e19e5461137c..bb2e6e2a6a8ea 100644 --- a/lib/vector-common/src/shutdown.rs +++ b/lib/vector-common/src/shutdown.rs @@ -122,9 +122,11 @@ impl ShutdownSignal { } } +type IsInternal = bool; + #[derive(Debug, Default)] pub struct SourceShutdownCoordinator { - shutdown_begun_triggers: HashMap, + shutdown_begun_triggers: HashMap, shutdown_force_triggers: HashMap, shutdown_complete_tripwires: HashMap, } @@ -136,13 +138,14 @@ impl SourceShutdownCoordinator { pub fn register_source( &mut self, id: &ComponentKey, + internal: bool, ) -> (ShutdownSignal, impl Future) { let (shutdown_begun_trigger, shutdown_begun_tripwire) = Tripwire::new(); let (force_shutdown_trigger, force_shutdown_tripwire) = Tripwire::new(); let (shutdown_complete_trigger, shutdown_complete_tripwire) = Tripwire::new(); self.shutdown_begun_triggers - .insert(id.clone(), shutdown_begun_trigger); + .insert(id.clone(), (internal, shutdown_begun_trigger)); self.shutdown_force_triggers .insert(id.clone(), force_shutdown_trigger); self.shutdown_complete_tripwires @@ -215,14 +218,15 @@ impl SourceShutdownCoordinator { /// /// Panics if this coordinator has had its triggers removed (ie /// has been taken over with `Self::takeover_source`). 
- pub fn shutdown_all(self, deadline: Instant) -> impl Future { - let mut complete_futures = Vec::new(); + pub fn shutdown_all(self, deadline: Option) -> impl Future { + let mut internal_sources_complete_futures = Vec::new(); + let mut external_sources_complete_futures = Vec::new(); let shutdown_begun_triggers = self.shutdown_begun_triggers; let mut shutdown_complete_tripwires = self.shutdown_complete_tripwires; let mut shutdown_force_triggers = self.shutdown_force_triggers; - for (id, trigger) in shutdown_begun_triggers { + for (id, (internal, trigger)) in shutdown_begun_triggers { trigger.cancel(); let shutdown_complete_tripwire = @@ -244,10 +248,16 @@ impl SourceShutdownCoordinator { deadline, ); - complete_futures.push(source_complete); + if internal { + internal_sources_complete_futures.push(source_complete); + } else { + external_sources_complete_futures.push(source_complete); + } } - futures::future::join_all(complete_futures).map(|_| ()) + futures::future::join_all(external_sources_complete_futures) + .then(|_| futures::future::join_all(internal_sources_complete_futures)) + .map(|_| ()) } /// Sends the signal to the given source to begin shutting down. Returns a future that resolves @@ -265,11 +275,12 @@ impl SourceShutdownCoordinator { id: &ComponentKey, deadline: Instant, ) -> impl Future { - let begin_shutdown_trigger = self.shutdown_begun_triggers.remove(id).unwrap_or_else(|| { - panic!( + let (_, begin_shutdown_trigger) = + self.shutdown_begun_triggers.remove(id).unwrap_or_else(|| { + panic!( "shutdown_begun_trigger for source \"{id}\" not found in the ShutdownCoordinator" ) - }); + }); // This is what actually triggers the source to begin shutting down. begin_shutdown_trigger.cancel(); @@ -290,7 +301,7 @@ impl SourceShutdownCoordinator { shutdown_complete_tripwire, shutdown_force_trigger, id.clone(), - deadline, + Some(deadline), ) } @@ -310,23 +321,27 @@ impl SourceShutdownCoordinator { shutdown_complete_tripwire: Tripwire, shutdown_force_trigger: Trigger, id: ComponentKey, - deadline: Instant, + deadline: Option, ) -> impl Future { async move { - // Call `shutdown_force_trigger.disable()` on drop. - let shutdown_force_trigger = DisabledTrigger::new(shutdown_force_trigger); - let fut = shutdown_complete_tripwire.then(tripwire_handler); - if timeout_at(deadline, fut).await.is_ok() { - shutdown_force_trigger.into_inner().disable(); - true + if let Some(deadline) = deadline { + // Call `shutdown_force_trigger.disable()` on drop. + let shutdown_force_trigger = DisabledTrigger::new(shutdown_force_trigger); + if timeout_at(deadline, fut).await.is_ok() { + shutdown_force_trigger.into_inner().disable(); + true + } else { + error!( + "Source '{}' failed to shutdown before deadline. Forcing shutdown.", + id, + ); + shutdown_force_trigger.into_inner().cancel(); + false + } } else { - error!( - "Source '{}' failed to shutdown before deadline. 
Forcing shutdown.", - id, - ); - shutdown_force_trigger.into_inner().cancel(); - false + fut.await; + true } } .boxed() @@ -345,7 +360,7 @@ mod test { let mut shutdown = SourceShutdownCoordinator::default(); let id = ComponentKey::from("test"); - let (shutdown_signal, _) = shutdown.register_source(&id); + let (shutdown_signal, _) = shutdown.register_source(&id, false); let deadline = Instant::now() + Duration::from_secs(1); let shutdown_complete = shutdown.shutdown_source(&id, deadline); @@ -361,7 +376,7 @@ mod test { let mut shutdown = SourceShutdownCoordinator::default(); let id = ComponentKey::from("test"); - let (_shutdown_signal, force_shutdown_tripwire) = shutdown.register_source(&id); + let (_shutdown_signal, force_shutdown_tripwire) = shutdown.register_source(&id, false); let deadline = Instant::now() + Duration::from_secs(1); let shutdown_complete = shutdown.shutdown_source(&id, deadline); diff --git a/lib/vector-config-common/Cargo.toml b/lib/vector-config-common/Cargo.toml index 9e29eb538f98a..0d4ddaed219a0 100644 --- a/lib/vector-config-common/Cargo.toml +++ b/lib/vector-config-common/Cargo.toml @@ -7,7 +7,6 @@ license = "MPL-2.0" [dependencies] convert_case = { version = "0.6", default-features = false } darling = { version = "0.13", default-features = false, features = ["suggestions"] } -indexmap = { version = "1.9", default-features = false, features = ["serde"] } once_cell = { version = "1", default-features = false, features = ["std"] } proc-macro2 = { version = "1.0", default-features = false } serde = { version = "1.0", default-features = false, features = ["derive"] } diff --git a/lib/vector-config-common/src/human_friendly.rs b/lib/vector-config-common/src/human_friendly.rs index 29c9a290efb93..fab77985ecf69 100644 --- a/lib/vector-config-common/src/human_friendly.rs +++ b/lib/vector-config-common/src/human_friendly.rs @@ -48,7 +48,7 @@ static WELL_KNOWN_ACRONYMS: Lazy> = Lazy::new(|| { "api", "amqp", "aws", "ec2", "ecs", "gcp", "hec", "http", "https", "nats", "nginx", "s3", "sqs", "tls", "ssl", "otel", "gelf", "csv", "json", "rfc3339", "lz4", "us", "eu", "bsd", "vrl", "tcp", "udp", "id", "uuid", "kms", "uri", "url", "acp", "uid", "ip", "pid", - "ndjson", "ewma", "rtt", "cpu", "acl", + "ndjson", "ewma", "rtt", "cpu", "acl", "imds", "acl", "alpn", "sasl", ]; acronyms.iter().map(|s| s.to_lowercase()).collect() diff --git a/lib/vector-config-common/src/schema/mod.rs b/lib/vector-config-common/src/schema/mod.rs index 2f4c6876de35e..763cdd217e8fc 100644 --- a/lib/vector-config-common/src/schema/mod.rs +++ b/lib/vector-config-common/src/schema/mod.rs @@ -8,7 +8,9 @@ pub mod visit; pub(crate) const DEFINITIONS_PREFIX: &str = "#/definitions/"; -pub type Map = indexmap::IndexMap; +// We have chosen the `BTree*` types here instead of hash tables to provide for a consistent +// ordering of the output elements between runs and changes to the configuration. 
+pub type Map = std::collections::BTreeMap; pub type Set = std::collections::BTreeSet; pub use self::gen::{SchemaGenerator, SchemaSettings}; diff --git a/lib/vector-config-macros/Cargo.toml b/lib/vector-config-macros/Cargo.toml index 64d8c48d3bc70..2a133adaa4fcd 100644 --- a/lib/vector-config-macros/Cargo.toml +++ b/lib/vector-config-macros/Cargo.toml @@ -16,5 +16,5 @@ syn = { version = "1.0", default-features = false, features = ["full", "extra-tr vector-config-common = { path = "../vector-config-common" } [dev-dependencies] -serde = { version = "1.0.163", default-features = false } +serde = { version = "1.0.164", default-features = false } vector-config = { path = "../vector-config" } diff --git a/lib/vector-config-macros/src/lib.rs b/lib/vector-config-macros/src/lib.rs index e85fd721cf30a..de4730dec22d5 100644 --- a/lib/vector-config-macros/src/lib.rs +++ b/lib/vector-config-macros/src/lib.rs @@ -1,5 +1,4 @@ #![deny(warnings)] -#![allow(clippy::needless_pub_self)] use proc_macro::TokenStream; diff --git a/lib/vector-config/Cargo.toml b/lib/vector-config/Cargo.toml index 542c02480a035..e1acd6593233c 100644 --- a/lib/vector-config/Cargo.toml +++ b/lib/vector-config/Cargo.toml @@ -12,9 +12,9 @@ path = "tests/integration/lib.rs" [dependencies] chrono = { version = "0.4.19", default-features = false } -chrono-tz = { version = "0.8.2", default-features = false } +chrono-tz = { version = "0.8.3", default-features = false } encoding_rs = { version = "0.8", default-features = false, features = ["alloc", "serde"] } -indexmap = { version = "1.9", default-features = false } +indexmap = { version = "2.0", default-features = false, features = ["std"] } inventory = { version = "0.3" } no-proxy = { version = "0.3.1", default-features = false, features = ["serialize"] } num-traits = { version = "0.2.15", default-features = false } @@ -23,10 +23,10 @@ serde = { version = "1.0", default-features = false } serde_json = { version = "1.0", default-features = false, features = ["std"] } serde_with = { version = "2.3.2", default-features = false, features = ["std"] } snafu = { version = "0.7.4", default-features = false } -toml = { version = "0.7.3", default-features = false } +toml = { version = "0.7.5", default-features = false } tracing = { version = "0.1.34", default-features = false } -url = { version = "2.3.1", default-features = false, features = ["serde"] } -vrl = { git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0", default-features = false, features = ["compiler"] } +url = { version = "2.4.0", default-features = false, features = ["serde"] } +vrl.workspace = true vector-config-common = { path = "../vector-config-common" } vector-config-macros = { path = "../vector-config-macros" } diff --git a/lib/vector-config/src/lib.rs b/lib/vector-config/src/lib.rs index 11b2e52dd97f6..38904b60c6832 100644 --- a/lib/vector-config/src/lib.rs +++ b/lib/vector-config/src/lib.rs @@ -102,12 +102,7 @@ // title/description from the field instead, which could be useful when using newtype wrappers // around existing/remote types for the purpose of making them `Configurable`. #![deny(warnings)] -#![allow( - clippy::default_constructed_unit_structs, - clippy::explicit_iter_loop, - clippy::needless_pub_self, - clippy::useless_conversion -)] + // Re-export of the various public dependencies required by the generated code to simplify the import requirements for // crates actually using the macros/derives. 
pub mod indexmap { diff --git a/lib/vector-config/src/schema/visitors/inline_single.rs b/lib/vector-config/src/schema/visitors/inline_single.rs index 68347e36937dd..20035118b4470 100644 --- a/lib/vector-config/src/schema/visitors/inline_single.rs +++ b/lib/vector-config/src/schema/visitors/inline_single.rs @@ -43,17 +43,11 @@ impl Visitor for InlineSingleUseReferencesVisitor { occurrence_visitor.visit_root_schema(root); let occurrence_map = occurrence_visitor.into_occurrences(); - let eligible_to_inline = occurrence_map + self.eligible_to_inline = occurrence_map .into_iter() // Filter out any schemas which have more than one occurrence, as naturally, we're // trying to inline single-use schema references. :) - .filter_map(|(def_name, occurrences)| { - if occurrences == 1 { - Some(def_name) - } else { - None - } - }) + .filter_map(|(def_name, occurrences)| (occurrences == 1).then_some(def_name)) // However, we'll also filter out some specific schema definitions which are only // referenced once, specifically: component base types and component types themselves. // @@ -72,8 +66,6 @@ impl Visitor for InlineSingleUseReferencesVisitor { .map(|s| s.as_ref().to_string()) .collect::>(); - self.eligible_to_inline = eligible_to_inline; - // Now run our own visitor logic, which will use the inline eligibility to determine if a // schema reference in a being-visited schema should be replaced inline with the original // referenced schema, in turn removing the schema definition. diff --git a/lib/vector-config/src/schema/visitors/merge.rs b/lib/vector-config/src/schema/visitors/merge.rs index 741b74157e137..a05478caef168 100644 --- a/lib/vector-config/src/schema/visitors/merge.rs +++ b/lib/vector-config/src/schema/visitors/merge.rs @@ -95,7 +95,7 @@ impl Mergeable for serde_json::Map { impl Mergeable for Map where - K: std::hash::Hash + Eq + Clone, + K: Clone + Eq + Ord, V: Clone + Mergeable, { fn merge(&mut self, other: &Self) { @@ -261,7 +261,7 @@ where fn merge_map(destination: &mut Map, source: &Map) where - K: std::hash::Hash + Eq + Clone, + K: Clone + Eq + Ord, V: Clone + Mergeable, { destination.merge(source); diff --git a/lib/vector-config/src/schema/visitors/unevaluated.rs b/lib/vector-config/src/schema/visitors/unevaluated.rs index 1619465cde24a..105c3dbfcaac4 100644 --- a/lib/vector-config/src/schema/visitors/unevaluated.rs +++ b/lib/vector-config/src/schema/visitors/unevaluated.rs @@ -361,9 +361,9 @@ fn is_markable_schema(definitions: &Map, schema: &SchemaObject) .as_ref() .and_then(|reference| { let reference = get_cleaned_schema_reference(reference); - definitions.get_full(reference) + definitions.get_key_value(reference) }) - .and_then(|(_, name, schema)| schema.as_object().map(|schema| (name, schema))) + .and_then(|(name, schema)| schema.as_object().map(|schema| (name, schema))) .map_or(false, |(name, schema)| { debug!( "Following schema reference '{}' for subschema markability.", diff --git a/lib/vector-core/Cargo.toml b/lib/vector-core/Cargo.toml index 8d4a49cd7c892..44e04b859fb92 100644 --- a/lib/vector-core/Cargo.toml +++ b/lib/vector-core/Cargo.toml @@ -6,12 +6,12 @@ edition = "2021" publish = false [dependencies] -async-graphql = { version = "5.0.8", default-features = false, features = ["playground" ], optional = true } +async-graphql = { version = "5.0.10", default-features = false, features = ["playground" ], optional = true } async-trait = { version = "0.1", default-features = false } bitmask-enum = { version = "2.1.0", default-features = false } bytes = { version = "1.4.0", 
default-features = false, features = ["serde"] } chrono = { version = "0.4.19", default-features = false, features = ["serde"] } -crossbeam-utils = { version = "0.8.15", default-features = false } +crossbeam-utils = { version = "0.8.16", default-features = false } db-key = { version = "0.0.5", default-features = false, optional = true } deadpool-postgres = { version = "0.10.5"} dyn-clone = { version = "1.0.11", default-features = false } @@ -23,37 +23,37 @@ futures-util = { version = "0.3.28", default-features = false, features = ["std" headers = { version = "0.3.8", default-features = false } http = { version = "0.2.9", default-features = false } hyper-proxy = { version = "0.9.1", default-features = false, features = ["openssl-tls"] } -indexmap = { version = "~1.9.3", default-features = false, features = ["serde"] } +indexmap = { version = "~2.0.0", default-features = false, features = ["serde", "std"] } lookup = { package = "vector-lookup", path = "../vector-lookup" } metrics = "0.21.0" metrics-tracing-context = { version = "0.14.0", default-features = false } metrics-util = { version = "0.15.0", default-features = false, features = ["registry"] } mlua = { version = "0.8.9", default-features = false, features = ["lua54", "send", "vendored"], optional = true } no-proxy = { version = "0.3.2", default-features = false, features = ["serialize"] } -once_cell = { version = "1.17", default-features = false } +once_cell = { version = "1.18", default-features = false } ordered-float = { version = "3.7.0", default-features = false } -openssl = { version = "0.10.52", default-features = false, features = ["vendored"] } +openssl = { version = "0.10.55", default-features = false, features = ["vendored"] } parking_lot = { version = "0.12.1", default-features = false } -pin-project = { version = "1.1.0", default-features = false } -proptest = { version = "1.1", optional = true } +pin-project = { version = "1.1.1", default-features = false } +proptest = { version = "1.2", optional = true } prost-types = { version = "0.11", default-features = false } prost = { version = "0.11", default-features = false, features = ["std"] } -quanta = { version = "0.11.0", default-features = false } -regex = { version = "1.8.1", default-features = false, features = ["std", "perf"] } reqwest = { version = "0.11", features = ["json"] } +quanta = { version = "0.11.1", default-features = false } +regex = { version = "1.8.4", default-features = false, features = ["std", "perf"] } ryu = { version = "1", default-features = false } -serde = { version = "1.0.163", default-features = false, features = ["derive", "rc"] } -serde_json = { version = "1.0.96", default-features = false } +serde = { version = "1.0.164", default-features = false, features = ["derive", "rc"] } +serde_json = { version = "1.0.99", default-features = false } serde_with = { version = "2.3.2", default-features = false, features = ["std", "macros"] } smallvec = { version = "1", default-features = false, features = ["serde", "const_generics"] } snafu = { version = "0.7.4", default-features = false } socket2 = { version = "0.5.3", default-features = false } -tokio = { version = "1.28.1", default-features = false, features = ["net"] } +tokio = { version = "1.29.0", default-features = false, features = ["net"] } tokio-openssl = { version = "0.6.3", default-features = false } tokio-stream = { version = "0.1", default-features = false, features = ["time"], optional = true } tokio-util = { version = "0.7.0", default-features = false, features = ["time"] } tokio-postgres 
= { version = "0.7.7", default-features = false, features = ["runtime", "with-chrono-0_4", "with-uuid-1", "with-serde_json-1"] } -toml = { version = "0.7.3", default-features = false } +toml = { version = "0.7.5", default-features = false } tonic = { version = "0.9", default-features = false, features = ["transport"] } tower = { version = "0.4", default-features = false, features = ["util"] } tracing = { version = "0.1.34", default-features = false } @@ -70,11 +70,11 @@ vector-common = { path = "../vector-common" } vector-config = { path = "../vector-config" } vector-config-common = { path = "../vector-config-common" } vector-config-macros = { path = "../vector-config-macros" } -vrl = { git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0" } opentelemetry-rs = { version = "1", branch = "main" , git = "ssh://git@github.com/answerbook/opentelemetry-rs.git" } +vrl.workspace = true [target.'cfg(target_os = "macos")'.dependencies] -security-framework = "2.9.0" +security-framework = "2.9.1" [target.'cfg(windows)'.dependencies] schannel = "0.1.21" @@ -83,17 +83,17 @@ schannel = "0.1.21" prost-build = "0.11" [dev-dependencies] -base64 = "0.21.0" -chrono-tz = { version = "0.8.2", default-features = false } -criterion = { version = "0.4.0", features = ["html_reports"] } +base64 = "0.21.2" +chrono-tz = { version = "0.8.3", default-features = false } +criterion = { version = "0.5.1", features = ["html_reports"] } env-test-util = "1.0.1" httptest = "0.15.4" quickcheck = "1" quickcheck_macros = "1" -proptest = "1.1" +proptest = "1.2" similar-asserts = "1.4.2" tokio-test = "0.4.2" -toml = { version = "0.7.3", default-features = false, features = ["parse"] } +toml = { version = "0.7.5", default-features = false, features = ["parse"] } ndarray = "0.15.6" ndarray-stats = "0.5.1" noisy_float = "0.2.0" @@ -101,7 +101,7 @@ rand = "0.8.5" rand_distr = "0.4.3" tracing-subscriber = { version = "0.3.17", default-features = false, features = ["env-filter", "fmt", "ansi", "registry"] } vector-common = { path = "../vector-common", default-features = false, features = ["test"] } -vrl = { git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0", default-features = false, features = ["value", "arbitrary", "lua"] } +vrl.workspace = true [features] api = ["dep:async-graphql"] diff --git a/lib/vector-core/src/config/global_options.rs b/lib/vector-core/src/config/global_options.rs index 3e63d863f5ab0..af86b177095d5 100644 --- a/lib/vector-core/src/config/global_options.rs +++ b/lib/vector-core/src/config/global_options.rs @@ -5,6 +5,7 @@ use vector_common::TimeZone; use vector_config::configurable_component; use super::super::default_data_dir; +use super::Telemetry; use super::{proxy::ProxyConfig, AcknowledgementsConfig, LogSchema}; use crate::serde::bool_or_struct; @@ -55,6 +56,16 @@ pub struct GlobalOptions { )] pub log_schema: LogSchema, + /// Telemetry options. + /// + /// Determines whether `source` and `service` tags should be emitted with the + /// `component_sent_*` and `component_received_*` events. + #[serde( + default, + skip_serializing_if = "crate::serde::skip_serializing_if_default" + )] + pub telemetry: Telemetry, + /// The name of the time zone to apply to timestamp conversions that do not contain an explicit time zone. 
/// /// The time zone name may be any name in the [TZ database][tzdb] or `local` to indicate system @@ -218,10 +229,14 @@ impl GlobalOptions { errors.extend(merge_errors); } + let mut telemetry = self.telemetry.clone(); + telemetry.merge(&with.telemetry); + if errors.is_empty() { Ok(Self { data_dir, log_schema, + telemetry, acknowledgements: self.acknowledgements.merge_default(&with.acknowledgements), timezone: self.timezone.or(with.timezone), proxy: self.proxy.merge(&with.proxy), diff --git a/lib/vector-core/src/config/mod.rs b/lib/vector-core/src/config/mod.rs index 6ef8fc1cb0f5f..71786155d1d8f 100644 --- a/lib/vector-core/src/config/mod.rs +++ b/lib/vector-core/src/config/mod.rs @@ -1,3 +1,4 @@ +use std::sync::Arc; use std::{collections::HashMap, fmt, num::NonZeroUsize}; use bitmask_enum::bitmask; @@ -8,6 +9,7 @@ mod global_options; mod log_schema; pub mod output_id; pub mod proxy; +mod telemetry; use crate::event::LogEvent; pub use global_options::GlobalOptions; @@ -15,6 +17,7 @@ pub use log_schema::{init_log_schema, log_schema, LogSchema}; use lookup::{lookup_v2::ValuePath, path, PathPrefix}; pub use output_id::OutputId; use serde::{Deserialize, Serialize}; +pub use telemetry::{init_telemetry, telemetry, Tags, Telemetry}; pub use vector_common::config::ComponentKey; use vector_config::configurable_component; use vrl::value::Value; @@ -109,7 +112,7 @@ pub struct SourceOutput { // NOTE: schema definitions are only implemented/supported for log-type events. There is no // inherent blocker to support other types as well, but it'll require additional work to add // the relevant schemas, and store them separately in this type. - pub schema_definition: Option, + pub schema_definition: Option>, } impl SourceOutput { @@ -127,7 +130,7 @@ impl SourceOutput { Self { port: None, ty, - schema_definition: Some(schema_definition), + schema_definition: Some(Arc::new(schema_definition)), } } @@ -166,17 +169,15 @@ impl SourceOutput { /// Schema enabled is set in the users configuration. #[must_use] pub fn schema_definition(&self, schema_enabled: bool) -> Option { + use std::ops::Deref; + self.schema_definition.as_ref().map(|definition| { if schema_enabled { - definition.clone() + definition.deref().clone() } else { let mut new_definition = schema::Definition::default_for_namespace(definition.log_namespaces()); - - if definition.log_namespaces().contains(&LogNamespace::Vector) { - new_definition.add_meanings(definition.meanings()); - } - + new_definition.add_meanings(definition.meanings()); new_definition } }) @@ -201,7 +202,7 @@ pub struct TransformOutput { /// enabled, at least one definition should be output. If the transform /// has multiple connected sources, it is possible to have multiple output /// definitions - one for each input. 
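One detail worth calling out in the `SourceOutput` change above: the schema definition is now stored behind an `Arc`, so cloning an output no longer deep-copies the definition, and `schema_definition(true)` only clones the inner value out when a caller actually needs an owned copy. The sketch below illustrates the shape with a hypothetical `Definition` stand-in; it is not the real vector-core type.

use std::sync::Arc;

// Hypothetical stand-in for `schema::Definition`.
#[derive(Clone, Debug, PartialEq)]
pub struct Definition {
    fields: Vec<String>,
}

// Mirrors the new field shape: the definition is shared behind an Arc so
// cloning the output is a reference-count bump, not a deep copy.
pub struct SourceOutput {
    pub schema_definition: Option<Arc<Definition>>,
}

impl SourceOutput {
    pub fn new_logs(definition: Definition) -> Self {
        Self {
            schema_definition: Some(Arc::new(definition)),
        }
    }

    // With schemas enabled the caller gets an owned copy of the shared value;
    // with schemas disabled a default definition would be built instead.
    pub fn schema_definition(&self, schema_enabled: bool) -> Option<Definition> {
        self.schema_definition.as_ref().map(|definition| {
            if schema_enabled {
                definition.as_ref().clone()
            } else {
                Definition { fields: Vec::new() }
            }
        })
    }
}

fn main() {
    let output = SourceOutput::new_logs(Definition {
        fields: vec!["message".into()],
    });
    let shared = output.schema_definition.clone();
    // Two handles to the same definition: the one in `output` and our clone.
    assert_eq!(Arc::strong_count(shared.as_ref().unwrap()), 2);
    assert_eq!(
        output.schema_definition(true).unwrap().fields,
        vec!["message".to_string()]
    );
}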
- log_schema_definitions: HashMap, + pub log_schema_definitions: HashMap, } impl TransformOutput { @@ -243,11 +244,7 @@ impl TransformOutput { .map(|(output, definition)| { let mut new_definition = schema::Definition::default_for_namespace(definition.log_namespaces()); - - if definition.log_namespaces().contains(&LogNamespace::Vector) { - new_definition.add_meanings(definition.meanings()); - } - + new_definition.add_meanings(definition.meanings()); (output.clone(), new_definition) }) .collect() @@ -549,10 +546,12 @@ impl LogNamespace { #[cfg(test)] mod test { - use crate::config::{init_log_schema, LogNamespace, LogSchema}; + use super::*; use crate::event::LogEvent; use chrono::Utc; - use lookup::event_path; + use lookup::{event_path, owned_value_path, OwnedTargetPath}; + use vector_common::btreemap; + use vrl::value::Kind; #[test] fn test_insert_standard_vector_source_metadata() { @@ -568,4 +567,110 @@ mod test { assert!(event.get(event_path!("a", "b", "c", "d")).is_some()); } + + #[test] + fn test_source_definitions_legacy() { + let definition = schema::Definition::empty_legacy_namespace() + .with_event_field(&owned_value_path!("zork"), Kind::bytes(), Some("zork")) + .with_event_field(&owned_value_path!("nork"), Kind::integer(), None); + let output = SourceOutput::new_logs(DataType::Log, definition); + + let valid_event = LogEvent::from(Value::from(btreemap! { + "zork" => "norknoog", + "nork" => 32 + })) + .into(); + + let invalid_event = LogEvent::from(Value::from(btreemap! { + "nork" => 32 + })) + .into(); + + // Get a definition with schema enabled. + let new_definition = output.schema_definition(true).unwrap(); + + // Meanings should still exist. + assert_eq!( + Some(&OwnedTargetPath::event(owned_value_path!("zork"))), + new_definition.meaning_path("zork") + ); + + // Events should have the schema validated. + new_definition.assert_valid_for_event(&valid_event); + new_definition.assert_invalid_for_event(&invalid_event); + + // There should be the default legacy definition without schemas enabled. + assert_eq!( + Some( + schema::Definition::default_legacy_namespace() + .with_meaning(OwnedTargetPath::event(owned_value_path!("zork")), "zork") + ), + output.schema_definition(false) + ); + } + + #[test] + fn test_source_definitons_vector() { + let definition = schema::Definition::default_for_namespace(&[LogNamespace::Vector].into()) + .with_metadata_field( + &owned_value_path!("vector", "zork"), + Kind::integer(), + Some("zork"), + ) + .with_event_field(&owned_value_path!("nork"), Kind::integer(), None); + + let output = SourceOutput::new_logs(DataType::Log, definition); + + let mut valid_event = LogEvent::from(Value::from(btreemap! { + "nork" => 32 + })); + + valid_event + .metadata_mut() + .value_mut() + .insert(path!("vector").concat("zork"), 32); + + let valid_event = valid_event.into(); + + let mut invalid_event = LogEvent::from(Value::from(btreemap! { + "nork" => 32 + })); + + invalid_event + .metadata_mut() + .value_mut() + .insert(path!("vector").concat("zork"), "noog"); + + let invalid_event = invalid_event.into(); + + // Get a definition with schema enabled. + let new_definition = output.schema_definition(true).unwrap(); + + // Meanings should still exist. + assert_eq!( + Some(&OwnedTargetPath::metadata(owned_value_path!( + "vector", "zork" + ))), + new_definition.meaning_path("zork") + ); + + // Events should have the schema validated. 
+ new_definition.assert_valid_for_event(&valid_event); + new_definition.assert_invalid_for_event(&invalid_event); + + // Get a definition without schema enabled. + let new_definition = output.schema_definition(false).unwrap(); + + // Meanings should still exist. + assert_eq!( + Some(&OwnedTargetPath::metadata(owned_value_path!( + "vector", "zork" + ))), + new_definition.meaning_path("zork") + ); + + // Events should not have the schema validated. + new_definition.assert_valid_for_event(&valid_event); + new_definition.assert_valid_for_event(&invalid_event); + } } diff --git a/lib/vector-core/src/config/proxy.rs b/lib/vector-core/src/config/proxy.rs index 266ffed72d167..afc4d58a56f21 100644 --- a/lib/vector-core/src/config/proxy.rs +++ b/lib/vector-core/src/config/proxy.rs @@ -41,7 +41,7 @@ impl NoProxyInterceptor { /// /// Configure to proxy traffic through an HTTP(S) proxy when making external requests. /// -/// Similar to common proxy configuration convention, users can set different proxies +/// Similar to common proxy configuration convention, you can set different proxies /// to use based on the type of traffic being proxied, as well as set specific hosts that /// should not be proxied. #[configurable_component] @@ -201,7 +201,12 @@ impl ProxyConfig { mod tests { use base64::prelude::{Engine as _, BASE64_STANDARD}; use env_test_util::TempEnvVar; - use http::{HeaderValue, Uri}; + use http::{ + header::{AUTHORIZATION, PROXY_AUTHORIZATION}, + HeaderName, HeaderValue, Uri, + }; + + const PROXY_HEADERS: [HeaderName; 2] = [AUTHORIZATION, PROXY_AUTHORIZATION]; use super::*; @@ -341,20 +346,18 @@ mod tests { Some(first.uri()), Uri::try_from("http://user:pass@1.2.3.4:5678").as_ref().ok() ); - assert_eq!( - first.headers().get("authorization"), - expected_header_value.as_ref().ok() - ); + for h in &PROXY_HEADERS { + assert_eq!(first.headers().get(h), expected_header_value.as_ref().ok()); + } assert_eq!( Some(second.uri()), Uri::try_from("https://user:pass@2.3.4.5:9876") .as_ref() .ok() ); - assert_eq!( - second.headers().get("authorization"), - expected_header_value.as_ref().ok() - ); + for h in &PROXY_HEADERS { + assert_eq!(second.headers().get(h), expected_header_value.as_ref().ok()); + } } #[ignore] @@ -371,10 +374,8 @@ mod tests { .expect("should not be None"); let encoded_header = format!("Basic {}", BASE64_STANDARD.encode("user:P@ssw0rd")); let expected_header_value = HeaderValue::from_str(encoded_header.as_str()); - - assert_eq!( - first.headers().get("authorization"), - expected_header_value.as_ref().ok() - ); + for h in &PROXY_HEADERS { + assert_eq!(first.headers().get(h), expected_header_value.as_ref().ok()); + } } } diff --git a/lib/vector-core/src/config/telemetry.rs b/lib/vector-core/src/config/telemetry.rs new file mode 100644 index 0000000000000..71348c509ef94 --- /dev/null +++ b/lib/vector-core/src/config/telemetry.rs @@ -0,0 +1,93 @@ +use once_cell::sync::{Lazy, OnceCell}; +use vector_common::request_metadata::GroupedCountByteSize; +use vector_config::configurable_component; + +static TELEMETRY: OnceCell = OnceCell::new(); +static TELEMETRY_DEFAULT: Lazy = Lazy::new(Telemetry::default); + +/// Loads the telemetry options from configurations and sets the global options. +/// Once this is done, configurations can be correctly loaded using configured +/// log schema defaults. +/// +/// # Errors +/// +/// This function will fail if the `builder` fails. +/// +/// # Panics +/// +/// If deny is set, will panic if telemetry has already been set. 
+pub fn init_telemetry(telemetry: Telemetry, deny_if_set: bool) { + assert!( + !(TELEMETRY.set(telemetry).is_err() && deny_if_set), + "Couldn't set telemetry" + ); +} + +/// Returns the telemetry configuration options. +pub fn telemetry() -> &'static Telemetry { + TELEMETRY.get().unwrap_or(&TELEMETRY_DEFAULT) +} + +/// Sets options for the telemetry that Vector emits. +#[configurable_component] +#[derive(Clone, Debug, Eq, PartialEq, Default)] +#[serde(default)] +pub struct Telemetry { + #[configurable(derived)] + pub tags: Tags, +} + +impl Telemetry { + /// Merge two `Telemetry` instances together. + pub fn merge(&mut self, other: &Telemetry) { + self.tags.emit_service = self.tags.emit_service || other.tags.emit_service; + self.tags.emit_source = self.tags.emit_source || other.tags.emit_source; + } + + /// Returns true if any of the tag options are true. + pub fn has_tags(&self) -> bool { + self.tags.emit_service || self.tags.emit_source + } + + pub fn tags(&self) -> &Tags { + &self.tags + } + + /// The variant of `GroupedCountByteSize` + pub fn create_request_count_byte_size(&self) -> GroupedCountByteSize { + if self.has_tags() { + GroupedCountByteSize::new_tagged() + } else { + GroupedCountByteSize::new_untagged() + } + } +} + +/// Configures whether to emit certain tags +#[configurable_component] +#[derive(Clone, Debug, Eq, PartialEq, Default)] +#[serde(default)] +pub struct Tags { + /// True if the `service` tag should be emitted + /// in the `component_received_*` and `component_sent_*` + /// telemetry. + pub emit_service: bool, + + /// True if the `source` tag should be emitted + /// in the `component_received_*` and `component_sent_*` + /// telemetry. + pub emit_source: bool, +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn partial_telemetry() { + let toml = r#" + emit_source = true + "#; + toml::from_str::(toml).unwrap(); + } +} diff --git a/lib/vector-core/src/event/array.rs b/lib/vector-core/src/event/array.rs index bc30573147d45..9cdafbcf569d4 100644 --- a/lib/vector-core/src/event/array.rs +++ b/lib/vector-core/src/event/array.rs @@ -8,13 +8,17 @@ use futures::{stream, Stream}; #[cfg(test)] use quickcheck::{Arbitrary, Gen}; use vector_buffers::EventCount; -use vector_common::finalization::{AddBatchNotifier, BatchNotifier, EventFinalizers, Finalizable}; +use vector_common::{ + config::ComponentKey, + finalization::{AddBatchNotifier, BatchNotifier, EventFinalizers, Finalizable}, + json_size::JsonSize, +}; use super::{ EstimatedJsonEncodedSizeOf, Event, EventDataEq, EventFinalizer, EventMutRef, EventRef, LogEvent, Metric, TraceEvent, }; -use crate::{config::OutputId, ByteSizeOf}; +use crate::ByteSizeOf; /// The type alias for an array of `LogEvent` elements. pub type LogArray = Vec; @@ -139,7 +143,7 @@ pub enum EventArray { impl EventArray { /// Sets the `OutputId` in the metadata for all the events in this array. 
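The telemetry module added above follows a common configuration pattern: a `OnceCell` global set once at startup, a `Lazy` default for reads before initialization, and `#[serde(default)]` so partial TOML deserializes cleanly. The condensed standalone version below (using once_cell, serde, and toml directly, with plain derives instead of `configurable_component`) is a sketch of that pattern rather than the crate's exact code.

use once_cell::sync::{Lazy, OnceCell};
use serde::Deserialize;

#[derive(Clone, Debug, Default, PartialEq, Eq, Deserialize)]
#[serde(default)]
pub struct Tags {
    pub emit_service: bool,
    pub emit_source: bool,
}

#[derive(Clone, Debug, Default, PartialEq, Eq, Deserialize)]
#[serde(default)]
pub struct Telemetry {
    pub tags: Tags,
}

impl Telemetry {
    // Merging is a logical OR: if either side asked for a tag, keep it on.
    pub fn merge(&mut self, other: &Telemetry) {
        self.tags.emit_service = self.tags.emit_service || other.tags.emit_service;
        self.tags.emit_source = self.tags.emit_source || other.tags.emit_source;
    }
}

static TELEMETRY: OnceCell<Telemetry> = OnceCell::new();
static TELEMETRY_DEFAULT: Lazy<Telemetry> = Lazy::new(Telemetry::default);

pub fn init_telemetry(telemetry: Telemetry, deny_if_set: bool) {
    // Setting twice is only an error when the caller asked to deny it.
    assert!(
        !(TELEMETRY.set(telemetry).is_err() && deny_if_set),
        "Couldn't set telemetry"
    );
}

pub fn telemetry() -> &'static Telemetry {
    // Fall back to defaults if configuration has not been loaded yet.
    TELEMETRY.get().unwrap_or(&TELEMETRY_DEFAULT)
}

fn main() {
    // `#[serde(default)]` lets partially specified TOML deserialize cleanly.
    let configured: Telemetry = toml::from_str("[tags]\nemit_source = true\n").unwrap();
    init_telemetry(configured, false);
    assert!(telemetry().tags.emit_source);
    assert!(!telemetry().tags.emit_service);
}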
- pub fn set_output_id(&mut self, output_id: &Arc) { + pub fn set_output_id(&mut self, output_id: &Arc) { match self { EventArray::Logs(logs) => { for log in logs { @@ -253,7 +257,7 @@ impl ByteSizeOf for EventArray { } impl EstimatedJsonEncodedSizeOf for EventArray { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { match self { Self::Logs(v) => v.estimated_json_encoded_size_of(), Self::Traces(v) => v.estimated_json_encoded_size_of(), diff --git a/lib/vector-core/src/event/estimated_json_encoded_size_of.rs b/lib/vector-core/src/event/estimated_json_encoded_size_of.rs index 8ee523d13f5cd..b671c8a817919 100644 --- a/lib/vector-core/src/event/estimated_json_encoded_size_of.rs +++ b/lib/vector-core/src/event/estimated_json_encoded_size_of.rs @@ -4,11 +4,12 @@ use bytes::Bytes; use chrono::{DateTime, Timelike, Utc}; use ordered_float::NotNan; use smallvec::SmallVec; +use vector_common::json_size::JsonSize; use vrl::value::Value; -const NULL_SIZE: usize = 4; -const TRUE_SIZE: usize = 4; -const FALSE_SIZE: usize = 5; +const NULL_SIZE: JsonSize = JsonSize::new(4); +const TRUE_SIZE: JsonSize = JsonSize::new(4); +const FALSE_SIZE: JsonSize = JsonSize::new(5); const BRACKETS_SIZE: usize = 2; const BRACES_SIZE: usize = 2; @@ -40,17 +41,17 @@ const EPOCH_RFC3339_9: &str = "1970-01-01T00:00:00.000000000Z"; /// /// Ideally, no allocations should take place in any implementation of this function. pub trait EstimatedJsonEncodedSizeOf { - fn estimated_json_encoded_size_of(&self) -> usize; + fn estimated_json_encoded_size_of(&self) -> JsonSize; } impl EstimatedJsonEncodedSizeOf for &T { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { T::estimated_json_encoded_size_of(self) } } impl EstimatedJsonEncodedSizeOf for Option { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { match self { Some(v) => v.estimated_json_encoded_size_of(), None => NULL_SIZE, @@ -61,13 +62,13 @@ impl EstimatedJsonEncodedSizeOf for Option { impl EstimatedJsonEncodedSizeOf for SmallVec<[T; N]> { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { self.iter().map(T::estimated_json_encoded_size_of).sum() } } impl EstimatedJsonEncodedSizeOf for Value { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { match self { Value::Timestamp(v) => v.estimated_json_encoded_size_of(), Value::Object(v) => v.estimated_json_encoded_size_of(), @@ -88,25 +89,25 @@ impl EstimatedJsonEncodedSizeOf for Value { /// This is the main reason why `EstimatedJsonEncodedSizeOf` is named as is, as most other types can /// be calculated exactly without a noticable performance penalty. 
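The impls that follow switch from bare `usize` to the `JsonSize` newtype, and the per-integer-type if/else ladders further down are just decimal digit counts (plus one character for a sign, plus quotes for strings). As a sanity check on those tables, here is a loop-based equivalent built around a hypothetical `JsonSize` stand-in; the real newtype lives in vector-common and is assumed, not copied.

// Hypothetical stand-in for vector_common's `JsonSize` newtype; the real type
// offers more operations than shown here.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct JsonSize(usize);

impl JsonSize {
    const fn new(size: usize) -> Self {
        JsonSize(size)
    }
    fn get(self) -> usize {
        self.0
    }
}

// The branchy tables below boil down to counting decimal digits, plus one
// byte for a leading '-' on negative values.
fn estimated_json_size_of_i64(value: i64) -> JsonSize {
    let sign = usize::from(value < 0);
    let mut magnitude = value.unsigned_abs();
    let mut digits = 1;
    while magnitude >= 10 {
        magnitude /= 10;
        digits += 1;
    }
    JsonSize::new(sign + digits)
}

fn main() {
    assert_eq!(estimated_json_size_of_i64(0), JsonSize::new(1));
    assert_eq!(estimated_json_size_of_i64(-128), JsonSize::new(4));
    assert_eq!(estimated_json_size_of_i64(i64::MAX), JsonSize::new(19));
    assert_eq!(estimated_json_size_of_i64(i64::MIN), JsonSize::new(20));
    // A quick cross-check against the serialized form.
    assert_eq!(
        estimated_json_size_of_i64(-987).get(),
        (-987i64).to_string().len()
    );
}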
impl EstimatedJsonEncodedSizeOf for str { - fn estimated_json_encoded_size_of(&self) -> usize { - QUOTES_SIZE + self.len() + fn estimated_json_encoded_size_of(&self) -> JsonSize { + JsonSize::new(QUOTES_SIZE + self.len()) } } impl EstimatedJsonEncodedSizeOf for String { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { self.as_str().estimated_json_encoded_size_of() } } impl EstimatedJsonEncodedSizeOf for Bytes { - fn estimated_json_encoded_size_of(&self) -> usize { - QUOTES_SIZE + self.len() + fn estimated_json_encoded_size_of(&self) -> JsonSize { + JsonSize::new(QUOTES_SIZE + self.len()) } } impl EstimatedJsonEncodedSizeOf for bool { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { if *self { TRUE_SIZE } else { @@ -116,19 +117,19 @@ impl EstimatedJsonEncodedSizeOf for bool { } impl EstimatedJsonEncodedSizeOf for f64 { - fn estimated_json_encoded_size_of(&self) -> usize { - ryu::Buffer::new().format_finite(*self).len() + fn estimated_json_encoded_size_of(&self) -> JsonSize { + ryu::Buffer::new().format_finite(*self).len().into() } } impl EstimatedJsonEncodedSizeOf for f32 { - fn estimated_json_encoded_size_of(&self) -> usize { - ryu::Buffer::new().format_finite(*self).len() + fn estimated_json_encoded_size_of(&self) -> JsonSize { + ryu::Buffer::new().format_finite(*self).len().into() } } impl EstimatedJsonEncodedSizeOf for NotNan { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { self.into_inner().estimated_json_encoded_size_of() } } @@ -140,19 +141,19 @@ where K: AsRef, V: EstimatedJsonEncodedSizeOf, { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { let size = self.iter().fold(BRACES_SIZE, |acc, (k, v)| { - acc + k.as_ref().estimated_json_encoded_size_of() + acc + k.as_ref().estimated_json_encoded_size_of().get() + COLON_SIZE - + v.estimated_json_encoded_size_of() + + v.estimated_json_encoded_size_of().get() + COMMA_SIZE }); - if size > BRACES_SIZE { + JsonSize::new(if size > BRACES_SIZE { size - COMMA_SIZE } else { size - } + }) } } @@ -164,19 +165,19 @@ where V: EstimatedJsonEncodedSizeOf, S: ::std::hash::BuildHasher, { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { let size = self.iter().fold(BRACES_SIZE, |acc, (k, v)| { - acc + k.as_ref().estimated_json_encoded_size_of() + acc + k.as_ref().estimated_json_encoded_size_of().get() + COLON_SIZE - + v.estimated_json_encoded_size_of() + + v.estimated_json_encoded_size_of().get() + COMMA_SIZE }); - if size > BRACES_SIZE { + JsonSize::new(if size > BRACES_SIZE { size - COMMA_SIZE } else { size - } + }) } } @@ -184,16 +185,16 @@ impl EstimatedJsonEncodedSizeOf for Vec where V: EstimatedJsonEncodedSizeOf, { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { let size = self.iter().fold(BRACKETS_SIZE, |acc, v| { - acc + COMMA_SIZE + v.estimated_json_encoded_size_of() + acc + COMMA_SIZE + v.estimated_json_encoded_size_of().get() }); - if size > BRACKETS_SIZE { + JsonSize::new(if size > BRACKETS_SIZE { size - COMMA_SIZE } else { size - } + }) } } @@ -205,7 +206,7 @@ impl EstimatedJsonEncodedSizeOf for DateTime { /// /// - `chrono::SecondsFormat::AutoSi` is used to calculate nanoseconds precision. /// - `use_z` is `true` for the `chrono::DateTime#to_rfc3339_opts` function call. 
- fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { let ns = self.nanosecond() % 1_000_000_000; let epoch = if ns == 0 { EPOCH_RFC3339_0 @@ -217,202 +218,218 @@ impl EstimatedJsonEncodedSizeOf for DateTime { EPOCH_RFC3339_9 }; - QUOTES_SIZE + epoch.len() + JsonSize::new(QUOTES_SIZE + epoch.len()) } } impl EstimatedJsonEncodedSizeOf for u8 { #[rustfmt::skip] - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { let v = *self; // 0 ..= 255 - if v < 10 { 1 - } else if v < 100 { 2 - } else { 3 } + JsonSize::new( + if v < 10 { 1 + } else if v < 100 { 2 + } else { 3 } + ) } } impl EstimatedJsonEncodedSizeOf for i8 { #[rustfmt::skip] - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { let v = *self; // -128 ..= 127 - if v < -99 { 4 - } else if v < -9 { 3 - } else if v < 0 { 2 - } else if v < 10 { 1 - } else if v < 100 { 2 - } else { 3 } + JsonSize::new( + if v < -99 { 4 + } else if v < -9 { 3 + } else if v < 0 { 2 + } else if v < 10 { 1 + } else if v < 100 { 2 + } else { 3 } + ) } } impl EstimatedJsonEncodedSizeOf for u16 { #[rustfmt::skip] - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { let v = *self; // 0 ..= 65_535 - if v < 10 { 1 - } else if v < 100 { 2 - } else if v < 1_000 { 3 - } else if v < 10_000 { 4 - } else { 5 } + JsonSize::new( + if v < 10 { 1 + } else if v < 100 { 2 + } else if v < 1_000 { 3 + } else if v < 10_000 { 4 + } else { 5 } + ) } } impl EstimatedJsonEncodedSizeOf for i16 { #[rustfmt::skip] - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { let v = *self; // -32_768 ..= 32_767 - if v < -9_999 { 6 - } else if v < -999 { 5 - } else if v < -99 { 4 - } else if v < -9 { 3 - } else if v < 0 { 2 - } else if v < 10 { 1 - } else if v < 100 { 2 - } else if v < 1_000 { 3 - } else if v < 10_000 { 4 - } else { 5 } + JsonSize::new( + if v < -9_999 { 6 + } else if v < -999 { 5 + } else if v < -99 { 4 + } else if v < -9 { 3 + } else if v < 0 { 2 + } else if v < 10 { 1 + } else if v < 100 { 2 + } else if v < 1_000 { 3 + } else if v < 10_000 { 4 + } else { 5 } + ) } } impl EstimatedJsonEncodedSizeOf for u32 { #[rustfmt::skip] - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { let v = *self; // 0 ..= 4_294_967_295 - if v < 10 { 1 - } else if v < 100 { 2 - } else if v < 1_000 { 3 - } else if v < 10_000 { 4 - } else if v < 100_000 { 5 - } else if v < 1_000_000 { 6 - } else if v < 10_000_000 { 7 - } else if v < 100_000_000 { 8 - } else if v < 1_000_000_000 { 9 - } else { 10 } + JsonSize::new( + if v < 10 { 1 + } else if v < 100 { 2 + } else if v < 1_000 { 3 + } else if v < 10_000 { 4 + } else if v < 100_000 { 5 + } else if v < 1_000_000 { 6 + } else if v < 10_000_000 { 7 + } else if v < 100_000_000 { 8 + } else if v < 1_000_000_000 { 9 + } else { 10 } + ) } } impl EstimatedJsonEncodedSizeOf for i32 { #[rustfmt::skip] - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { let v = *self; // -2_147_483_648 ..= 2_147_483_647 - if v < -999_999_999 { 11 - } else if v < -99_999_999 { 10 - } else if v < -9_999_999 { 9 - } else if v < -999_999 { 8 - } else if v < -99_999 { 7 - } else if v < -9_999 { 6 - } else if v < -999 { 5 - } else if v < -99 { 4 - } else if v < -9 { 3 - } else if v < 0 { 2 - } 
else if v < 10 { 1 - } else if v < 100 { 2 - } else if v < 1_000 { 3 - } else if v < 10_000 { 4 - } else if v < 100_000 { 5 - } else if v < 1_000_000 { 6 - } else if v < 10_000_000 { 7 - } else if v < 100_000_000 { 8 - } else if v < 1_000_000_000 { 9 - } else { 10 } + JsonSize::new( + if v < -999_999_999 { 11 + } else if v < -99_999_999 { 10 + } else if v < -9_999_999 { 9 + } else if v < -999_999 { 8 + } else if v < -99_999 { 7 + } else if v < -9_999 { 6 + } else if v < -999 { 5 + } else if v < -99 { 4 + } else if v < -9 { 3 + } else if v < 0 { 2 + } else if v < 10 { 1 + } else if v < 100 { 2 + } else if v < 1_000 { 3 + } else if v < 10_000 { 4 + } else if v < 100_000 { 5 + } else if v < 1_000_000 { 6 + } else if v < 10_000_000 { 7 + } else if v < 100_000_000 { 8 + } else if v < 1_000_000_000 { 9 + } else { 10 } + ) } } impl EstimatedJsonEncodedSizeOf for u64 { #[rustfmt::skip] - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { let v = *self; // 0 ..= 18_446_744_073_709_551_615 - if v < 10 { 1 - } else if v < 100 { 2 - } else if v < 1_000 { 3 - } else if v < 10_000 { 4 - } else if v < 100_000 { 5 - } else if v < 1_000_000 { 6 - } else if v < 10_000_000 { 7 - } else if v < 100_000_000 { 8 - } else if v < 1_000_000_000 { 9 - } else if v < 10_000_000_000 { 10 - } else if v < 100_000_000_000 { 11 - } else if v < 1_000_000_000_000 { 12 - } else if v < 10_000_000_000_000 { 13 - } else if v < 100_000_000_000_000 { 14 - } else if v < 1_000_000_000_000_000 { 15 - } else if v < 10_000_000_000_000_000 { 16 - } else if v < 100_000_000_000_000_000 { 17 - } else if v < 1_000_000_000_000_000_000 { 18 - } else if v < 10_000_000_000_000_000_000 { 19 - } else { 20 } + JsonSize::new( + if v < 10 { 1 + } else if v < 100 { 2 + } else if v < 1_000 { 3 + } else if v < 10_000 { 4 + } else if v < 100_000 { 5 + } else if v < 1_000_000 { 6 + } else if v < 10_000_000 { 7 + } else if v < 100_000_000 { 8 + } else if v < 1_000_000_000 { 9 + } else if v < 10_000_000_000 { 10 + } else if v < 100_000_000_000 { 11 + } else if v < 1_000_000_000_000 { 12 + } else if v < 10_000_000_000_000 { 13 + } else if v < 100_000_000_000_000 { 14 + } else if v < 1_000_000_000_000_000 { 15 + } else if v < 10_000_000_000_000_000 { 16 + } else if v < 100_000_000_000_000_000 { 17 + } else if v < 1_000_000_000_000_000_000 { 18 + } else if v < 10_000_000_000_000_000_000 { 19 + } else { 20 } + ) } } impl EstimatedJsonEncodedSizeOf for i64 { #[rustfmt::skip] - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { let v = *self; // -9_223_372_036_854_775_808 ..= 9_223_372_036_854_775_807 - if v < -999_999_999_999_999_999 { 20 - } else if v < -99_999_999_999_999_999 { 19 - } else if v < -9_999_999_999_999_999 { 18 - } else if v < -999_999_999_999_999 { 17 - } else if v < -99_999_999_999_999 { 16 - } else if v < -9_999_999_999_999 { 15 - } else if v < -999_999_999_999 { 14 - } else if v < -99_999_999_999 { 13 - } else if v < -9_999_999_999 { 12 - } else if v < -999_999_999 { 11 - } else if v < -99_999_999 { 10 - } else if v < -9_999_999 { 9 - } else if v < -999_999 { 8 - } else if v < -99_999 { 7 - } else if v < -9_999 { 6 - } else if v < -999 { 5 - } else if v < -99 { 4 - } else if v < -9 { 3 - } else if v < 0 { 2 - } else if v < 10 { 1 - } else if v < 100 { 2 - } else if v < 1_000 { 3 - } else if v < 10_000 { 4 - } else if v < 100_000 { 5 - } else if v < 1_000_000 { 6 - } else if v < 10_000_000 { 7 - } else if v < 100_000_000 { 8 - } 
else if v < 1_000_000_000 { 9 - } else if v < 10_000_000_000 { 10 - } else if v < 100_000_000_000 { 11 - } else if v < 1_000_000_000_000 { 12 - } else if v < 10_000_000_000_000 { 13 - } else if v < 100_000_000_000_000 { 14 - } else if v < 1_000_000_000_000_000 { 15 - } else if v < 10_000_000_000_000_000 { 16 - } else if v < 100_000_000_000_000_000 { 17 - } else if v < 1_000_000_000_000_000_000 { 18 - } else { 19 } + JsonSize::new( + if v < -999_999_999_999_999_999 { 20 + } else if v < -99_999_999_999_999_999 { 19 + } else if v < -9_999_999_999_999_999 { 18 + } else if v < -999_999_999_999_999 { 17 + } else if v < -99_999_999_999_999 { 16 + } else if v < -9_999_999_999_999 { 15 + } else if v < -999_999_999_999 { 14 + } else if v < -99_999_999_999 { 13 + } else if v < -9_999_999_999 { 12 + } else if v < -999_999_999 { 11 + } else if v < -99_999_999 { 10 + } else if v < -9_999_999 { 9 + } else if v < -999_999 { 8 + } else if v < -99_999 { 7 + } else if v < -9_999 { 6 + } else if v < -999 { 5 + } else if v < -99 { 4 + } else if v < -9 { 3 + } else if v < 0 { 2 + } else if v < 10 { 1 + } else if v < 100 { 2 + } else if v < 1_000 { 3 + } else if v < 10_000 { 4 + } else if v < 100_000 { 5 + } else if v < 1_000_000 { 6 + } else if v < 10_000_000 { 7 + } else if v < 100_000_000 { 8 + } else if v < 1_000_000_000 { 9 + } else if v < 10_000_000_000 { 10 + } else if v < 100_000_000_000 { 11 + } else if v < 1_000_000_000_000 { 12 + } else if v < 10_000_000_000_000 { 13 + } else if v < 100_000_000_000_000 { 14 + } else if v < 1_000_000_000_000_000 { 15 + } else if v < 10_000_000_000_000_000 { 16 + } else if v < 100_000_000_000_000_000 { 17 + } else if v < 1_000_000_000_000_000_000 { 18 + } else { 19 } + ) } } impl EstimatedJsonEncodedSizeOf for usize { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { (*self as u64).estimated_json_encoded_size_of() } } impl EstimatedJsonEncodedSizeOf for isize { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { (*self as i64).estimated_json_encoded_size_of() } } @@ -453,7 +470,7 @@ mod tests { let got = v.estimated_json_encoded_size_of(); let want = serde_json::to_string(&v).unwrap(); - got == want.len() + got == want.len().into() } #[quickcheck] @@ -461,7 +478,7 @@ mod tests { let got = v.estimated_json_encoded_size_of(); let want = serde_json::to_string(&v).unwrap(); - got == want.len() + got == want.len().into() } #[quickcheck] @@ -469,7 +486,7 @@ mod tests { let got = v.estimated_json_encoded_size_of(); let want = serde_json::to_string(&v).unwrap(); - got == want.len() + got == want.len().into() } #[quickcheck] @@ -477,7 +494,7 @@ mod tests { let got = v.estimated_json_encoded_size_of(); let want = serde_json::to_string(&v).unwrap(); - got == want.len() + got == want.len().into() } #[quickcheck] @@ -485,7 +502,7 @@ mod tests { let got = v.estimated_json_encoded_size_of(); let want = serde_json::to_string(&v).unwrap(); - got == want.len() + got == want.len().into() } #[quickcheck] @@ -493,7 +510,7 @@ mod tests { let got = v.estimated_json_encoded_size_of(); let want = serde_json::to_string(&v).unwrap(); - got == want.len() + got == want.len().into() } #[quickcheck] @@ -501,7 +518,7 @@ mod tests { let got = v.estimated_json_encoded_size_of(); let want = serde_json::to_string(&v).unwrap(); - got == want.len() + got == want.len().into() } #[quickcheck] @@ -509,7 +526,7 @@ mod tests { let got = v.estimated_json_encoded_size_of(); let want = 
serde_json::to_string(&v).unwrap(); - got == want.len() + got == want.len().into() } #[quickcheck] @@ -517,7 +534,7 @@ mod tests { let got = v.estimated_json_encoded_size_of(); let want = serde_json::to_string(&v).unwrap(); - got == want.len() + got == want.len().into() } #[quickcheck] @@ -525,7 +542,7 @@ mod tests { let got = v.estimated_json_encoded_size_of(); let want = serde_json::to_string(&v).unwrap(); - got == want.len() + got == want.len().into() } #[quickcheck] @@ -538,7 +555,7 @@ mod tests { let got = v.estimated_json_encoded_size_of(); let want = serde_json::to_string(&v).unwrap(); - got == want.len() + got == want.len().into() } #[quickcheck] @@ -551,7 +568,7 @@ mod tests { let got = v.estimated_json_encoded_size_of(); let want = serde_json::to_string(&v).unwrap(); - got == want.len() + got == want.len().into() } #[quickcheck] @@ -563,7 +580,7 @@ mod tests { let got = v.estimated_json_encoded_size_of(); let want = serde_json::to_string(&v).unwrap(); - TestResult::from_bool(got == want.len()) + TestResult::from_bool(got == want.len().into()) } #[quickcheck] @@ -575,7 +592,7 @@ mod tests { let got = v.estimated_json_encoded_size_of(); let want = serde_json::to_string(&v).unwrap(); - TestResult::from_bool(got == want.len()) + TestResult::from_bool(got == want.len().into()) } #[quickcheck] @@ -583,7 +600,7 @@ mod tests { let got = v.estimated_json_encoded_size_of(); let want = serde_json::to_string(&v).unwrap(); - got == want.len() + got == want.len().into() } #[quickcheck] @@ -591,7 +608,7 @@ mod tests { let got = v.estimated_json_encoded_size_of(); let want = serde_json::to_string(&v).unwrap(); - got == want.len() + got == want.len().into() } #[quickcheck] @@ -599,7 +616,7 @@ mod tests { let got = v.estimated_json_encoded_size_of(); let want = serde_json::to_string(&v).unwrap(); - TestResult::from_bool(got == want.len()) + TestResult::from_bool(got == want.len().into()) } #[quickcheck] @@ -611,7 +628,7 @@ mod tests { let got = v.estimated_json_encoded_size_of(); let want = serde_json::to_string(&v).unwrap(); - TestResult::from_bool(got == want.len()) + TestResult::from_bool(got == want.len().into()) } fn is_inaccurately_counted_value(v: &Value) -> bool { diff --git a/lib/vector-core/src/event/log_event.rs b/lib/vector-core/src/event/log_event.rs index c3d59b26c0f55..b656ee014810a 100644 --- a/lib/vector-core/src/event/log_event.rs +++ b/lib/vector-core/src/event/log_event.rs @@ -14,7 +14,12 @@ use crossbeam_utils::atomic::AtomicCell; use lookup::lookup_v2::TargetPath; use lookup::PathPrefix; use serde::{Deserialize, Serialize, Serializer}; -use vector_common::EventDataEq; +use vector_common::{ + internal_event::OptionalTag, + json_size::{JsonSize, NonZeroJsonSize}, + request_metadata::{EventCountTags, GetEventCountTags}, + EventDataEq, +}; use super::{ estimated_json_encoded_size_of::EstimatedJsonEncodedSizeOf, @@ -22,8 +27,8 @@ use super::{ metadata::EventMetadata, util, EventFinalizers, Finalizable, Value, }; -use crate::config::log_schema; use crate::config::LogNamespace; +use crate::config::{log_schema, telemetry}; use crate::{event::MaybeAsLogMut, ByteSizeOf}; use lookup::{metadata_path, path}; @@ -36,7 +41,7 @@ struct Inner { size_cache: AtomicCell>, #[serde(skip)] - json_encoded_size_cache: AtomicCell>, + json_encoded_size_cache: AtomicCell>, } impl Inner { @@ -73,12 +78,12 @@ impl ByteSizeOf for Inner { } impl EstimatedJsonEncodedSizeOf for Inner { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { 
self.json_encoded_size_cache .load() .unwrap_or_else(|| { let size = self.fields.estimated_json_encoded_size_of(); - let size = NonZeroUsize::new(size).expect("Size cannot be zero"); + let size = NonZeroJsonSize::new(size).expect("Size cannot be zero"); self.json_encoded_size_cache.store(Some(size)); size @@ -212,11 +217,31 @@ impl Finalizable for LogEvent { } impl EstimatedJsonEncodedSizeOf for LogEvent { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { self.inner.estimated_json_encoded_size_of() } } +impl GetEventCountTags for LogEvent { + fn get_tags(&self) -> EventCountTags { + let source = if telemetry().tags().emit_source { + self.metadata().source_id().cloned().into() + } else { + OptionalTag::Ignored + }; + + let service = if telemetry().tags().emit_service { + self.get_by_meaning("service") + .map(|value| value.to_string_lossy().to_string()) + .into() + } else { + OptionalTag::Ignored + }; + + EventCountTags { source, service } + } +} + impl LogEvent { #[must_use] pub fn new_with_metadata(metadata: EventMetadata) -> Self { @@ -702,7 +727,7 @@ mod test { use super::*; use crate::test_util::open_fixture; use lookup::event_path; - use vrl::value::value; + use vrl::value; // The following two tests assert that renaming a key has no effect if the // keys are equivalent, whether the key exists in the log or not. diff --git a/lib/vector-core/src/event/metadata.rs b/lib/vector-core/src/event/metadata.rs index 403b43bfc52b9..d86884be7582c 100644 --- a/lib/vector-core/src/event/metadata.rs +++ b/lib/vector-core/src/event/metadata.rs @@ -1,15 +1,16 @@ #![deny(missing_docs)] -use std::collections::BTreeMap; -use std::sync::Arc; +use std::{collections::BTreeMap, sync::Arc}; use serde::{Deserialize, Serialize}; -use vector_common::EventDataEq; +use vector_common::{config::ComponentKey, EventDataEq}; use vrl::value::{Kind, Secrets, Value}; use super::{BatchNotifier, EventFinalizer, EventFinalizers, EventStatus}; -use crate::config::{LogNamespace, OutputId}; -use crate::{schema, ByteSizeOf}; +use crate::{ + config::{LogNamespace, OutputId}, + schema, ByteSizeOf, +}; const DATADOG_API_KEY: &str = "datadog_api_key"; const SPLUNK_HEC_TOKEN: &str = "splunk_hec_token"; @@ -30,10 +31,17 @@ pub struct EventMetadata { finalizers: EventFinalizers, /// The id of the source - source_id: Option>, + source_id: Option>, + + /// The id of the component this event originated from. This is used to + /// determine which schema definition to attach to an event in transforms. + /// This should always have a value set for events in transforms. It will always be `None` + /// in a source, and there is currently no use-case for reading the value in a sink. + upstream_id: Option>, /// An identifier for a globally registered schema definition which provides information about /// the event shape (type information, and semantic meaning of fields). + /// This definition is only currently valid for logs, and shouldn't be used for other event types. /// /// TODO(Jean): must not skip serialization to track schemas across restarts. #[serde(default = "default_schema_definition", skip)] @@ -73,17 +81,29 @@ impl EventMetadata { &mut self.secrets } - /// Returns a reference to the metadata source. + /// Returns a reference to the metadata source id. + #[must_use] + pub fn source_id(&self) -> Option<&Arc> { + self.source_id.as_ref() + } + + /// Returns a reference to the metadata parent id. 
This is the `OutputId` + /// of the previous component the event was sent through (if any). #[must_use] - pub fn source_id(&self) -> Option<&OutputId> { - self.source_id.as_deref() + pub fn upstream_id(&self) -> Option<&OutputId> { + self.upstream_id.as_deref() } /// Sets the `source_id` in the metadata to the provided value. - pub fn set_source_id(&mut self, source_id: Arc) { + pub fn set_source_id(&mut self, source_id: Arc) { self.source_id = Some(source_id); } + /// Sets the `upstream_id` in the metadata to the provided value. + pub fn set_upstream_id(&mut self, upstream_id: Arc) { + self.upstream_id = Some(upstream_id); + } + /// Return the datadog API key, if it exists pub fn datadog_api_key(&self) -> Option> { self.secrets.get(DATADOG_API_KEY).cloned() @@ -113,6 +133,7 @@ impl Default for EventMetadata { finalizers: Default::default(), schema_definition: default_schema_definition(), source_id: None, + upstream_id: None, } } } diff --git a/lib/vector-core/src/event/metric/mod.rs b/lib/vector-core/src/event/metric/mod.rs index 878036eccdf82..478310428faf1 100644 --- a/lib/vector-core/src/event/metric/mod.rs +++ b/lib/vector-core/src/event/metric/mod.rs @@ -11,10 +11,16 @@ use std::{ }; use chrono::{DateTime, Utc}; -use vector_common::EventDataEq; +use vector_common::{ + internal_event::OptionalTag, + json_size::JsonSize, + request_metadata::{EventCountTags, GetEventCountTags}, + EventDataEq, +}; use vector_config::configurable_component; use crate::{ + config::telemetry, event::{ estimated_json_encoded_size_of::EstimatedJsonEncodedSizeOf, BatchNotifier, EventFinalizer, EventFinalizers, EventMetadata, Finalizable, @@ -444,7 +450,7 @@ impl Display for Metric { /// /// example: /// ```text - /// 2020-08-12T20:23:37.248661343Z vector_processed_bytes_total{component_kind="sink",component_type="blackhole"} = 6391 + /// 2020-08-12T20:23:37.248661343Z vector_received_bytes_total{component_kind="sink",component_type="blackhole"} = 6391 /// ``` fn fmt(&self, fmt: &mut Formatter<'_>) -> Result<(), fmt::Error> { if let Some(timestamp) = &self.data.time.timestamp { @@ -479,10 +485,10 @@ impl ByteSizeOf for Metric { } impl EstimatedJsonEncodedSizeOf for Metric { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { // TODO: For now we're using the in-memory representation of the metric, but we'll convert // this to actually calculate the JSON encoded size in the near future. - self.size_of() + self.size_of().into() } } @@ -492,6 +498,28 @@ impl Finalizable for Metric { } } +impl GetEventCountTags for Metric { + fn get_tags(&self) -> EventCountTags { + let source = if telemetry().tags().emit_source { + self.metadata().source_id().cloned().into() + } else { + OptionalTag::Ignored + }; + + // Currently there is no way to specify a tag that means the service, + // so we will be hardcoding it to "service". + let service = if telemetry().tags().emit_service { + self.tags() + .and_then(|tags| tags.get("service").map(ToString::to_string)) + .into() + } else { + OptionalTag::Ignored + }; + + EventCountTags { source, service } + } +} + /// Metric kind. /// /// Metrics can be either absolute or incremental. 
Absolute metrics represent a sort of "last write wins" scenario, diff --git a/lib/vector-core/src/event/mod.rs b/lib/vector-core/src/event/mod.rs index de9e01ec4c109..9547f58dc5ed3 100644 --- a/lib/vector-core/src/event/mod.rs +++ b/lib/vector-core/src/event/mod.rs @@ -19,7 +19,13 @@ pub use r#ref::{EventMutRef, EventRef}; use serde::{Deserialize, Serialize}; pub use trace::TraceEvent; use vector_buffers::EventCount; -use vector_common::{finalization, EventDataEq}; +use vector_common::{ + config::ComponentKey, + finalization, + json_size::JsonSize, + request_metadata::{EventCountTags, GetEventCountTags}, + EventDataEq, +}; pub use vrl::value::Value; #[cfg(feature = "vrl")] pub use vrl_target::{TargetEvents, VrlTarget}; @@ -65,7 +71,7 @@ impl ByteSizeOf for Event { } impl EstimatedJsonEncodedSizeOf for Event { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { match self { Event::Log(log_event) => log_event.estimated_json_encoded_size_of(), Event::Metric(metric_event) => metric_event.estimated_json_encoded_size_of(), @@ -90,6 +96,16 @@ impl Finalizable for Event { } } +impl GetEventCountTags for Event { + fn get_tags(&self) -> EventCountTags { + match self { + Event::Log(log) => log.get_tags(), + Event::Metric(metric) => metric.get_tags(), + Event::Trace(trace) => trace.get_tags(), + } + } +} + impl Event { /// Return self as a `LogEvent` /// @@ -284,21 +300,33 @@ impl Event { /// Returns a reference to the event metadata source. #[must_use] - pub fn source_id(&self) -> Option<&OutputId> { + pub fn source_id(&self) -> Option<&Arc> { self.metadata().source_id() } /// Sets the `source_id` in the event metadata to the provided value. - pub fn set_source_id(&mut self, source_id: Arc) { + pub fn set_source_id(&mut self, source_id: Arc) { self.metadata_mut().set_source_id(source_id); } + /// Sets the `upstream_id` in the event metadata to the provided value. + pub fn set_upstream_id(&mut self, upstream_id: Arc) { + self.metadata_mut().set_upstream_id(upstream_id); + } + /// Sets the `source_id` in the event metadata to the provided value. #[must_use] - pub fn with_source_id(mut self, source_id: Arc) -> Self { + pub fn with_source_id(mut self, source_id: Arc) -> Self { self.metadata_mut().set_source_id(source_id); self } + + /// Sets the `upstream_id` in the event metadata to the provided value. 
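The `GetEventCountTags` impls above all follow the same gating pattern: a tag is only looked up on the event when the corresponding telemetry flag is enabled, and is otherwise marked as ignored so it never becomes a metric label. A standalone sketch of that pattern is below; the `OptionalTag` and `EventCountTags` shapes are assumptions standing in for the vector-common types, not their actual definitions.

// Hypothetical stand-ins for vector_common's `OptionalTag` and `EventCountTags`.
#[derive(Clone, Debug, PartialEq)]
enum OptionalTag {
    // Telemetry did not ask for this tag, so it is not tracked at all.
    Ignored,
    // Telemetry asked for the tag; the event may or may not carry a value.
    Specified(Option<String>),
}

#[derive(Clone, Debug, PartialEq)]
struct EventCountTags {
    source: OptionalTag,
    service: OptionalTag,
}

struct TelemetryTags {
    emit_source: bool,
    emit_service: bool,
}

// Mirrors the pattern in the `GetEventCountTags` impls: each tag is only
// resolved from the event when the matching telemetry flag is on.
fn get_tags(tags: &TelemetryTags, source_id: Option<&str>, service: Option<&str>) -> EventCountTags {
    let source = if tags.emit_source {
        OptionalTag::Specified(source_id.map(str::to_string))
    } else {
        OptionalTag::Ignored
    };
    let service = if tags.emit_service {
        OptionalTag::Specified(service.map(str::to_string))
    } else {
        OptionalTag::Ignored
    };
    EventCountTags { source, service }
}

fn main() {
    let tags = TelemetryTags { emit_source: true, emit_service: false };
    let got = get_tags(&tags, Some("my_source"), Some("checkout"));
    assert_eq!(got.source, OptionalTag::Specified(Some("my_source".into())));
    assert_eq!(got.service, OptionalTag::Ignored);
}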
+ #[must_use] + pub fn with_upstream_id(mut self, upstream_id: Arc) -> Self { + self.metadata_mut().set_upstream_id(upstream_id); + self + } } impl EventDataEq for Event { diff --git a/lib/vector-core/src/event/trace.rs b/lib/vector-core/src/event/trace.rs index 8bc68f9880605..3885b50b9f13d 100644 --- a/lib/vector-core/src/event/trace.rs +++ b/lib/vector-core/src/event/trace.rs @@ -3,7 +3,11 @@ use std::{collections::BTreeMap, fmt::Debug}; use lookup::lookup_v2::TargetPath; use serde::{Deserialize, Serialize}; use vector_buffers::EventCount; -use vector_common::EventDataEq; +use vector_common::{ + json_size::JsonSize, + request_metadata::{EventCountTags, GetEventCountTags}, + EventDataEq, +}; use super::{ BatchNotifier, EstimatedJsonEncodedSizeOf, EventFinalizer, EventFinalizers, EventMetadata, @@ -109,7 +113,7 @@ impl ByteSizeOf for TraceEvent { } impl EstimatedJsonEncodedSizeOf for TraceEvent { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { self.0.estimated_json_encoded_size_of() } } @@ -143,3 +147,9 @@ impl AsMut for TraceEvent { &mut self.0 } } + +impl GetEventCountTags for TraceEvent { + fn get_tags(&self) -> EventCountTags { + self.0.get_tags() + } +} diff --git a/lib/vector-core/src/event/vrl_target.rs b/lib/vector-core/src/event/vrl_target.rs index db067b271e441..782aa7ee2ae03 100644 --- a/lib/vector-core/src/event/vrl_target.rs +++ b/lib/vector-core/src/event/vrl_target.rs @@ -6,11 +6,13 @@ use lookup::{OwnedTargetPath, OwnedValuePath, PathPrefix}; use snafu::Snafu; use vrl::compiler::value::VrlValueConvert; use vrl::compiler::{ProgramInfo, SecretTarget, Target}; -use vrl::value::Value; +use vrl::prelude::Collection; +use vrl::value::{Kind, Value}; use super::{Event, EventMetadata, LogEvent, Metric, MetricKind, TraceEvent}; -use crate::config::log_schema; +use crate::config::{log_schema, LogNamespace}; use crate::event::metric::TagValue; +use crate::schema::Definition; const VALID_METRIC_PATHS_SET: &str = ".name, .namespace, .timestamp, .kind, .tags"; @@ -114,11 +116,24 @@ impl VrlTarget { } } + /// Modifies a schema in the same way that the `into_events` function modifies the event + pub fn modify_schema_definition_for_into_events(input: Definition) -> Definition { + let log_namespaces = input.log_namespaces().clone(); + + // both namespaces merge arrays, but only `Legacy` moves field definitions into a "message" field. + let merged_arrays = merge_array_definitions(input); + Definition::combine_log_namespaces( + &log_namespaces, + move_field_definitions_into_message(merged_arrays.clone()), + merged_arrays, + ) + } + /// Turn the target back into events. /// /// This returns an iterator of events as one event can be turned into multiple by assigning an /// array to `.` in VRL. 
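The doc comment above describes the fan-out: a VRL program that assigns an array to `.` produces one event per element, an object stays a single event, and any other value is tucked under the message key in the legacy namespace. A much-simplified sketch of that shape follows; it leaves out metadata, traces, and the namespace plumbing that the real `into_events` handles, and the `Value` enum here is a stand-in, not VRL's.

use std::collections::BTreeMap;

// Minimal stand-in for VRL's `Value`; only the variants needed here are modeled.
#[derive(Clone, Debug, PartialEq)]
enum Value {
    Bytes(String),
    Array(Vec<Value>),
    Object(BTreeMap<String, Value>),
}

// Objects become one event, arrays fan out into one event per element, and
// anything else is wrapped under a message key ("message" stands in for
// `log_schema().message_key()` under the legacy namespace).
fn into_events(value: Value) -> Vec<BTreeMap<String, Value>> {
    match value {
        Value::Object(fields) => vec![fields],
        Value::Array(items) => items
            .into_iter()
            .map(|item| match item {
                Value::Object(fields) => fields,
                other => BTreeMap::from([("message".to_string(), other)]),
            })
            .collect(),
        other => vec![BTreeMap::from([("message".to_string(), other)])],
    }
}

fn main() {
    let result = Value::Array(vec![
        Value::Bytes("first".into()),
        Value::Object(BTreeMap::from([("k".into(), Value::Bytes("v".into()))])),
    ]);
    let events = into_events(result);
    assert_eq!(events.len(), 2);
    assert_eq!(events[0].get("message"), Some(&Value::Bytes("first".into())));
}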
- pub fn into_events(self) -> TargetEvents { + pub fn into_events(self, log_namespace: LogNamespace) -> TargetEvents { match self { VrlTarget::LogEvent(value, metadata) => match value { value @ Value::Object(_) => { @@ -131,11 +146,16 @@ impl VrlTarget { _marker: PhantomData, }), - v => { - let mut log = LogEvent::new_with_metadata(metadata); - log.insert(log_schema().message_key(), v); - TargetEvents::One(log.into()) - } + v => match log_namespace { + LogNamespace::Vector => { + TargetEvents::One(LogEvent::from_parts(v, metadata).into()) + } + LogNamespace::Legacy => { + let mut log = LogEvent::new_with_metadata(metadata); + log.insert(log_schema().message_key(), v); + TargetEvents::One(log.into()) + } + }, }, VrlTarget::Trace(value, metadata) => match value { value @ Value::Object(_) => { @@ -174,6 +194,53 @@ impl VrlTarget { } } +/// If the VRL returns a value that is not an array (see [`merge_array_definitions`]), +/// or an object, that data is moved into the `message` field. +fn move_field_definitions_into_message(mut definition: Definition) -> Definition { + let mut message = definition.event_kind().clone(); + message.remove_object(); + message.remove_array(); + + if !message.is_never() { + // We need to add the given message type to a field called `message` + // in the event. + let message = Kind::object(Collection::from(BTreeMap::from([( + log_schema().message_key().into(), + message, + )]))); + + definition.event_kind_mut().remove_bytes(); + definition.event_kind_mut().remove_integer(); + definition.event_kind_mut().remove_float(); + definition.event_kind_mut().remove_boolean(); + definition.event_kind_mut().remove_timestamp(); + definition.event_kind_mut().remove_regex(); + definition.event_kind_mut().remove_null(); + + *definition.event_kind_mut() = definition.event_kind().union(message); + } + + definition +} + +/// If the transform returns an array, the elements of this array will be separated +/// out into it's individual elements and passed downstream. +/// +/// The potential types that the transform can output are any of the arrays +/// elements or any non-array elements that are within the definition. All these +/// definitions need to be merged together. +fn merge_array_definitions(mut definition: Definition) -> Definition { + if let Some(array) = definition.event_kind().as_array() { + let array_kinds = array.reduced_kind(); + + let kind = definition.event_kind_mut(); + kind.remove_array(); + *kind = kind.union(array_kinds); + } + + definition +} + fn set_metric_tag_values(name: String, value: &Value, metric: &mut Metric, multi_value_tags: bool) { if multi_value_tags { let tag_values = value @@ -588,12 +655,108 @@ mod test { use chrono::{offset::TimeZone, Utc}; use lookup::owned_value_path; use similar_asserts::assert_eq; - use vrl::value::btreemap; + use vrl::btreemap; + use vrl::value::kind::Index; use super::super::MetricValue; use super::*; use crate::metric_tags; + #[test] + fn test_field_definitions_in_message() { + let definition = + Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Legacy]); + assert_eq!( + Definition::new_with_default_metadata( + Kind::object(BTreeMap::from([("message".into(), Kind::bytes())])), + [LogNamespace::Legacy] + ), + move_field_definitions_into_message(definition) + ); + + // Test when a message field already exists. 
+ let definition = Definition::new_with_default_metadata( + Kind::object(BTreeMap::from([("message".into(), Kind::integer())])).or_bytes(), + [LogNamespace::Legacy], + ); + assert_eq!( + Definition::new_with_default_metadata( + Kind::object(BTreeMap::from([( + "message".into(), + Kind::bytes().or_integer() + )])), + [LogNamespace::Legacy] + ), + move_field_definitions_into_message(definition) + ); + } + + #[test] + fn test_merged_array_definitions_simple() { + // Test merging the array definitions where the schema definition + // is simple, containing only one possible type in the array. + let object: BTreeMap = [ + ("carrot".into(), Kind::bytes()), + ("potato".into(), Kind::integer()), + ] + .into(); + + let kind = Kind::array(Collection::from_unknown(Kind::object(object))); + + let definition = Definition::new_with_default_metadata(kind, [LogNamespace::Legacy]); + + let kind = Kind::object(BTreeMap::from([ + ("carrot".into(), Kind::bytes()), + ("potato".into(), Kind::integer()), + ])); + + let wanted = Definition::new_with_default_metadata(kind, [LogNamespace::Legacy]); + let merged = merge_array_definitions(definition); + + assert_eq!(wanted, merged); + } + + #[test] + fn test_merged_array_definitions_complex() { + // Test merging the array definitions where the schema definition + // is fairly complex containing multiple different possible types. + let object: BTreeMap = [ + ("carrot".into(), Kind::bytes()), + ("potato".into(), Kind::integer()), + ] + .into(); + + let array: BTreeMap = [ + (Index::from(0), Kind::integer()), + (Index::from(1), Kind::boolean()), + ( + Index::from(2), + Kind::object(BTreeMap::from([("peas".into(), Kind::bytes())])), + ), + ] + .into(); + + let mut kind = Kind::bytes(); + kind.add_object(object); + kind.add_array(array); + + let definition = Definition::new_with_default_metadata(kind, [LogNamespace::Legacy]); + + let mut kind = Kind::bytes(); + kind.add_integer(); + kind.add_boolean(); + kind.add_object(BTreeMap::from([ + ("carrot".into(), Kind::bytes().or_undefined()), + ("potato".into(), Kind::integer().or_undefined()), + ("peas".into(), Kind::bytes().or_undefined()), + ])); + + let wanted = Definition::new_with_default_metadata(kind, [LogNamespace::Legacy]); + let merged = merge_array_definitions(definition); + + assert_eq!(wanted, merged); + } + #[test] fn log_get() { let cases = vec![ @@ -755,7 +918,7 @@ mod test { Ok(Some(value)) ); assert_eq!( - match target.into_events() { + match target.into_events(LogNamespace::Legacy) { TargetEvents::One(event) => vec![event], TargetEvents::Logs(events) => events.collect::>(), TargetEvents::Traces(events) => events.collect::>(), @@ -858,7 +1021,7 @@ mod test { #[test] fn log_into_events() { - use vrl::value::btreemap; + use vrl::btreemap; let cases = vec![ ( @@ -901,7 +1064,7 @@ mod test { Target::target_insert(&mut target, &OwnedTargetPath::event_root(), value).unwrap(); assert_eq!( - match target.into_events() { + match target.into_events(LogNamespace::Legacy) { TargetEvents::One(event) => vec![event], TargetEvents::Logs(events) => events.collect::>(), TargetEvents::Traces(events) => events.collect::>(), diff --git a/lib/vector-core/src/lib.rs b/lib/vector-core/src/lib.rs index e8b464b50d922..1362fd18bd602 100644 --- a/lib/vector-core/src/lib.rs +++ b/lib/vector-core/src/lib.rs @@ -27,14 +27,8 @@ #![allow(clippy::unnested_or_patterns)] // nightly-only feature as of 1.51.0 #![allow(clippy::type_complexity)] // long-types happen, especially in async code #![allow( - clippy::arc_with_non_send_sync, - 
clippy::default_constructed_unit_structs, clippy::explicit_iter_loop, - clippy::missing_fields_in_debug, clippy::missing_panics_doc, - clippy::needless_pub_self, - clippy::needless_raw_string_hashes, - clippy::non_minimal_cfg, clippy::redundant_closure_call, clippy::redundant_pattern_matching, clippy::useless_conversion, diff --git a/lib/vector-core/src/metrics/mod.rs b/lib/vector-core/src/metrics/mod.rs index b842f90812e28..375dba4af624b 100644 --- a/lib/vector-core/src/metrics/mod.rs +++ b/lib/vector-core/src/metrics/mod.rs @@ -161,28 +161,6 @@ impl Controller { let mut metrics = self.recorder.with_registry(Registry::visit_metrics); - // Add aliases for deprecated metrics - for i in 0..metrics.len() { - let metric = &metrics[i]; - match metric.name() { - "component_sent_events_total" => { - let alias = metric.clone().with_name("processed_events_total"); - metrics.push(alias); - } - "component_sent_bytes_total" if metric.tag_matches("component_kind", "sink") => { - let alias = metric.clone().with_name("processed_bytes_total"); - metrics.push(alias); - } - "component_received_bytes_total" - if metric.tag_matches("component_kind", "source") => - { - let alias = metric.clone().with_name("processed_bytes_total"); - metrics.push(alias); - } - _ => {} - } - } - #[allow(clippy::cast_precision_loss)] let value = (metrics.len() + 2) as f64; metrics.push(Metric::from_metric_kv( diff --git a/lib/vector-core/src/schema/definition.rs b/lib/vector-core/src/schema/definition.rs index 62a5bd3b2ff64..a3c5afc034cb4 100644 --- a/lib/vector-core/src/schema/definition.rs +++ b/lib/vector-core/src/schema/definition.rs @@ -457,6 +457,25 @@ impl Definition { self } + /// If the schema definition depends on the `LogNamespace`, this combines the individual + /// definitions for each `LogNamespace`. + pub fn combine_log_namespaces( + log_namespaces: &BTreeSet, + legacy: Self, + vector: Self, + ) -> Self { + let mut combined = + Definition::new_with_default_metadata(Kind::never(), log_namespaces.clone()); + + if log_namespaces.contains(&LogNamespace::Legacy) { + combined = combined.merge(legacy); + } + if log_namespaces.contains(&LogNamespace::Vector) { + combined = combined.merge(vector); + } + combined + } + /// Returns an `OwnedTargetPath` into an event, based on the provided `meaning`, if the meaning exists. pub fn meaning_path(&self, meaning: &str) -> Option<&OwnedTargetPath> { match self.meaning.get(meaning) { diff --git a/lib/vector-core/src/sink.rs b/lib/vector-core/src/sink.rs index 37b2a5def3e7a..a3e2e66e08c17 100644 --- a/lib/vector-core/src/sink.rs +++ b/lib/vector-core/src/sink.rs @@ -61,6 +61,12 @@ impl VectorSink { } /// Converts an event sink into a `VectorSink` + /// + /// Deprecated in favor of `VectorSink::from_event_streamsink`. See [vector/9261] + /// for more info. 
+ /// + /// [vector/9261]: https://github.com/vectordotdev/vector/issues/9261 + #[deprecated] pub fn from_event_sink(sink: impl Sink + Send + Unpin + 'static) -> Self { VectorSink::Sink(Box::new(EventSink::new(sink))) } diff --git a/lib/vector-core/src/stream/driver.rs b/lib/vector-core/src/stream/driver.rs index 6376be29f5d32..6ff23014c96d5 100644 --- a/lib/vector-core/src/stream/driver.rs +++ b/lib/vector-core/src/stream/driver.rs @@ -5,10 +5,10 @@ use tokio::{pin, select}; use tower::Service; use tracing::Instrument; use vector_common::internal_event::{ - register, ByteSize, BytesSent, CallError, CountByteSize, EventsSent, InternalEventHandle as _, - Output, PollReadyError, Registered, SharedString, + register, ByteSize, BytesSent, CallError, InternalEventHandle as _, PollReadyError, Registered, + RegisteredEventCache, SharedString, TaggedEventsSent, }; -use vector_common::request_metadata::{MetaDescriptive, RequestMetadata}; +use vector_common::request_metadata::{GroupedCountByteSize, MetaDescriptive}; use super::FuturesUnorderedCount; use crate::{ @@ -18,7 +18,7 @@ use crate::{ pub trait DriverResponse { fn event_status(&self) -> EventStatus; - fn events_sent(&self) -> CountByteSize; + fn events_sent(&self) -> &GroupedCountByteSize; /// Return the number of bytes that were sent in the request that returned this response. // TODO, remove the default implementation once all sinks have @@ -99,7 +99,7 @@ where pin!(batched_input); let bytes_sent = protocol.map(|protocol| register(BytesSent { protocol })); - let events_sent = register(EventsSent::from(Output(None))); + let events_sent = RegisteredEventCache::default(); loop { // Core behavior of the loop: @@ -167,8 +167,7 @@ where let finalizers = req.take_finalizers(); let bytes_sent = bytes_sent.clone(); let events_sent = events_sent.clone(); - - let metadata = req.get_metadata(); + let event_count = req.get_metadata().event_count(); let fut = svc.call(req) .err_into() @@ -176,7 +175,7 @@ where result, request_id, finalizers, - &metadata, + event_count, &bytes_sent, &events_sent, )) @@ -202,13 +201,13 @@ where result: Result, request_id: usize, finalizers: EventFinalizers, - metadata: &RequestMetadata, + event_count: usize, bytes_sent: &Option>, - events_sent: &Registered, + events_sent: &RegisteredEventCache, ) { match result { Err(error) => { - Self::emit_call_error(Some(error), request_id, metadata.event_count()); + Self::emit_call_error(Some(error), request_id, event_count); finalizers.update_status(EventStatus::Rejected); } Ok(response) => { @@ -220,10 +219,12 @@ where bytes_sent.emit(ByteSize(byte_size)); } } - events_sent.emit(response.events_sent()); + + response.events_sent().emit_event(events_sent); + // This condition occurs specifically when the `HttpBatchService::call()` is called *within* the `Service::call()` } else if response.event_status() == EventStatus::Rejected { - Self::emit_call_error(None, request_id, metadata.event_count()); + Self::emit_call_error(None, request_id, event_count); finalizers.update_status(EventStatus::Rejected); } } @@ -263,7 +264,8 @@ mod tests { use tower::Service; use vector_common::{ finalization::{BatchNotifier, EventFinalizer, EventFinalizers, EventStatus, Finalizable}, - request_metadata::RequestMetadata, + json_size::JsonSize, + request_metadata::{GroupedCountByteSize, RequestMetadata}, }; use vector_common::{internal_event::CountByteSize, request_metadata::MetaDescriptive}; @@ -297,20 +299,34 @@ mod tests { } impl MetaDescriptive for DelayRequest { - fn get_metadata(&self) -> 
RequestMetadata { - self.2 + fn get_metadata(&self) -> &RequestMetadata { + &self.2 + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.2 } } - struct DelayResponse; + struct DelayResponse { + events_sent: GroupedCountByteSize, + } + + impl DelayResponse { + fn new() -> Self { + Self { + events_sent: CountByteSize(1, JsonSize::new(1)).into(), + } + } + } impl DriverResponse for DelayResponse { fn event_status(&self) -> EventStatus { EventStatus::Delivered } - fn events_sent(&self) -> CountByteSize { - CountByteSize(1, 1) + fn events_sent(&self) -> &GroupedCountByteSize { + &self.events_sent } } @@ -395,7 +411,7 @@ mod tests { drop(permit); drop(req); - Ok(DelayResponse) + Ok(DelayResponse::new()) }) } } diff --git a/lib/vector-core/src/tcp.rs b/lib/vector-core/src/tcp.rs index 2dbbfafee118d..dd8f7b0e707a1 100644 --- a/lib/vector-core/src/tcp.rs +++ b/lib/vector-core/src/tcp.rs @@ -6,6 +6,7 @@ use vector_config::configurable_component; #[configurable_component] #[derive(Clone, Copy, Debug, PartialEq, Eq)] #[serde(deny_unknown_fields)] +#[configurable(metadata(docs::human_name = "Wait Time"))] pub struct TcpKeepaliveConfig { /// The time to wait before starting to send TCP keepalive probes on an idle connection. #[configurable(metadata(docs::type_unit = "seconds"))] diff --git a/lib/vector-core/src/tls/settings.rs b/lib/vector-core/src/tls/settings.rs index 0fdfea544d309..27fdbefdd0f06 100644 --- a/lib/vector-core/src/tls/settings.rs +++ b/lib/vector-core/src/tls/settings.rs @@ -119,6 +119,7 @@ pub struct TlsConfig { /// The certificate must be in the DER or PEM (X.509) format. Additionally, the certificate can be provided as an inline string in PEM format. #[serde(alias = "ca_path")] #[configurable(metadata(docs::examples = "/path/to/certificate_authority.crt"))] + #[configurable(metadata(docs::human_name = "CA File Path"))] pub ca_file: Option, /// Absolute path to a certificate file used to identify this server. @@ -129,6 +130,7 @@ pub struct TlsConfig { /// If this is set, and is not a PKCS#12 archive, `key_file` must also be set. #[serde(alias = "crt_path")] #[configurable(metadata(docs::examples = "/path/to/host_certificate.crt"))] + #[configurable(metadata(docs::human_name = "Certificate File Path"))] pub crt_file: Option, /// Absolute path to a private key file used to identify this server. @@ -136,6 +138,7 @@ pub struct TlsConfig { /// The key must be in DER or PEM (PKCS#8) format. Additionally, the key can be provided as an inline string in PEM format. #[serde(alias = "key_path")] #[configurable(metadata(docs::examples = "/path/to/host_certificate.key"))] + #[configurable(metadata(docs::human_name = "Key File Path"))] pub key_file: Option, /// Passphrase used to unlock the encrypted key file. @@ -143,6 +146,7 @@ pub struct TlsConfig { /// This has no effect unless `key_file` is set. 
#[configurable(metadata(docs::examples = "${KEY_PASS_ENV_VAR}"))] #[configurable(metadata(docs::examples = "PassWord1"))] + #[configurable(metadata(docs::human_name = "Key File Password"))] pub key_pass: Option, } diff --git a/lib/vector-core/src/transform/mod.rs b/lib/vector-core/src/transform/mod.rs index a4450caed36d8..a54db2a2543a6 100644 --- a/lib/vector-core/src/transform/mod.rs +++ b/lib/vector-core/src/transform/mod.rs @@ -1,11 +1,16 @@ +use std::sync::Arc; use std::{collections::HashMap, error, pin::Pin}; use futures::{Stream, StreamExt}; use vector_common::internal_event::{ self, register, CountByteSize, EventsSent, InternalEventHandle as _, Registered, DEFAULT_OUTPUT, }; +use vector_common::json_size::JsonSize; use vector_common::EventDataEq; +use crate::config::{ComponentKey, OutputId}; +use crate::event::EventMutRef; +use crate::schema::Definition; use crate::usage_metrics::OutputUsageTracker; use crate::{ config, @@ -13,7 +18,7 @@ use crate::{ into_event_stream, EstimatedJsonEncodedSizeOf, Event, EventArray, EventContainer, EventRef, }, fanout::{self, Fanout}, - ByteSizeOf, + schema, ByteSizeOf, }; #[cfg(feature = "lua")] @@ -178,6 +183,8 @@ impl SyncTransform for Box { struct TransformOutput { fanout: Fanout, events_sent: Registered, + log_schema_definitions: HashMap>, + output_id: Arc, } pub struct TransformOutputs { @@ -189,6 +196,7 @@ pub struct TransformOutputs { impl TransformOutputs { pub fn new( outputs_in: Vec, + component_key: &ComponentKey, ) -> (Self, HashMap, fanout::ControlChannel>) { let outputs_spec = outputs_in.clone(); let mut primary_output = None; @@ -197,6 +205,13 @@ impl TransformOutputs { for output in outputs_in { let (fanout, control) = Fanout::new(); + + let log_schema_definitions = output + .log_schema_definitions + .into_iter() + .map(|(id, definition)| (id, Arc::new(definition))) + .collect(); + match output.port { None => { primary_output = Some(TransformOutput { @@ -204,6 +219,11 @@ impl TransformOutputs { events_sent: register(EventsSent::from(internal_event::Output(Some( DEFAULT_OUTPUT.into(), )))), + log_schema_definitions, + output_id: Arc::new(OutputId { + component: component_key.clone(), + port: None, + }), }); controls.insert(None, control); } @@ -215,6 +235,11 @@ impl TransformOutputs { events_sent: register(EventsSent::from(internal_event::Output(Some( name.clone().into(), )))), + log_schema_definitions, + output_id: Arc::new(OutputId { + component: component_key.clone(), + port: Some(name.clone()), + }), }, ); controls.insert(Some(name.clone()), control); @@ -247,46 +272,74 @@ impl TransformOutputs { usage_tracker: &dyn OutputUsageTracker, ) -> Result<(), Box> { if let Some(primary) = self.primary_output.as_mut() { - let count = buf.primary_buffer.as_ref().map_or(0, OutputBuffer::len); - let byte_size = buf.primary_buffer.as_ref().map_or( - 0, - EstimatedJsonEncodedSizeOf::estimated_json_encoded_size_of, - ); + let send_buf = buf.primary_buffer.as_mut().expect("mismatched outputs"); + Self::send_single_buffer(send_buf, primary).await?; let usage_profile = buf.primary_buffer.as_ref().map_or(Default::default(), |o| { o.0.iter() .map(|a| usage_tracker.get_size_and_profile(a)) .sum() }); - - buf.primary_buffer - .as_mut() - .expect("mismatched outputs") - .send(&mut primary.fanout) - .await?; - primary.events_sent.emit(CountByteSize(count, byte_size)); - // We only want to track the primary transform output. // Named outputs are for stuff like route/swimlanes that we don't want to track atm. 
// We only want to capture the traffic of the remap transform after the node representing // the source (kafka / route transform) usage_tracker.track_output(usage_profile); } - for (key, buf) in &mut buf.named_buffers { - let count = buf.len(); - let byte_size = buf.estimated_json_encoded_size_of(); let output = self.named_outputs.get_mut(key).expect("unknown output"); - buf.send(&mut output.fanout).await?; - output.events_sent.emit(CountByteSize(count, byte_size)); + Self::send_single_buffer(buf, output).await?; // TODO: track named outputs } + Ok(()) + } + async fn send_single_buffer( + buf: &mut OutputBuffer, + output: &mut TransformOutput, + ) -> Result<(), Box> { + for event in buf.events_mut() { + update_runtime_schema_definition( + event, + &output.output_id, + &output.log_schema_definitions, + ); + } + let count = buf.len(); + let byte_size = buf.estimated_json_encoded_size_of(); + buf.send(&mut output.fanout).await?; + output.events_sent.emit(CountByteSize(count, byte_size)); Ok(()) } } +#[allow(clippy::implicit_hasher)] +/// `event`: The event that will be updated +/// `output_id`: The `output_id` that the current even is being sent to (will be used as the new `parent_id`) +/// `log_schema_definitions`: A mapping of parent `OutputId` to definitions, that will be used to lookup the new runtime definition of the event +pub fn update_runtime_schema_definition( + mut event: EventMutRef, + output_id: &Arc, + log_schema_definitions: &HashMap>, +) { + if let EventMutRef::Log(log) = &mut event { + if let Some(parent_component_id) = log.metadata().upstream_id() { + if let Some(definition) = log_schema_definitions.get(parent_component_id) { + log.metadata_mut().set_schema_definition(definition); + } + } else { + // there is no parent defined. That means this event originated from a component that + // isn't able to track the source, such as `reduce` or `lua`. 
In these cases, all of the + // schema definitions _must_ be the same, so the first one is picked + if let Some(definition) = log_schema_definitions.values().next() { + log.metadata_mut().set_schema_definition(definition); + } + } + } + event.metadata_mut().set_upstream_id(Arc::clone(output_id)); +} + #[derive(Debug, Clone)] pub struct TransformOutputsBuf { primary_buffer: Option, @@ -315,34 +368,17 @@ impl TransformOutputsBuf { } } - pub fn push(&mut self, event: Event) { - self.primary_buffer - .as_mut() - .expect("no default output") - .push(event); - } - - pub fn push_named(&mut self, name: &str, event: Event) { - self.named_buffers - .get_mut(name) - .expect("unknown output") - .push(event); - } - - pub fn append(&mut self, slice: &mut Vec) { - self.primary_buffer - .as_mut() - .expect("no default output") - .append(slice); - } - - pub fn append_named(&mut self, name: &str, slice: &mut Vec) { - self.named_buffers - .get_mut(name) - .expect("unknown output") - .append(slice); + /// Adds a new event to the transform output buffer + pub fn push(&mut self, name: Option<&str>, event: Event) { + match name { + Some(name) => self.named_buffers.get_mut(name), + None => self.primary_buffer.as_mut(), + } + .expect("unknown output") + .push(event); } + #[cfg(any(feature = "test", test))] pub fn drain(&mut self) -> impl Iterator + '_ { self.primary_buffer .as_mut() @@ -350,6 +386,7 @@ impl TransformOutputsBuf { .drain() } + #[cfg(any(feature = "test", test))] pub fn drain_named(&mut self, name: &str) -> impl Iterator + '_ { self.named_buffers .get_mut(name) @@ -357,33 +394,15 @@ impl TransformOutputsBuf { .drain() } - pub fn extend(&mut self, events: impl Iterator) { - self.primary_buffer - .as_mut() - .expect("no default output") - .extend(events); - } - + #[cfg(any(feature = "test", test))] pub fn take_primary(&mut self) -> OutputBuffer { std::mem::take(self.primary_buffer.as_mut().expect("no default output")) } + #[cfg(any(feature = "test", test))] pub fn take_all_named(&mut self) -> HashMap { std::mem::take(&mut self.named_buffers) } - - pub fn len(&self) -> usize { - self.primary_buffer.as_ref().map_or(0, OutputBuffer::len) - + self - .named_buffers - .values() - .map(OutputBuffer::len) - .sum::() - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } } impl ByteSizeOf for TransformOutputsBuf { @@ -455,6 +474,7 @@ impl OutputBuffer { }) } + #[cfg(any(feature = "test", test))] pub fn drain(&mut self) -> impl Iterator + '_ { self.0.drain(..).flat_map(EventArray::into_events) } @@ -474,12 +494,12 @@ impl OutputBuffer { self.0.iter().flat_map(EventArray::iter_events) } - pub fn into_events(self) -> impl Iterator { - self.0.into_iter().flat_map(EventArray::into_events) + fn events_mut(&mut self) -> impl Iterator { + self.0.iter_mut().flat_map(EventArray::iter_events_mut) } - pub fn take_events(&mut self) -> Vec { - std::mem::take(&mut self.0) + pub fn into_events(self) -> impl Iterator { + self.0.into_iter().flat_map(EventArray::into_events) } } @@ -504,7 +524,7 @@ impl EventDataEq> for OutputBuffer { } impl EstimatedJsonEncodedSizeOf for OutputBuffer { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { self.0 .iter() .map(EstimatedJsonEncodedSizeOf::estimated_json_encoded_size_of) diff --git a/lib/vector-lookup/Cargo.toml b/lib/vector-lookup/Cargo.toml index 41627bc19269a..a159f31561d0f 100644 --- a/lib/vector-lookup/Cargo.toml +++ b/lib/vector-lookup/Cargo.toml @@ -7,7 +7,7 @@ publish = false license = "MPL-2.0" [dependencies] 
-serde = { version = "1.0.163", default-features = false, features = ["derive", "alloc"] } +serde = { version = "1.0.164", default-features = false, features = ["derive", "alloc"] } vector-config = { path = "../vector-config" } vector-config-macros = { path = "../vector-config-macros" } -vrl = { git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0", default-features = false } +vrl.workspace = true diff --git a/lib/vector-lookup/src/lib.rs b/lib/vector-lookup/src/lib.rs index 7058cfc6e899f..e32e73b5f12a3 100644 --- a/lib/vector-lookup/src/lib.rs +++ b/lib/vector-lookup/src/lib.rs @@ -1,7 +1,7 @@ #![deny(warnings)] -pub use vrl::path::{ - event_path, metadata_path, owned_value_path, path, OwnedTargetPath, OwnedValuePath, PathPrefix, -}; +pub use vrl::path::{OwnedTargetPath, OwnedValuePath, PathPrefix}; + +pub use vrl::{event_path, metadata_path, owned_value_path, path}; pub mod lookup_v2; diff --git a/lib/vector-vrl/cli/Cargo.toml b/lib/vector-vrl/cli/Cargo.toml index 94b3863ee93d2..106e430355c56 100644 --- a/lib/vector-vrl/cli/Cargo.toml +++ b/lib/vector-vrl/cli/Cargo.toml @@ -9,4 +9,4 @@ license = "MPL-2.0" [dependencies] clap = { version = "4.1.14", features = ["derive"] } vector-vrl-functions = { path = "../functions" } -vrl = { git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0", default-features = false, features = ["stdlib", "cli"] } +vrl.workspace = true diff --git a/lib/vector-vrl/functions/Cargo.toml b/lib/vector-vrl/functions/Cargo.toml index 9202c7a916911..432cac075cdb5 100644 --- a/lib/vector-vrl/functions/Cargo.toml +++ b/lib/vector-vrl/functions/Cargo.toml @@ -7,4 +7,4 @@ publish = false license = "MPL-2.0" [dependencies] -vrl = { git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0", default-features = false, features = ["compiler", "path", "diagnostic"] } +vrl.workspace = true diff --git a/lib/vector-vrl/functions/src/set_semantic_meaning.rs b/lib/vector-vrl/functions/src/set_semantic_meaning.rs index 1f9d09f122526..26e61c1671f59 100644 --- a/lib/vector-vrl/functions/src/set_semantic_meaning.rs +++ b/lib/vector-vrl/functions/src/set_semantic_meaning.rs @@ -1,14 +1,14 @@ use std::collections::BTreeMap; use std::ops::{Deref, DerefMut}; use vrl::diagnostic::Label; -use vrl::path::OwnedValuePath; +use vrl::path::{OwnedTargetPath, PathPrefix}; use vrl::prelude::*; #[derive(Debug, Default, Clone)] -pub struct MeaningList(pub BTreeMap); +pub struct MeaningList(pub BTreeMap); impl Deref for MeaningList { - type Target = BTreeMap; + type Target = BTreeMap; fn deref(&self) -> &Self::Target { &self.0 @@ -68,36 +68,41 @@ impl Function for SetSemanticMeaning { .expect("meaning not bytes") .into_owned(); - // Semantic meaning can only be assigned to external fields. - if !query.is_external() { + let path = if let Some(path) = query.external_path() { + path + } else { + // Semantic meaning can only be assigned to external fields. 
let mut labels = vec![Label::primary( - "the target of this semantic meaning is non-external", + "this path must point to an event or metadata", span, )]; if let Some(variable) = query.as_variable() { labels.push(Label::context( - format!("maybe you meant \".{}\"?", variable.ident()), + format!( + "maybe you meant \".{}\" or \"%{}\"?", + variable.ident(), + variable.ident() + ), span, )); } let error = ExpressionError::Error { - message: "semantic meaning defined for non-external target".to_owned(), + message: "semantic meaning is not valid for local variables".to_owned(), labels, notes: vec![], }; return Err(Box::new(error) as Box); - } - - let path = query.path().clone(); + }; - let exists = state - .external - .target_kind() - .at_path(&path) - .contains_any_defined(); + let exists = match path.prefix { + PathPrefix::Event => state.external.target_kind(), + PathPrefix::Metadata => state.external.metadata_kind(), + } + .at_path(&path.path) + .contains_any_defined(); // Reject assigning meaning to non-existing field. if !exists { diff --git a/lib/vector-vrl/tests/Cargo.toml b/lib/vector-vrl/tests/Cargo.toml index 47c438a90891c..b67b8afa63a4a 100644 --- a/lib/vector-vrl/tests/Cargo.toml +++ b/lib/vector-vrl/tests/Cargo.toml @@ -7,7 +7,7 @@ publish = false [dependencies] enrichment = { path = "../../enrichment" } -vrl = { git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0", features = ["test_framework"] } +vrl.workspace = true vector-vrl-functions = { path = "../../vector-vrl/functions" } ansi_term = "0.12" diff --git a/lib/vector-vrl/tests/src/main.rs b/lib/vector-vrl/tests/src/main.rs index 489975864fc12..037b8ebedd614 100644 --- a/lib/vector-vrl/tests/src/main.rs +++ b/lib/vector-vrl/tests/src/main.rs @@ -1,11 +1,5 @@ #![allow(clippy::print_stdout)] // tests #![allow(clippy::print_stderr)] // tests -#![allow( - clippy::default_constructed_unit_structs, - clippy::explicit_iter_loop, - clippy::needless_pub_self, - clippy::useless_conversion -)] mod docs; mod test_enrichment; diff --git a/lib/vector-vrl/web-playground/Cargo.toml b/lib/vector-vrl/web-playground/Cargo.toml index 8415e5eacd733..b951a29661226 100644 --- a/lib/vector-vrl/web-playground/Cargo.toml +++ b/lib/vector-vrl/web-playground/Cargo.toml @@ -10,9 +10,7 @@ crate-type = ["cdylib"] [dependencies] wasm-bindgen = "0.2" -vrl = { package = "vrl", git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0", default-features = false} -value = { package = "value", git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0", default-features = false} -stdlib = { package = "vrl-stdlib", git = "ssh://git@github.com/answerbook/vrl.git", rev = "v0.6.0"} +vrl.workspace = true serde = { version = "1.0", features = ["derive"] } serde-wasm-bindgen = "0.5" gloo-utils = { version = "0.1", features = ["serde"] } diff --git a/license-tool.toml b/license-tool.toml index dcefb5284d8cb..64e86fe9837b0 100644 --- a/license-tool.toml +++ b/license-tool.toml @@ -4,15 +4,6 @@ "openssl-macros" = { origin = "https://github.com/sfackler/rust-openssl" } "serde_nanos" = { origin = "https://github.com/caspervonb/serde_nanos" } -# These can go away once Vector starts using a release of the VRL crate with a -# library field set up. 
-"vrl" = { license = "MPL-2.0" } -"vrl-compiler" = { license = "MPL-2.0" } -"vrl-core" = { license = "MPL-2.0" } -"vrl-diagnostic" = { license = "MPL-2.0" } -"vrl-parser" = { license = "MPL-2.0" } -"vrl-tests" = { license = "MPL-2.0" } - # `ring` has a custom license that is mostly "ISC-style" but parts of it also fall under OpenSSL licensing. "ring-0.16.20" = { license = "ISC AND Custom" } diff --git a/netlify.toml b/netlify.toml index dad74bbc15e0a..b7c6deba7a70b 100644 --- a/netlify.toml +++ b/netlify.toml @@ -52,12 +52,6 @@ to = "https://github.com/vectordotdev/vector/discussions" status = 302 force = true -[[redirects]] -from = "https://roadmap.vector.dev/*" -to = "https://airtable.com/shriTZW5LeOE4cIyJ" -status = 302 -force = true - [[redirects]] from = "https://sh.vector.dev/*" to = "http://sh.vector.dev.s3-website-us-east-1.amazonaws.com/:splat" diff --git a/rfcs/2023-05-03-data-volume-metrics.md b/rfcs/2023-05-03-data-volume-metrics.md new file mode 100644 index 0000000000000..368fe874cd420 --- /dev/null +++ b/rfcs/2023-05-03-data-volume-metrics.md @@ -0,0 +1,240 @@ +# RFC 2023-05-02 - Data Volume Insights metrics + +Vector needs to be able to emit accurate metrics that can be usefully queried +to give users insights into the volume of data moving through the system. + +## Scope + +### In scope + +- All volume event metrics within Vector need to emit the estimated JSON size of the + event. With a consistent method for determining the size it will be easier to accurately + compare data in vs data out. + - `component_received_event_bytes_total` + - `component_sent_event_bytes_total` + - `component_received_event_total` + - `component_sent_event_total` +- The metrics sent by each sink needs to be tagged with the source id of the + event so the route an event takes through Vector can be queried. +- Each event needs to be labelled with a `service`. This is a new concept + within Vector and represents the application that generated the log, + metric or trace. +- The service tag and source tag in the metrics needs to be opt in so customers + that don't need the increased cardinality are unaffected. + +### Out of scope + +- Separate metrics, `component_sent_bytes_total` and `component_received_bytes_total` + that indicate network bytes sent by Vector are not considered here. + +## Pain + +Currently it is difficult to accurately gauge the volume of data that is moving +through Vector. It is difficult to query where data being sent out has come +from. + +## Proposal + +### User Experience + +Global config options will be provided to indicate that the `service` tag and the +`source` tag should be sent. For example: + +```yaml +telemetry: + tags: + service: true + source_id: true +``` + +This will cause Vector to emit a metric like (note the last two tags): + +```prometheus +vector_component_sent_event_bytes_total{component_id="out",component_kind="sink",component_name="out",component_type="console" + ,host="machine",service="potato",source_id="stdin"} 123 +``` + +The default will be to not emit these tags. + +### Implementation + +#### Metric tags + +**service** - to attach the service, we need to add a new meaning to Vector - + `service`. Any sources that receive data that could potentially + be considered a service will need to indicate which field means + `service`. This work has largely already been done with the + LogNamespacing work, so it will be trivial to add this new field. + Not all sources will be able to specify a specific field to + indicate the `service`. 
In time it will be possible for this to be accomplished through `VRL`.
+
+**source_id** - A new field will be added to the [Event metadata][event_metadata] -
+                `Arc<OutputId>` that will indicate the source of the event.
+                `OutputId` will need to be serializable so it can be stored in
+                the disk buffer. Since this field is just an identifier, it can
+                still be used even if the source no longer exists when the event
+                is consumed by a sink.
+
+We will need to do an audit of all components to ensure that the
+bytes emitted for the `component_received_event_bytes_total` and
+`component_sent_event_bytes_total` metrics are the estimated JSON size of the
+event.
+
+These tags will be given the names configured in [User Experience](#user-experience).
+
+The `reduce` and `aggregate` transforms combine multiple events together. In this
+case the `source` and `service` of the first event will be taken.
+
+If there is no `source`, a source of `-` will be emitted. The only way this can
+happen is if the event was created by the `lua` transform.
+
+If there is no `service` available, a service of `-` will be emitted.
+
+Emitting a `-` rather than omitting the tag entirely makes it explicit that
+there was no value (rather than the tag simply having been forgotten), and makes
+it clear that the metric represents the absence of a `service` or `source` rather
+than an aggregate across all services.
+
+The [Component Spec][component_spec] will need updating to indicate that these
+tags need to be included.
+
+**Performance** - There is going to be a performance hit when emitting these metrics.
+Currently, for each batch, a single event is emitted containing the count and size
+of the entire batch. With this change it will be necessary to scan the entire
+batch to obtain the count of events for each source/service combination before
+emitting the counts. This will involve additional allocations to maintain the
+counts, as well as the O(n) scan over the batch.
+
+#### `component_received_event_bytes_total`
+
+This metric is emitted by the framework [here][source_sender], so it looks like
+the only change needed is to add the service tag.
+
+#### `component_sent_event_bytes_total`
+
+For stream-based sinks this will typically be the byte value returned by
+`DriverResponse::events_sent`.
+
+Despite this being in the [Component Spec][component_spec], not all sinks currently
+conform to it.
+
+As an example, from a cursory glance over a couple of sinks:
+
+The AMQP sink currently emits this value as the length of the binary
+data that is sent. By the time the data has reached the code where the
+`component_sent_event_bytes_total` event is emitted, that event has been
+encoded and the actual estimated JSON size has been lost. The sink will need
+to be updated so that, when the event is encoded, the encoded event together
+with the pre-encoded JSON byte size is sent to the service where the event
+is emitted.
+
+The Kafka sink also currently sends the binary size, but it looks like the
+estimated JSON byte size is easily accessible at the point of emitting, so it
+would not need too much of a change.
+
+To ensure that the correct metric is sent in a type-safe manner, we will wrap
+the estimated JSON size in a newtype:
+
+```rust
+pub struct JsonSize(usize);
+```
+
+The `EventsSent` metric will only accept this type.
+
+### Registered metrics
+
+It is currently not possible to have dynamic tags with preregistered metrics.
+
+Preregistering these metrics is essential to ensure that they don't expire.
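+
+To make this constraint concrete, the sketch below shows the shape of the
+existing pattern: a handle is registered once, held for the component's
+lifetime, and emitted against repeatedly. The `register`/`Registered` and
+`EventsSent`/`CountByteSize` names mirror the handles used elsewhere in this
+change, but the types here are simplified stand-ins rather than Vector's real
+internal-event API.
+
+```rust
+/// Simplified stand-ins for the real internal-event types (illustrative only).
+struct EventsSent;
+struct CountByteSize(u64, u64);
+
+/// Holding a `Registered` handle is what keeps the metric from expiring.
+struct Registered<E>(E);
+
+impl Registered<EventsSent> {
+    fn emit(&self, CountByteSize(count, bytes): CountByteSize) {
+        // The real handle increments counters; printing stands in for that here.
+        println!("component_sent_event_bytes_total += {bytes} ({count} events)");
+    }
+}
+
+fn register(event: EventsSent) -> Registered<EventsSent> {
+    Registered(event)
+}
+
+fn main() {
+    // Registered once up front, then emitted many times on the hot path.
+    let events_sent = register(EventsSent);
+    events_sent.emit(CountByteSize(10, 1_024));
+
+    // With per-(source, service) tags the handle cannot be created up front,
+    // because the tag values are only known once a batch is inspected,
+    // hence the caching layer described below.
+}
+```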
+
+The current mechanism to expire metrics is to check if a handle to the given
+metric is being held. If it isn't, and nothing has updated that metric in
+the last cycle, the metric is dropped. If a metric is dropped, the next time
+that event is emitted with those tags, the count starts at zero again.
+
+We will need to introduce a registered event caching layer that will register
+and cache new events keyed on the tags that are sent to it.
+
+Currently a registered metric is stored in a `Registered<Event>`.
+
+We will need a new struct that wraps this and is generic over both a tuple of
+the tags for each event and the event itself - e.g. `Cached<(String, String), EventsSent>`.
+This struct will maintain a `BTreeMap` from tags to `Registered<Event>` handles.
+Since this will need to be shared across threads, the cache will need to be
+stored in an `RwLock`.
+
+In pseudo rust:
+
+```rust
+struct Cached<Tags, Event> {
+    cache: Arc<RwLock<BTreeMap<Tags, Registered<Event>>>>,
+    register: Fn(Tags) -> Registered<Event>,
+}
+
+impl<Tags, Event> Cached<Tags, Event> {
+    fn emit(&mut self, tags: Tags, value: Event) {
+        if let Some(event) = self.cache.get(&tags) {
+            event.emit(value);
+        } else {
+            let event = self.register(tags);
+            event.emit(value);
+            self.cache.insert(tags, event);
+        }
+    }
+}
+```
+
+## Rationale
+
+The ability to visualize data flowing through Vector will allow users to ascertain
+how effectively Vector is currently being used. This will enable users to
+optimise their configurations to make the best use of Vector's features.
+
+## Drawbacks
+
+The additional tags being added to the metrics will increase the cardinality of
+those metrics if they are enabled.
+
+## Prior Art
+
+
+## Alternatives
+
+We could use an alternative measure instead of the estimated JSON size.
+
+- *Network bytes* This provides a more accurate picture of the actual data being received
+  and sent by Vector, but will regularly produce different sizes for an incoming event
+  and the corresponding outgoing event.
+- *In memory size* The size of the event as held in memory. This may be more accurate in
+  determining the amount of memory Vector will be utilizing at any time, but will often be
+  less accurate relative to the data being sent and received, which is often JSON.
+
+## Outstanding Questions
+
+## Plan Of Attack
+
+Incremental steps to execute this change. These will be converted to issues after the RFC is approved:
+
+- [ ] Add the `source` field to the Event metadata to indicate the source the event has come from.
+- [ ] Update the volume event metrics to take a `JsonSize` value. Use the compiler to ensure all metrics
+      emitted use this. The `EstimatedJsonEncodedSizeOf` trait will be updated to return a `JsonSize`.
+- [ ] Add the Service meaning. Update any sources that potentially create a service to point the meaning
+      to the relevant field.
+- [ ] Introduce an event caching layer that caches registered events based on the tags sent to it.
+- [ ] Update the emitted events to accept the new tags, taking the `telemetry` configuration options
+      into account.
+- [ ] There is going to be a hit on performance with these changes. Add benchmarking to help us understand
+      how much the impact will be.
+
+## Future Improvements
+
+- Logs emitted by Vector should also be tagged with `source_id` and `service`.
+- This RFC proposes storing the source and service as strings. This incurs a cost, since scanning each
+  event to get the counts of events by source and service will involve multiple string comparisons. A
+  future optimization could be to hash the combination of these values at the source into a single
+  integer (see the sketch below).
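+
+As a rough illustration of that last idea (purely hypothetical, not part of this
+RFC's plan of attack), the two strings could be hashed once, at the source, into
+a single integer key that per-batch counting can then compare cheaply:
+
+```rust
+use std::collections::hash_map::DefaultHasher;
+use std::hash::{Hash, Hasher};
+
+/// Collapse a (source_id, service) pair into one integer so that per-batch
+/// counting compares integers rather than two strings. The hashing scheme is
+/// illustrative only; the RFC leaves this choice open.
+fn volume_key(source_id: &str, service: &str) -> u64 {
+    let mut hasher = DefaultHasher::new();
+    source_id.hash(&mut hasher);
+    service.hash(&mut hasher);
+    hasher.finish()
+}
+
+fn main() {
+    // Tag values borrowed from the example metric earlier in this RFC.
+    println!("{:#x}", volume_key("stdin", "potato"));
+}
+```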
+ +[component_spec]: https://github.com/vectordotdev/vector/blob/master/docs/specs/component.md#componenteventssent +[source_sender]: https://github.com/vectordotdev/vector/blob/master/src/source_sender/mod.rs#L265-L268 +[event_metadata]: https://github.com/vectordotdev/vector/blob/master/lib/vector-core/src/event/metadata.rs#L20-L38 diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 748be2713f5b3..008def46a7a20 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "1.72.0" +channel = "1.70.0" profile = "default" diff --git a/scripts/Gemfile b/scripts/Gemfile index 3547dd6311eca..8fb1de071f728 100644 --- a/scripts/Gemfile +++ b/scripts/Gemfile @@ -1,4 +1,4 @@ -ruby '~> 2.7.0' +ruby '~> 3.1.0' # !!! # Please try not to add more dependencies here. diff --git a/scripts/Gemfile.lock b/scripts/Gemfile.lock index bac6a4ecd3493..051a61c1e83da 100644 --- a/scripts/Gemfile.lock +++ b/scripts/Gemfile.lock @@ -30,7 +30,7 @@ DEPENDENCIES toml-rb (~> 2.0) RUBY VERSION - ruby 2.7.1p83 + ruby 3.1.4p223 BUNDLED WITH - 2.1.4 + 2.4.14 diff --git a/scripts/cross/bootstrap-ubuntu.sh b/scripts/cross/bootstrap-ubuntu.sh index 1b4889604b06b..cf053aa365b20 100755 --- a/scripts/cross/bootstrap-ubuntu.sh +++ b/scripts/cross/bootstrap-ubuntu.sh @@ -1,6 +1,8 @@ #!/bin/sh set -o errexit +echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries + apt-get update apt-get install -y \ apt-transport-https \ diff --git a/scripts/environment/bootstrap-macos-10.sh b/scripts/environment/bootstrap-macos-10.sh index 6db8102f683bc..fb105c2756b81 100755 --- a/scripts/environment/bootstrap-macos-10.sh +++ b/scripts/environment/bootstrap-macos-10.sh @@ -1,6 +1,9 @@ #! /usr/bin/env bash set -e -o verbose +# https://github.com/Homebrew/homebrew-cask/issues/150323 +unset HOMEBREW_NO_INSTALL_FROM_API + brew update brew install ruby@2.7 coreutils cue-lang/tap/cue protobuf diff --git a/scripts/environment/prepare.sh b/scripts/environment/prepare.sh index d00924206114c..73d51c951b1b4 100755 --- a/scripts/environment/prepare.sh +++ b/scripts/environment/prepare.sh @@ -21,10 +21,6 @@ if ! rust-license-tool --help >& /dev/null ; then cargo install --git https://github.com/DataDog/rust-license-tool fi -cd scripts -bundle install -cd .. - # Currently fixing this to version 0.30 since version 0.31 has introduced # a change that means it only works with versions of node > 10. 
# https://github.com/igorshubovych/markdownlint-cli/issues/258 diff --git a/scripts/integration/amqp/test.yaml b/scripts/integration/amqp/test.yaml index 5f13537f896c0..94c080a18559d 100644 --- a/scripts/integration/amqp/test.yaml +++ b/scripts/integration/amqp/test.yaml @@ -5,3 +5,13 @@ test_filter: '::amqp::' matrix: version: ['3.8'] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/amqp.rs" +- "src/internal_events/amqp.rs" +- "src/sinks/amqp/**" +- "src/sources/amqp.rs" +- "src/sources/util/**" +- "src/sinks/util/**" diff --git a/scripts/integration/appsignal/test.yaml b/scripts/integration/appsignal/test.yaml index 0f0970f75e4ac..d110306916df0 100644 --- a/scripts/integration/appsignal/test.yaml +++ b/scripts/integration/appsignal/test.yaml @@ -9,3 +9,9 @@ runner: matrix: version: [latest] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/sinks/appsignal/**" +- "src/sinks/util/**" diff --git a/scripts/integration/aws/test.yaml b/scripts/integration/aws/test.yaml index 424e6d90c85e9..6e554fca22c1a 100644 --- a/scripts/integration/aws/test.yaml +++ b/scripts/integration/aws/test.yaml @@ -17,3 +17,15 @@ env: matrix: version: [latest] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/aws_**" +- "src/internal_events/aws_**" +- "src/sources/aws_**" +- "src/sources/util/**" +- "src/sinks/aws_**" +- "src/sinks/util/**" +- "src/transforms/aws_**" +- "scripts/integration/aws/**" diff --git a/scripts/integration/axiom/test.yaml b/scripts/integration/axiom/test.yaml index 59bd599e4a92e..1e8c3e1d8eec5 100644 --- a/scripts/integration/axiom/test.yaml +++ b/scripts/integration/axiom/test.yaml @@ -12,3 +12,10 @@ runner: matrix: postgres: [13-alpine] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/sinks/axiom.rs" +- "src/sinks/util/**" +- "scripts/integration/axiom/**" diff --git a/scripts/integration/azure/test.yaml b/scripts/integration/azure/test.yaml index f871b052747ae..fe9226e2beeed 100644 --- a/scripts/integration/azure/test.yaml +++ b/scripts/integration/azure/test.yaml @@ -10,3 +10,10 @@ env: matrix: version: [3.14.0] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/sinks/azure_**" +- "src/sinks/util/**" +- "scripts/integration/azure/**" diff --git a/scripts/integration/clickhouse/compose.yaml b/scripts/integration/clickhouse/compose.yaml index fe11611e9265d..62f8a90a543c2 100644 --- a/scripts/integration/clickhouse/compose.yaml +++ b/scripts/integration/clickhouse/compose.yaml @@ -2,4 +2,4 @@ version: '3' services: clickhouse: - image: docker.io/yandex/clickhouse-server:${CONFIG_VERSION} + image: docker.io/clickhouse/clickhouse-server:${CONFIG_VERSION} diff --git a/scripts/integration/clickhouse/test.yaml b/scripts/integration/clickhouse/test.yaml index 7f59a79e07196..7b106b914d4dd 100644 --- a/scripts/integration/clickhouse/test.yaml +++ b/scripts/integration/clickhouse/test.yaml @@ -7,4 +7,11 @@ env: CLICKHOUSE_ADDRESS: http://clickhouse:8123 matrix: - version: ['19'] + version: ['23'] + +# changes to these files/paths will 
invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/sinks/clickhouse/**" +- "src/sinks/util/**" +- "scripts/integration/clickhouse/**" diff --git a/scripts/integration/databend/test.yaml b/scripts/integration/databend/test.yaml index 9ce69f72ba690..f84a979cb4b11 100644 --- a/scripts/integration/databend/test.yaml +++ b/scripts/integration/databend/test.yaml @@ -11,3 +11,10 @@ runner: matrix: version: ['latest'] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/sinks/databend/**" +- "src/sinks/util/**" +- "scripts/integration/databend/**" diff --git a/scripts/integration/datadog-agent/test.yaml b/scripts/integration/datadog-agent/test.yaml index cbf55cd6ee8ca..6c4b399fdfb3e 100644 --- a/scripts/integration/datadog-agent/test.yaml +++ b/scripts/integration/datadog-agent/test.yaml @@ -12,3 +12,11 @@ env: matrix: version: ['7'] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/common/datadog.rs" +- "src/internal_events/datadog_*" +- "src/sources/datadog_agent/**" +- "scripts/integration/datadog-agent/**" diff --git a/scripts/integration/datadog-logs/test.yaml b/scripts/integration/datadog-logs/test.yaml index 87dbbecae66cf..30a99f8a87ae7 100644 --- a/scripts/integration/datadog-logs/test.yaml +++ b/scripts/integration/datadog-logs/test.yaml @@ -9,3 +9,12 @@ runner: matrix: version: [latest] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/common/datadog.rs" +- "src/internal_events/datadog_*" +- "src/sinks/datadog/logs/**" +- "src/sinks/util/**" +- "scripts/integration/datadog-logs/**" diff --git a/scripts/integration/datadog-metrics/test.yaml b/scripts/integration/datadog-metrics/test.yaml index cfdf63fab0783..237008a0d2551 100644 --- a/scripts/integration/datadog-metrics/test.yaml +++ b/scripts/integration/datadog-metrics/test.yaml @@ -9,3 +9,12 @@ runner: matrix: version: [latest] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/common/datadog.rs" +- "src/internal_events/datadog_*" +- "src/sinks/datadog/metrics/**" +- "src/sinks/util/**" +- "scripts/integration/datadog-metrics/**" diff --git a/scripts/integration/datadog-traces/test.yaml b/scripts/integration/datadog-traces/test.yaml index 9a62b19cbf832..31c4c0f97ef11 100644 --- a/scripts/integration/datadog-traces/test.yaml +++ b/scripts/integration/datadog-traces/test.yaml @@ -13,3 +13,12 @@ env: matrix: version: [latest] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/common/datadog.rs" +- "src/internal_events/datadog_*" +- "src/sinks/datadog/**" +- "src/sinks/util/**" +- "scripts/integration/datadog-traces/**" diff --git a/scripts/integration/dnstap/test.yaml b/scripts/integration/dnstap/test.yaml index 6475a242cf476..dab2f93e5e2ff 100644 --- a/scripts/integration/dnstap/test.yaml +++ b/scripts/integration/dnstap/test.yaml @@ -12,3 +12,11 @@ runner: matrix: version: ['latest'] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using 
https://github.com/micromatch/picomatch +paths: +- "src/internal_events/dnstap.rs" +- "src/sources/dnstap/**" +- "src/sources/util/**" +- "scripts/integration/dnstap/**" diff --git a/scripts/integration/docker-logs/test.yaml b/scripts/integration/docker-logs/test.yaml index 5ad677b4b1259..15a432f955c70 100644 --- a/scripts/integration/docker-logs/test.yaml +++ b/scripts/integration/docker-logs/test.yaml @@ -8,3 +8,12 @@ runner: matrix: default: ["default"] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/docker.rs" +- "src/internal_events/docker_logs.rs" +- "src/sources/docker_logs/**" +- "src/sources/util/**" +- "scripts/integration/docker-logs/**" diff --git a/scripts/integration/elasticsearch/test.yaml b/scripts/integration/elasticsearch/test.yaml index 1ef3eb532c7be..80d00cfad73f7 100644 --- a/scripts/integration/elasticsearch/test.yaml +++ b/scripts/integration/elasticsearch/test.yaml @@ -12,3 +12,10 @@ env: matrix: version: [7.13.1] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/sinks/elasticsearch/**" +- "src/sinks/util/**" +- "scripts/integration/elasticsearch/**" diff --git a/scripts/integration/eventstoredb/test.yaml b/scripts/integration/eventstoredb/test.yaml index f0dbb90b377e0..43370281b158f 100644 --- a/scripts/integration/eventstoredb/test.yaml +++ b/scripts/integration/eventstoredb/test.yaml @@ -5,3 +5,11 @@ test_filter: '::eventstoredb_metrics::' matrix: version: [latest] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/internal_events/eventstoredb_metrics.rs" +- "src/sources/eventstoredb_metrics/**" +- "src/sources/util/**" +- "scripts/integration/eventstoredb/**" diff --git a/scripts/integration/fluent/test.yaml b/scripts/integration/fluent/test.yaml index 8457454baf7d4..6593f8379b216 100644 --- a/scripts/integration/fluent/test.yaml +++ b/scripts/integration/fluent/test.yaml @@ -10,3 +10,11 @@ runner: matrix: default: ["default"] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/internal_events/fluent.rs" +- "src/sources/fluent/**" +- "src/sources/util/**" +- "scripts/integration/fluent/**" diff --git a/scripts/integration/gcp/test.yaml b/scripts/integration/gcp/test.yaml index c63a4de29b613..7e516b716fafb 100644 --- a/scripts/integration/gcp/test.yaml +++ b/scripts/integration/gcp/test.yaml @@ -8,3 +8,14 @@ env: matrix: version: [latest] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/internal_events/gcp_pubsub.rs" +- "src/sources/gcp_pubsub.rs" +- "src/sources/util/**" +- "src/sinks/gcp/**" +- "src/sinks/util/**" +- "src/gcp.rs" +- "scripts/integration/gcp/**" diff --git a/scripts/integration/http-client/compose.yaml b/scripts/integration/http-client/compose.yaml index 69ad1c6ef3d94..a1723c93d8d1a 100644 --- a/scripts/integration/http-client/compose.yaml +++ b/scripts/integration/http-client/compose.yaml @@ -11,7 +11,7 @@ services: image: docker.io/sigoden/dufs:${CONFIG_VERSION} command: - -a - - /@user:pass + - "user:pass@/" - --auth-method - basic - /data diff --git 
a/scripts/integration/http-client/test.yaml b/scripts/integration/http-client/test.yaml index aa867b5b23e39..0ae2b49bf2c76 100644 --- a/scripts/integration/http-client/test.yaml +++ b/scripts/integration/http-client/test.yaml @@ -9,4 +9,11 @@ env: DUFS_HTTPS_ADDRESS: https://dufs-https:5000 matrix: - version: [latest] + version: ["v0.34.1"] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/sources/http_client/**" +- "src/sources/util/**" +- "scripts/integration/http-client/**" diff --git a/scripts/integration/humio/test.yaml b/scripts/integration/humio/test.yaml index 95597eea7d8c3..344ab6a7fc5fc 100644 --- a/scripts/integration/humio/test.yaml +++ b/scripts/integration/humio/test.yaml @@ -9,3 +9,10 @@ runner: matrix: version: [1.13.1] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/sinks/humio/**" +- "src/sinks/util/**" +- "scripts/integration/humio/**" diff --git a/scripts/integration/influxdb/test.yaml b/scripts/integration/influxdb/test.yaml index ef7bc7bcae349..3dad78af2e2e8 100644 --- a/scripts/integration/influxdb/test.yaml +++ b/scripts/integration/influxdb/test.yaml @@ -10,3 +10,11 @@ env: matrix: version: ['1.8'] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/internal_events/influxdb.rs" +- "src/sinks/influxdb/**" +- "src/sinks/util/**" +- "scripts/integration/influxdb/**" diff --git a/scripts/integration/kafka/test.yaml b/scripts/integration/kafka/test.yaml index 39a2e2be58af1..a52131e6e5a1b 100644 --- a/scripts/integration/kafka/test.yaml +++ b/scripts/integration/kafka/test.yaml @@ -8,3 +8,14 @@ env: matrix: version: [latest] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/internal_events/kafka.rs" +- "src/sinks/kafka/**" +- "src/sinks/util/**" +- "src/sources/kafka.rs" +- "src/sources/util/**" +- "src/kafka.rs" +- "scripts/integration/kafka/**" diff --git a/scripts/integration/logstash/test.yaml b/scripts/integration/logstash/test.yaml index 550daed1f7ce1..a73a040b6a996 100644 --- a/scripts/integration/logstash/test.yaml +++ b/scripts/integration/logstash/test.yaml @@ -9,3 +9,10 @@ env: matrix: version: [7.12.1] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/sources/logstash.rs" +- "src/sources/util/**" +- "scripts/integration/logstash/**" diff --git a/scripts/integration/loki/test.yaml b/scripts/integration/loki/test.yaml index 156392182a509..60f762dd569b7 100644 --- a/scripts/integration/loki/test.yaml +++ b/scripts/integration/loki/test.yaml @@ -8,3 +8,11 @@ env: matrix: version: [2.4.1] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/internal_events/loki.rs" +- "src/sinks/loki/**" +- "src/sinks/util/**" +- "scripts/integration/loki/**" diff --git a/scripts/integration/mongodb/test.yaml b/scripts/integration/mongodb/test.yaml index 3c2a40b264ecc..76ddefab16f46 100644 --- a/scripts/integration/mongodb/test.yaml +++ b/scripts/integration/mongodb/test.yaml @@ -9,3 +9,11 @@ env: matrix: 
version: [4.2.10] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/internal_events/mongodb_metrics.rs" +- "src/sources/mongodb_metrics/**" +- "src/sources/util/**" +- "scripts/integration/mongodb/**" diff --git a/scripts/integration/nats/test.yaml b/scripts/integration/nats/test.yaml index 9a11fabbf14de..1615b5f244ff6 100644 --- a/scripts/integration/nats/test.yaml +++ b/scripts/integration/nats/test.yaml @@ -14,3 +14,14 @@ env: matrix: version: [latest] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/internal_events/nats.rs" +- "src/sources/nats.rs" +- "src/sources/util/**" +- "src/sinks/nats.rs" +- "src/sinks/util/**" +- "src/nats.rs" +- "scripts/integration/nats/**" diff --git a/scripts/integration/nginx/test.yaml b/scripts/integration/nginx/test.yaml index 61519b9f4fff4..934873608d5ef 100644 --- a/scripts/integration/nginx/test.yaml +++ b/scripts/integration/nginx/test.yaml @@ -11,3 +11,11 @@ runner: matrix: version: [1.19.4] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/internal_events/nginx_metrics.rs" +- "src/sources/nginx_metrics/**" +- "src/sources/util/**" +- "scripts/integration/nginx/**" diff --git a/scripts/integration/opentelemetry/test.yaml b/scripts/integration/opentelemetry/test.yaml index 140958c15a079..e586c444affac 100644 --- a/scripts/integration/opentelemetry/test.yaml +++ b/scripts/integration/opentelemetry/test.yaml @@ -10,3 +10,10 @@ runner: matrix: version: [0.56.0] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/sources/opentelemetry/**" +- "src/sources/util/**" +- "scripts/integration/opentelemetry/**" diff --git a/scripts/integration/postgres/test.yaml b/scripts/integration/postgres/test.yaml index 37260c91c3847..046431913d4c6 100644 --- a/scripts/integration/postgres/test.yaml +++ b/scripts/integration/postgres/test.yaml @@ -14,3 +14,11 @@ runner: matrix: version: ['13.1'] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/internal_events/postgresql_metrics.rs" +- "src/sources/postgresql_metrics.rs" +- "src/sources/util/**" +- "scripts/integration/postgres/**" diff --git a/scripts/integration/prometheus/test.yaml b/scripts/integration/prometheus/test.yaml index fb3a52d529a35..d2db2d9282b6b 100644 --- a/scripts/integration/prometheus/test.yaml +++ b/scripts/integration/prometheus/test.yaml @@ -9,3 +9,13 @@ env: matrix: prometheus: ['v2.33.4'] influxdb: ['1.8'] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/internal_events/prometheus.rs" +- "src/sources/prometheus/**" +- "src/sources/util/**" +- "src/sinks/prometheus/**" +- "src/sinks/util/**" +- "scripts/integration/prometheus/**" diff --git a/scripts/integration/pulsar/test.yaml b/scripts/integration/pulsar/test.yaml index a63f7da772e4b..824f0e0f290d4 100644 --- a/scripts/integration/pulsar/test.yaml +++ b/scripts/integration/pulsar/test.yaml @@ -8,3 +8,11 @@ env: matrix: version: [latest] + +# changes to these files/paths will 
invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/internal_events/pulsar.rs" +- "src/sinks/pulsar/**" +- "src/sinks/util/**" +- "scripts/integration/pulsar/**" diff --git a/scripts/integration/redis/test.yaml b/scripts/integration/redis/test.yaml index 4456e4829082f..d2d0577e844ca 100644 --- a/scripts/integration/redis/test.yaml +++ b/scripts/integration/redis/test.yaml @@ -8,3 +8,13 @@ env: matrix: version: [6-alpine] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/internal_events/redis.rs" +- "src/sources/redis/**" +- "src/sources/util/**" +- "src/sinks/redis.rs" +- "src/sinks/util/**" +- "scripts/integration/redis/**" diff --git a/scripts/integration/splunk/test.yaml b/scripts/integration/splunk/test.yaml index 4d45c8364ae00..85de787cbe1e6 100644 --- a/scripts/integration/splunk/test.yaml +++ b/scripts/integration/splunk/test.yaml @@ -9,3 +9,13 @@ env: matrix: version: ["8.2.4", "7.3"] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/internal_events/splunk_hec.rs" +- "src/sources/splunk_hec/**" +- "src/sources/util/**" +- "src/sinks/splunk_hec/**" +- "src/sinks/util/**" +- "scripts/integration/splunk/**" diff --git a/scripts/integration/webhdfs/test.yaml b/scripts/integration/webhdfs/test.yaml index fe0c52cd47af7..fc6a3193f9139 100644 --- a/scripts/integration/webhdfs/test.yaml +++ b/scripts/integration/webhdfs/test.yaml @@ -8,3 +8,10 @@ env: matrix: hadoop: ['2.0.0-hadoop3.2.1-java8'] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/sinks/webhdfs/**" +- "src/sinks/util/**" +- "scripts/integration/webhdfs/**" diff --git a/src/api/schema/components/mod.rs b/src/api/schema/components/mod.rs index 6f096034ad87a..20880f3a06698 100644 --- a/src/api/schema/components/mod.rs +++ b/src/api/schema/components/mod.rs @@ -11,7 +11,6 @@ use std::{ use async_graphql::{Enum, InputObject, Interface, Object, Subscription}; use once_cell::sync::Lazy; use tokio_stream::{wrappers::BroadcastStream, Stream, StreamExt}; -use vector_config::NamedComponent; use vector_core::internal_event::DEFAULT_OUTPUT; use crate::{ diff --git a/src/api/schema/metrics/events_in.rs b/src/api/schema/metrics/events_in.rs deleted file mode 100644 index b9550457d09e0..0000000000000 --- a/src/api/schema/metrics/events_in.rs +++ /dev/null @@ -1,42 +0,0 @@ -use async_graphql::Object; -use chrono::{DateTime, Utc}; - -use crate::event::{Metric, MetricValue}; - -pub struct EventsInTotal(Metric); - -impl EventsInTotal { - pub const fn new(m: Metric) -> Self { - Self(m) - } - - pub fn get_timestamp(&self) -> Option> { - self.0.timestamp() - } - - pub fn get_events_in_total(&self) -> f64 { - match self.0.value() { - MetricValue::Counter { value } => *value, - _ => 0.00, - } - } -} - -#[Object] -impl EventsInTotal { - /// Metric timestamp - pub async fn timestamp(&self) -> Option> { - self.get_timestamp() - } - - /// Total incoming events - pub async fn events_in_total(&self) -> f64 { - self.get_events_in_total() - } -} - -impl From for EventsInTotal { - fn from(m: Metric) -> Self { - Self(m) - } -} diff --git a/src/api/schema/metrics/events_out.rs b/src/api/schema/metrics/events_out.rs deleted file mode 100644 index 
de565d56bf56f..0000000000000 --- a/src/api/schema/metrics/events_out.rs +++ /dev/null @@ -1,42 +0,0 @@ -use async_graphql::Object; -use chrono::{DateTime, Utc}; - -use crate::event::{Metric, MetricValue}; - -pub struct EventsOutTotal(Metric); - -impl EventsOutTotal { - pub const fn new(m: Metric) -> Self { - Self(m) - } - - pub fn get_timestamp(&self) -> Option> { - self.0.timestamp() - } - - pub fn get_events_out_total(&self) -> f64 { - match self.0.value() { - MetricValue::Counter { value } => *value, - _ => 0.00, - } - } -} - -#[Object] -impl EventsOutTotal { - /// Metric timestamp - pub async fn timestamp(&self) -> Option> { - self.get_timestamp() - } - - /// Total outgoing events - pub async fn events_out_total(&self) -> f64 { - self.get_events_out_total() - } -} - -impl From for EventsOutTotal { - fn from(m: Metric) -> Self { - Self(m) - } -} diff --git a/src/api/schema/metrics/filter.rs b/src/api/schema/metrics/filter.rs index 0c61a5794fffe..bd094d64ce1c5 100644 --- a/src/api/schema/metrics/filter.rs +++ b/src/api/schema/metrics/filter.rs @@ -5,8 +5,8 @@ use tokio::time::Duration; use tokio_stream::{Stream, StreamExt}; use super::{ - filter_output_metric, EventsInTotal, EventsOutTotal, OutputThroughput, ProcessedBytesTotal, - ProcessedEventsTotal, ReceivedEventsTotal, SentEventsTotal, + filter_output_metric, OutputThroughput, ReceivedBytesTotal, ReceivedEventsTotal, + SentBytesTotal, SentEventsTotal, }; use crate::{ config::ComponentKey, @@ -46,31 +46,20 @@ fn sum_metrics_owned>(metrics: I) -> Option { - fn processed_events_total(&self) -> Option; - fn processed_bytes_total(&self) -> Option; + fn received_bytes_total(&self) -> Option; fn received_events_total(&self) -> Option; - fn events_in_total(&self) -> Option; - fn events_out_total(&self) -> Option; + fn sent_bytes_total(&self) -> Option; fn sent_events_total(&self) -> Option; } impl<'a> MetricsFilter<'a> for Vec { - fn processed_events_total(&self) -> Option { - let sum = sum_metrics(self.iter().filter(|m| m.name() == "processed_events_total"))?; - - Some(ProcessedEventsTotal::new(sum)) - } - - fn processed_bytes_total(&self) -> Option { - let sum = sum_metrics(self.iter().filter(|m| m.name() == "processed_bytes_total"))?; - - Some(ProcessedBytesTotal::new(sum)) - } - - fn events_in_total(&self) -> Option { - let sum = sum_metrics(self.iter().filter(|m| m.name() == "events_in_total"))?; + fn received_bytes_total(&self) -> Option { + let sum = sum_metrics( + self.iter() + .filter(|m| m.name() == "component_received_bytes_total"), + )?; - Some(EventsInTotal::new(sum)) + Some(ReceivedBytesTotal::new(sum)) } fn received_events_total(&self) -> Option { @@ -82,10 +71,13 @@ impl<'a> MetricsFilter<'a> for Vec { Some(ReceivedEventsTotal::new(sum)) } - fn events_out_total(&self) -> Option { - let sum = sum_metrics(self.iter().filter(|m| m.name() == "events_out_total"))?; + fn sent_bytes_total(&self) -> Option { + let sum = sum_metrics( + self.iter() + .filter(|m| m.name() == "component_sent_bytes_total"), + )?; - Some(EventsOutTotal::new(sum)) + Some(SentBytesTotal::new(sum)) } fn sent_events_total(&self) -> Option { @@ -99,24 +91,14 @@ impl<'a> MetricsFilter<'a> for Vec { } impl<'a> MetricsFilter<'a> for Vec<&'a Metric> { - fn processed_events_total(&self) -> Option { - let sum = sum_metrics( - self.iter() - .filter(|m| m.name() == "processed_events_total") - .copied(), - )?; - - Some(ProcessedEventsTotal::new(sum)) - } - - fn processed_bytes_total(&self) -> Option { + fn received_bytes_total(&self) -> Option { let sum = sum_metrics( 
self.iter() - .filter(|m| m.name() == "processed_bytes_total") + .filter(|m| m.name() == "component_received_bytes_total") .copied(), )?; - Some(ProcessedBytesTotal::new(sum)) + Some(ReceivedBytesTotal::new(sum)) } fn received_events_total(&self) -> Option { @@ -129,24 +111,14 @@ impl<'a> MetricsFilter<'a> for Vec<&'a Metric> { Some(ReceivedEventsTotal::new(sum)) } - fn events_in_total(&self) -> Option { - let sum = sum_metrics( - self.iter() - .filter(|m| m.name() == "events_in_total") - .copied(), - )?; - - Some(EventsInTotal::new(sum)) - } - - fn events_out_total(&self) -> Option { + fn sent_bytes_total(&self) -> Option { let sum = sum_metrics( self.iter() - .filter(|m| m.name() == "events_out_total") + .filter(|m| m.name() == "component_sent_bytes_total") .copied(), )?; - Some(EventsOutTotal::new(sum)) + Some(SentBytesTotal::new(sum)) } fn sent_events_total(&self) -> Option { @@ -202,7 +174,7 @@ pub fn by_component_key(component_key: &ComponentKey) -> Vec { type MetricFilterFn = dyn Fn(&Metric) -> bool + Send + Sync; /// Returns a stream of `Vec`, where `metric_name` matches the name of the metric -/// (e.g. "processed_events_total"), and the value is derived from `MetricValue::Counter`. Uses a +/// (e.g. "component_sent_events_total"), and the value is derived from `MetricValue::Counter`. Uses a /// local cache to match against the `component_id` of a metric, to return results only when /// the value of a current iteration is greater than the previous. This is useful for the client /// to be notified as metrics increase without returning 'empty' or identical results. @@ -230,7 +202,7 @@ pub fn component_counter_metrics( } /// Returns a stream of `Vec`, where `metric_name` matches the name of the metric -/// (e.g. "processed_events_total"), and the value is derived from `MetricValue::Gauge`. Uses a +/// (e.g. "component_sent_events_total"), and the value is derived from `MetricValue::Gauge`. Uses a /// local cache to match against the `component_id` of a metric, to return results only when /// the value of a current iteration is greater than the previous. This is useful for the client /// to be notified as metrics increase without returning 'empty' or identical results. 
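Editor's note: the renamed `MetricsFilter` helpers above keep the same shape as before, only the metric names change: filter the captured metrics by the new component-scoped counters (`component_received_bytes_total`, `component_sent_bytes_total`) and sum their `Counter` values, returning `None` when nothing matched. A minimal, self-contained sketch of that aggregation pattern follows; the `Metric`/`MetricValue` types here are simplified stand-ins for illustration only, not Vector's real structs.

// Simplified stand-ins; Vector's real `Metric` type carries tags, timestamps, etc.
#[allow(dead_code)]
#[derive(Clone, Debug)]
enum MetricValue {
    Counter { value: f64 },
    Gauge { value: f64 },
}

#[derive(Clone, Debug)]
struct Metric {
    name: &'static str,
    value: MetricValue,
}

/// Sum the counter values of all metrics matching `name`, returning `None`
/// when no such metric was captured (mirroring the `Option<...>` returns above).
fn sum_counters(metrics: &[Metric], name: &str) -> Option<f64> {
    let mut iter = metrics.iter().filter(|m| m.name == name).peekable();
    iter.peek()?; // no matching metric at all -> None
    Some(
        iter.filter_map(|m| match &m.value {
            MetricValue::Counter { value } => Some(*value),
            _ => None,
        })
        .sum(),
    )
}

fn main() {
    let metrics = vec![
        Metric { name: "component_received_bytes_total", value: MetricValue::Counter { value: 10.0 } },
        Metric { name: "component_received_bytes_total", value: MetricValue::Counter { value: 32.0 } },
        Metric { name: "component_sent_bytes_total", value: MetricValue::Counter { value: 7.0 } },
    ];

    assert_eq!(sum_counters(&metrics, "component_received_bytes_total"), Some(42.0));
    assert_eq!(sum_counters(&metrics, "processed_bytes_total"), None);
}

Returning `Option` rather than defaulting to zero lets the GraphQL layer distinguish "this component has not reported that counter yet" from "the counter is currently zero", which is why the removed `processed_*` helpers and their replacements both keep that signature.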
diff --git a/src/api/schema/metrics/mod.rs b/src/api/schema/metrics/mod.rs index d883d560645e2..11b8abc51ddc5 100644 --- a/src/api/schema/metrics/mod.rs +++ b/src/api/schema/metrics/mod.rs @@ -1,12 +1,10 @@ mod allocated_bytes; mod errors; -mod events_in; -mod events_out; pub mod filter; mod output; -mod processed_bytes; -mod processed_events; +mod received_bytes; mod received_events; +mod sent_bytes; mod sent_events; mod sink; pub mod source; @@ -20,19 +18,15 @@ pub use allocated_bytes::{AllocatedBytes, ComponentAllocatedBytes}; use async_graphql::{Interface, Object, Subscription}; use chrono::{DateTime, Utc}; pub use errors::{ComponentErrorsTotal, ErrorsTotal}; -pub use events_in::EventsInTotal; -pub use events_out::EventsOutTotal; pub use filter::*; pub use output::*; -pub use processed_bytes::{ - ComponentProcessedBytesThroughput, ComponentProcessedBytesTotal, ProcessedBytesTotal, -}; -pub use processed_events::{ - ComponentProcessedEventsThroughput, ComponentProcessedEventsTotal, ProcessedEventsTotal, +pub use received_bytes::{ + ComponentReceivedBytesThroughput, ComponentReceivedBytesTotal, ReceivedBytesTotal, }; pub use received_events::{ ComponentReceivedEventsThroughput, ComponentReceivedEventsTotal, ReceivedEventsTotal, }; +pub use sent_bytes::{ComponentSentBytesThroughput, ComponentSentBytesTotal, SentBytesTotal}; pub use sent_events::{ComponentSentEventsThroughput, ComponentSentEventsTotal, SentEventsTotal}; pub use sink::{IntoSinkMetrics, SinkMetrics}; pub use source::{IntoSourceMetrics, SourceMetrics}; @@ -46,8 +40,6 @@ use crate::config::ComponentKey; #[graphql(field(name = "timestamp", type = "Option>"))] pub enum MetricType { Uptime(Uptime), - ProcessedEventsTotal(ProcessedEventsTotal), - ProcessedBytesTotal(ProcessedBytesTotal), } #[derive(Default)] @@ -78,57 +70,6 @@ impl MetricsSubscription { }) } - /// Event processing metrics. - async fn processed_events_total( - &self, - #[graphql(default = 1000, validator(minimum = 10, maximum = 60_000))] interval: i32, - ) -> impl Stream { - get_metrics(interval).filter_map(|m| match m.name() { - "processed_events_total" => Some(ProcessedEventsTotal::new(m)), - _ => None, - }) - } - - /// Event processing throughput sampled over the provided millisecond `interval`. - async fn processed_events_throughput( - &self, - #[graphql(default = 1000, validator(minimum = 10, maximum = 60_000))] interval: i32, - ) -> impl Stream { - counter_throughput(interval, &|m| m.name() == "processed_events_total") - .map(|(_, throughput)| throughput as i64) - } - - /// Component event processing throughput metrics over `interval`. - async fn component_processed_events_throughputs( - &self, - #[graphql(default = 1000, validator(minimum = 10, maximum = 60_000))] interval: i32, - ) -> impl Stream> { - component_counter_throughputs(interval, &|m| m.name() == "processed_events_total").map( - |m| { - m.into_iter() - .map(|(m, throughput)| { - ComponentProcessedEventsThroughput::new( - ComponentKey::from(m.tag_value("component_id").unwrap()), - throughput as i64, - ) - }) - .collect() - }, - ) - } - - /// Component event processing metrics over `interval`. 
- async fn component_processed_events_totals( - &self, - #[graphql(default = 1000, validator(minimum = 10, maximum = 60_000))] interval: i32, - ) -> impl Stream> { - component_counter_metrics(interval, &|m| m.name() == "processed_events_total").map(|m| { - m.into_iter() - .map(ComponentProcessedEventsTotal::new) - .collect() - }) - } - /// Total received events metrics #[graphql(deprecation = "Use component_received_events_totals instead")] async fn received_events_total( @@ -231,53 +172,64 @@ impl MetricsSubscription { }) } - /// Byte processing metrics. - async fn processed_bytes_total( + /// Component bytes received metrics over `interval`. + async fn component_received_bytes_totals( &self, #[graphql(default = 1000, validator(minimum = 10, maximum = 60_000))] interval: i32, - ) -> impl Stream { - get_metrics(interval).filter_map(|m| match m.name() { - "processed_bytes_total" => Some(ProcessedBytesTotal::new(m)), - _ => None, - }) + ) -> impl Stream> { + component_counter_metrics(interval, &|m| m.name() == "component_received_bytes_total").map( + |m| { + m.into_iter() + .map(ComponentReceivedBytesTotal::new) + .collect() + }, + ) } - /// Byte processing throughput sampled over a provided millisecond `interval`. - async fn processed_bytes_throughput( + /// Component bytes received throughput over `interval` + async fn component_received_bytes_throughputs( &self, #[graphql(default = 1000, validator(minimum = 10, maximum = 60_000))] interval: i32, - ) -> impl Stream { - counter_throughput(interval, &|m| m.name() == "processed_bytes_total") - .map(|(_, throughput)| throughput as i64) + ) -> impl Stream> { + component_counter_throughputs(interval, &|m| m.name() == "component_received_bytes_total") + .map(|m| { + m.into_iter() + .map(|(m, throughput)| { + ComponentReceivedBytesThroughput::new( + ComponentKey::from(m.tag_value("component_id").unwrap()), + throughput as i64, + ) + }) + .collect() + }) } - /// Component byte processing metrics over `interval`. - async fn component_processed_bytes_totals( + /// Component bytes sent metrics over `interval`. + async fn component_sent_bytes_totals( &self, #[graphql(default = 1000, validator(minimum = 10, maximum = 60_000))] interval: i32, - ) -> impl Stream> { - component_counter_metrics(interval, &|m| m.name() == "processed_bytes_total").map(|m| { - m.into_iter() - .map(ComponentProcessedBytesTotal::new) - .collect() - }) + ) -> impl Stream> { + component_counter_metrics(interval, &|m| m.name() == "component_sent_bytes_total") + .map(|m| m.into_iter().map(ComponentSentBytesTotal::new).collect()) } - /// Component byte processing throughput over `interval` - async fn component_processed_bytes_throughputs( + /// Component bytes sent throughput over `interval` + async fn component_sent_bytes_throughputs( &self, #[graphql(default = 1000, validator(minimum = 10, maximum = 60_000))] interval: i32, - ) -> impl Stream> { - component_counter_throughputs(interval, &|m| m.name() == "processed_bytes_total").map(|m| { - m.into_iter() - .map(|(m, throughput)| { - ComponentProcessedBytesThroughput::new( - ComponentKey::from(m.tag_value("component_id").unwrap()), - throughput as i64, - ) - }) - .collect() - }) + ) -> impl Stream> { + component_counter_throughputs(interval, &|m| m.name() == "component_sent_bytes_total").map( + |m| { + m.into_iter() + .map(|(m, throughput)| { + ComponentSentBytesThroughput::new( + ComponentKey::from(m.tag_value("component_id").unwrap()), + throughput as i64, + ) + }) + .collect() + }, + ) } /// Total error metrics. 
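Editor's note: the new `component_received_bytes_*` and `component_sent_bytes_*` subscriptions introduced in this hunk follow the same sampling model as the existing event-based ones: read the counters every `interval` milliseconds and report either the absolute totals or the increase since the previous sample, and (per the doc comment in filter.rs above) only emit when the value actually grew. If async_graphql's default camelCase renaming applies here, as I believe it does, clients would subscribe to fields such as componentSentBytesThroughputs in place of the removed componentProcessedBytesThroughputs. A minimal sketch of the delta-per-interval idea, independent of the GraphQL layer; the `ThroughputCache` name and types are illustrative assumptions, not Vector's implementation.

use std::collections::HashMap;

/// Tracks the last observed value of a counter per component and yields the
/// increase since the previous observation, skipping samples that did not grow
/// (mirroring "only when the value of a current iteration is greater than the
/// previous" described above).
struct ThroughputCache {
    last: HashMap<String, f64>,
}

impl ThroughputCache {
    fn new() -> Self {
        Self { last: HashMap::new() }
    }

    /// Returns `Some(delta)` when the counter grew since the last sample, `None` otherwise.
    fn observe(&mut self, component_id: &str, value: f64) -> Option<i64> {
        let previous = self.last.insert(component_id.to_owned(), value).unwrap_or(0.0);
        (value > previous).then(|| (value - previous) as i64)
    }
}

fn main() {
    let mut cache = ThroughputCache::new();
    assert_eq!(cache.observe("my_sink", 100.0), Some(100)); // first sample: full value
    assert_eq!(cache.observe("my_sink", 100.0), None);      // no growth, nothing emitted
    assert_eq!(cache.observe("my_sink", 250.0), Some(150)); // 150 bytes since the last sample
}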
@@ -325,8 +277,6 @@ impl MetricsSubscription { ) -> impl Stream { get_metrics(interval).filter_map(|m| match m.name() { "uptime_seconds" => Some(MetricType::Uptime(m.into())), - "processed_events_total" => Some(MetricType::ProcessedEventsTotal(m.into())), - "processed_bytes_total" => Some(MetricType::ProcessedBytesTotal(m.into())), _ => None, }) } diff --git a/src/api/schema/metrics/processed_bytes.rs b/src/api/schema/metrics/received_bytes.rs similarity index 57% rename from src/api/schema/metrics/processed_bytes.rs rename to src/api/schema/metrics/received_bytes.rs index 9711272cb08e3..516631559cbae 100644 --- a/src/api/schema/metrics/processed_bytes.rs +++ b/src/api/schema/metrics/received_bytes.rs @@ -6,9 +6,9 @@ use crate::{ event::{Metric, MetricValue}, }; -pub struct ProcessedBytesTotal(Metric); +pub struct ReceivedBytesTotal(Metric); -impl ProcessedBytesTotal { +impl ReceivedBytesTotal { pub const fn new(m: Metric) -> Self { Self(m) } @@ -17,7 +17,7 @@ impl ProcessedBytesTotal { self.0.timestamp() } - pub fn get_processed_bytes_total(&self) -> f64 { + pub fn get_received_bytes_total(&self) -> f64 { match self.0.value() { MetricValue::Counter { value } => *value, _ => 0.00, @@ -26,32 +26,33 @@ impl ProcessedBytesTotal { } #[Object] -impl ProcessedBytesTotal { - /// Metric timestamp +impl ReceivedBytesTotal { + /// Metric timestamp. pub async fn timestamp(&self) -> Option> { self.get_timestamp() } - /// Total number of bytes processed - pub async fn processed_bytes_total(&self) -> f64 { - self.get_processed_bytes_total() + /// Total number of bytes received. + pub async fn received_bytes_total(&self) -> f64 { + self.get_received_bytes_total() } } -impl From for ProcessedBytesTotal { +impl From for ReceivedBytesTotal { fn from(m: Metric) -> Self { Self(m) } } -pub struct ComponentProcessedBytesTotal { +pub struct ComponentReceivedBytesTotal { component_key: ComponentKey, metric: Metric, } -impl ComponentProcessedBytesTotal { - /// Returns a new `ComponentProcessedBytesTotal` struct, which is a GraphQL type. The - /// component id is hoisted for clear field resolution in the resulting payload +impl ComponentReceivedBytesTotal { + /// Returns a new `ComponentReceivedBytesTotal`. + /// + /// Expects that the metric contains a tag for the component ID the metric is referenced to. pub fn new(metric: Metric) -> Self { let component_key = metric.tag_value("component_id").expect( "Returned a metric without a `component_id`, which shouldn't happen. Please report.", @@ -66,25 +67,25 @@ impl ComponentProcessedBytesTotal { } #[Object] -impl ComponentProcessedBytesTotal { - /// Component id +impl ComponentReceivedBytesTotal { + /// Component ID. async fn component_id(&self) -> &str { self.component_key.id() } - /// Bytes processed total metric - async fn metric(&self) -> ProcessedBytesTotal { - ProcessedBytesTotal::new(self.metric.clone()) + /// Metric for total bytes received. + async fn metric(&self) -> ReceivedBytesTotal { + ReceivedBytesTotal::new(self.metric.clone()) } } -pub struct ComponentProcessedBytesThroughput { +pub struct ComponentReceivedBytesThroughput { component_key: ComponentKey, throughput: i64, } -impl ComponentProcessedBytesThroughput { - /// Returns a new `ComponentProcessedBytesThroughput`, set to the provided id/throughput values +impl ComponentReceivedBytesThroughput { + /// Returns a new `ComponentReceivedBytesThroughput` for the given component. 
pub const fn new(component_key: ComponentKey, throughput: i64) -> Self { Self { component_key, @@ -94,13 +95,13 @@ impl ComponentProcessedBytesThroughput { } #[Object] -impl ComponentProcessedBytesThroughput { - /// Component id +impl ComponentReceivedBytesThroughput { + /// Component ID. async fn component_id(&self) -> &str { self.component_key.id() } - /// Bytes processed throughput + /// Throughput of bytes sent. async fn throughput(&self) -> i64 { self.throughput } diff --git a/src/api/schema/metrics/processed_events.rs b/src/api/schema/metrics/sent_bytes.rs similarity index 57% rename from src/api/schema/metrics/processed_events.rs rename to src/api/schema/metrics/sent_bytes.rs index 9b3a5c974fc3e..02fb833a39adc 100644 --- a/src/api/schema/metrics/processed_events.rs +++ b/src/api/schema/metrics/sent_bytes.rs @@ -6,9 +6,9 @@ use crate::{ event::{Metric, MetricValue}, }; -pub struct ProcessedEventsTotal(Metric); +pub struct SentBytesTotal(Metric); -impl ProcessedEventsTotal { +impl SentBytesTotal { pub const fn new(m: Metric) -> Self { Self(m) } @@ -17,7 +17,7 @@ impl ProcessedEventsTotal { self.0.timestamp() } - pub fn get_processed_events_total(&self) -> f64 { + pub fn get_sent_bytes_total(&self) -> f64 { match self.0.value() { MetricValue::Counter { value } => *value, _ => 0.00, @@ -26,32 +26,33 @@ impl ProcessedEventsTotal { } #[Object] -impl ProcessedEventsTotal { - /// Metric timestamp +impl SentBytesTotal { + /// Metric timestamp. pub async fn timestamp(&self) -> Option> { self.get_timestamp() } - /// Total number of events processed - pub async fn processed_events_total(&self) -> f64 { - self.get_processed_events_total() + /// Total number of bytes sent. + pub async fn sent_bytes_total(&self) -> f64 { + self.get_sent_bytes_total() } } -impl From for ProcessedEventsTotal { +impl From for SentBytesTotal { fn from(m: Metric) -> Self { Self(m) } } -pub struct ComponentProcessedEventsTotal { +pub struct ComponentSentBytesTotal { component_key: ComponentKey, metric: Metric, } -impl ComponentProcessedEventsTotal { - /// Returns a new `ComponentProcessedEventsTotal` struct, which is a GraphQL type. The - /// component id is hoisted for clear field resolution in the resulting payload +impl ComponentSentBytesTotal { + /// Returns a new `ComponentSentBytesTotal` for the given metric. + /// + /// Expects that the metric contains a tag for the component ID the metric is referenced to. pub fn new(metric: Metric) -> Self { let component_key = metric.tag_value("component_id").expect( "Returned a metric without a `component_id`, which shouldn't happen. Please report.", @@ -66,25 +67,25 @@ impl ComponentProcessedEventsTotal { } #[Object] -impl ComponentProcessedEventsTotal { - /// Component id +impl ComponentSentBytesTotal { + /// Component ID. async fn component_id(&self) -> &str { self.component_key.id() } - /// Events processed total metric - async fn metric(&self) -> ProcessedEventsTotal { - ProcessedEventsTotal::new(self.metric.clone()) + /// Metric for total bytes sent. + async fn metric(&self) -> SentBytesTotal { + SentBytesTotal::new(self.metric.clone()) } } -pub struct ComponentProcessedEventsThroughput { +pub struct ComponentSentBytesThroughput { component_key: ComponentKey, throughput: i64, } -impl ComponentProcessedEventsThroughput { - /// Returns a new `ComponentProcessedEventsThroughput`, set to the provided id/throughput values +impl ComponentSentBytesThroughput { + /// Returns a new `ComponentSentBytesThroughput` for the given component. 
pub const fn new(component_key: ComponentKey, throughput: i64) -> Self { Self { component_key, @@ -94,13 +95,13 @@ impl ComponentProcessedEventsThroughput { } #[Object] -impl ComponentProcessedEventsThroughput { - /// Component id +impl ComponentSentBytesThroughput { + /// Component ID. async fn component_id(&self) -> &str { self.component_key.id() } - /// Events processed throughput + /// Throughput of bytes sent. async fn throughput(&self) -> i64 { self.throughput } diff --git a/src/api/schema/metrics/sink/generic.rs b/src/api/schema/metrics/sink/generic.rs index e2a01b55d5961..e0f96e38fbd6d 100644 --- a/src/api/schema/metrics/sink/generic.rs +++ b/src/api/schema/metrics/sink/generic.rs @@ -16,32 +16,17 @@ impl GenericSinkMetrics { #[Object] impl GenericSinkMetrics { - /// Events processed for the current sink - pub async fn processed_events_total(&self) -> Option { - self.0.processed_events_total() - } - - /// Bytes processed for the current sink - pub async fn processed_bytes_total(&self) -> Option { - self.0.processed_bytes_total() - } - - /// Total incoming events for the current sink - pub async fn events_in_total(&self) -> Option { - self.0.events_in_total() - } - /// Total received events for the current sink pub async fn received_events_total(&self) -> Option { self.0.received_events_total() } - /// Total outgoing events for the current sink - pub async fn events_out_total(&self) -> Option { - self.0.events_out_total() + /// Total sent bytes for the current sink + pub async fn sent_bytes_total(&self) -> Option { + self.0.sent_bytes_total() } - /// Total outgoing events for the current sink + /// Total sent events for the current sink pub async fn sent_events_total(&self) -> Option { self.0.sent_events_total() } diff --git a/src/api/schema/metrics/sink/mod.rs b/src/api/schema/metrics/sink/mod.rs index c6c25c3f8a726..97f943a7dbef7 100644 --- a/src/api/schema/metrics/sink/mod.rs +++ b/src/api/schema/metrics/sink/mod.rs @@ -2,28 +2,14 @@ mod generic; use async_graphql::Interface; -use super::{ - EventsInTotal, EventsOutTotal, ProcessedBytesTotal, ProcessedEventsTotal, ReceivedEventsTotal, - SentEventsTotal, -}; +use super::{ReceivedEventsTotal, SentBytesTotal, SentEventsTotal}; use crate::event::Metric; #[derive(Debug, Clone, Interface)] #[graphql( - field(name = "processed_events_total", type = "Option"), - field(name = "processed_bytes_total", type = "Option"), field(name = "received_events_total", type = "Option"), - field( - name = "events_in_total", - type = "Option", - deprecation = "Use received_events_total instead" - ), - field(name = "sent_events_total", type = "Option"), - field( - name = "events_out_total", - type = "Option", - deprecation = "Use sent_events_total instead" - ) + field(name = "sent_bytes_total", type = "Option"), + field(name = "sent_events_total", type = "Option") )] pub enum SinkMetrics { GenericSinkMetrics(generic::GenericSinkMetrics), diff --git a/src/api/schema/metrics/source/file.rs b/src/api/schema/metrics/source/file.rs index 4108f68d7210e..daff4648cc967 100644 --- a/src/api/schema/metrics/source/file.rs +++ b/src/api/schema/metrics/source/file.rs @@ -37,19 +37,9 @@ impl<'a> FileSourceMetricFile<'a> { &*self.name } - /// Metric indicating events processed for the current file - async fn processed_events_total(&self) -> Option { - self.metrics.processed_events_total() - } - - /// Metric indicating bytes processed for the current file - async fn processed_bytes_total(&self) -> Option { - self.metrics.processed_bytes_total() - } - - /// Metric 
indicating incoming events for the current file - async fn events_in_total(&self) -> Option { - self.metrics.events_in_total() + /// Metric indicating bytes received for the current file + async fn received_bytes_total(&self) -> Option { + self.metrics.received_bytes_total() } /// Metric indicating received events for the current file @@ -57,11 +47,6 @@ impl<'a> FileSourceMetricFile<'a> { self.metrics.received_events_total() } - /// Metric indicating outgoing events for the current file - async fn events_out_total(&self) -> Option { - self.metrics.events_out_total() - } - /// Metric indicating outgoing events for the current file async fn sent_events_total(&self) -> Option { self.metrics.sent_events_total() @@ -93,38 +78,24 @@ impl FileSourceMetrics { #[derive(Enum, Copy, Clone, Eq, PartialEq)] pub enum FileSourceMetricFilesSortFieldName { Name, - ProcessedBytesTotal, - ProcessedEventsTotal, + ReceivedBytesTotal, ReceivedEventsTotal, - EventsInTotal, SentEventsTotal, - EventsOutTotal, } impl sort::SortableByField for FileSourceMetricFile<'_> { fn sort(&self, rhs: &Self, field: &FileSourceMetricFilesSortFieldName) -> Ordering { match field { FileSourceMetricFilesSortFieldName::Name => Ord::cmp(&self.name, &rhs.name), - FileSourceMetricFilesSortFieldName::ProcessedBytesTotal => Ord::cmp( - &self - .metrics - .processed_bytes_total() - .map(|m| m.get_processed_bytes_total() as i64) - .unwrap_or(0), - &rhs.metrics - .processed_bytes_total() - .map(|m| m.get_processed_bytes_total() as i64) - .unwrap_or(0), - ), - FileSourceMetricFilesSortFieldName::ProcessedEventsTotal => Ord::cmp( + FileSourceMetricFilesSortFieldName::ReceivedBytesTotal => Ord::cmp( &self .metrics - .processed_events_total() - .map(|m| m.get_processed_events_total() as i64) + .received_bytes_total() + .map(|m| m.get_received_bytes_total() as i64) .unwrap_or(0), &rhs.metrics - .processed_events_total() - .map(|m| m.get_processed_events_total() as i64) + .received_bytes_total() + .map(|m| m.get_received_bytes_total() as i64) .unwrap_or(0), ), FileSourceMetricFilesSortFieldName::ReceivedEventsTotal => Ord::cmp( @@ -138,17 +109,6 @@ impl sort::SortableByField for FileSourceMet .map(|m| m.get_received_events_total() as i64) .unwrap_or(0), ), - FileSourceMetricFilesSortFieldName::EventsInTotal => Ord::cmp( - &self - .metrics - .events_in_total() - .map(|m| m.get_events_in_total() as i64) - .unwrap_or(0), - &rhs.metrics - .events_in_total() - .map(|m| m.get_events_in_total() as i64) - .unwrap_or(0), - ), FileSourceMetricFilesSortFieldName::SentEventsTotal => Ord::cmp( &self .metrics @@ -160,17 +120,6 @@ impl sort::SortableByField for FileSourceMet .map(|m| m.get_sent_events_total() as i64) .unwrap_or(0), ), - FileSourceMetricFilesSortFieldName::EventsOutTotal => Ord::cmp( - &self - .metrics - .events_out_total() - .map(|m| m.get_events_out_total() as i64) - .unwrap_or(0), - &rhs.metrics - .events_out_total() - .map(|m| m.get_events_out_total() as i64) - .unwrap_or(0), - ), } } } @@ -223,19 +172,9 @@ impl FileSourceMetrics { .await } - /// Events processed for the current file source - pub async fn processed_events_total(&self) -> Option { - self.0.processed_events_total() - } - - /// Bytes processed for the current file source - pub async fn processed_bytes_total(&self) -> Option { - self.0.processed_bytes_total() - } - - /// Total incoming events for the current file source - pub async fn events_in_total(&self) -> Option { - self.0.events_in_total() + /// Total received bytes for the current file source + pub async fn 
received_bytes_total(&self) -> Option { + self.0.received_bytes_total() } /// Total received events for the current file source @@ -243,12 +182,7 @@ impl FileSourceMetrics { self.0.received_events_total() } - /// Total outgoing events for the current file source - pub async fn events_out_total(&self) -> Option { - self.0.events_out_total() - } - - /// Total outgoing events for the current file source + /// Total sent events for the current file source pub async fn sent_events_total(&self) -> Option { self.0.sent_events_total() } @@ -272,8 +206,8 @@ mod tests { fn new(name: &'static str, events_processed: f64, bytes_processed: f64) -> Self { Self { name, - events_metric: metric("processed_events_total", events_processed), - bytes_metric: metric("processed_bytes_total", bytes_processed), + events_metric: metric("component_sent_events_total", events_processed), + bytes_metric: metric("component_received_bytes_total", bytes_processed), } } @@ -343,7 +277,7 @@ mod tests { let mut files = vec![t1.get_metric(), t2.get_metric(), t3.get_metric()]; let fields = vec![SortField:: { - field: FileSourceMetricFilesSortFieldName::ProcessedEventsTotal, + field: FileSourceMetricFilesSortFieldName::SentEventsTotal, direction: sort::Direction::Asc, }]; @@ -362,7 +296,7 @@ mod tests { let mut files = vec![t1.get_metric(), t2.get_metric(), t3.get_metric()]; let fields = vec![SortField:: { - field: FileSourceMetricFilesSortFieldName::ProcessedEventsTotal, + field: FileSourceMetricFilesSortFieldName::SentEventsTotal, direction: sort::Direction::Desc, }]; @@ -374,14 +308,14 @@ mod tests { } #[test] - fn processed_bytes_asc() { + fn received_bytes_asc() { let t1 = FileSourceMetricTest::new("a", 1000.00, 100.00); let t2 = FileSourceMetricTest::new("b", 500.00, 300.00); let t3 = FileSourceMetricTest::new("c", 250.00, 200.00); let mut files = vec![t1.get_metric(), t2.get_metric(), t3.get_metric()]; let fields = vec![SortField:: { - field: FileSourceMetricFilesSortFieldName::ProcessedBytesTotal, + field: FileSourceMetricFilesSortFieldName::ReceivedBytesTotal, direction: sort::Direction::Asc, }]; @@ -393,14 +327,14 @@ mod tests { } #[test] - fn processed_bytes_desc() { + fn received_bytes_desc() { let t1 = FileSourceMetricTest::new("a", 1000.00, 100.00); let t2 = FileSourceMetricTest::new("b", 500.00, 300.00); let t3 = FileSourceMetricTest::new("c", 250.00, 200.00); let mut files = vec![t1.get_metric(), t2.get_metric(), t3.get_metric()]; let fields = vec![SortField:: { - field: FileSourceMetricFilesSortFieldName::ProcessedBytesTotal, + field: FileSourceMetricFilesSortFieldName::ReceivedBytesTotal, direction: sort::Direction::Desc, }]; diff --git a/src/api/schema/metrics/source/generic.rs b/src/api/schema/metrics/source/generic.rs index f37cf8b13a716..c66d50a841c91 100644 --- a/src/api/schema/metrics/source/generic.rs +++ b/src/api/schema/metrics/source/generic.rs @@ -16,19 +16,9 @@ impl GenericSourceMetrics { #[Object] impl GenericSourceMetrics { - /// Events processed for the current source - pub async fn processed_events_total(&self) -> Option { - self.0.processed_events_total() - } - - /// Bytes processed for the current source - pub async fn processed_bytes_total(&self) -> Option { - self.0.processed_bytes_total() - } - - /// Total incoming events for the current source - pub async fn events_in_total(&self) -> Option { - self.0.events_in_total() + /// Total received bytes for the current source + pub async fn received_bytes_total(&self) -> Option { + self.0.received_bytes_total() } /// Total received events for the 
current source @@ -36,12 +26,7 @@ impl GenericSourceMetrics { self.0.received_events_total() } - /// Total outgoing events for the current source - pub async fn events_out_total(&self) -> Option { - self.0.events_out_total() - } - - /// Total outgoing events for the current source + /// Total sent events for the current source pub async fn sent_events_total(&self) -> Option { self.0.sent_events_total() } diff --git a/src/api/schema/metrics/source/mod.rs b/src/api/schema/metrics/source/mod.rs index e47cb48cf1333..5463ac0ecf545 100644 --- a/src/api/schema/metrics/source/mod.rs +++ b/src/api/schema/metrics/source/mod.rs @@ -3,28 +3,14 @@ mod generic; use async_graphql::Interface; -use super::{ - EventsInTotal, EventsOutTotal, ProcessedBytesTotal, ProcessedEventsTotal, ReceivedEventsTotal, - SentEventsTotal, -}; +use super::{ReceivedBytesTotal, ReceivedEventsTotal, SentEventsTotal}; use crate::event::Metric; #[derive(Debug, Clone, Interface)] #[graphql( - field(name = "processed_events_total", type = "Option"), - field(name = "processed_bytes_total", type = "Option"), + field(name = "received_bytes_total", type = "Option"), field(name = "received_events_total", type = "Option"), - field( - name = "events_in_total", - type = "Option", - deprecation = "Use received_events_total instead" - ), - field(name = "sent_events_total", type = "Option"), - field( - name = "events_out_total", - type = "Option", - deprecation = "Use sent_events_total instead" - ) + field(name = "sent_events_total", type = "Option") )] pub enum SourceMetrics { GenericSourceMetrics(generic::GenericSourceMetrics), diff --git a/src/api/schema/metrics/transform/generic.rs b/src/api/schema/metrics/transform/generic.rs index e6c5af7c90a0a..0fd2569551473 100644 --- a/src/api/schema/metrics/transform/generic.rs +++ b/src/api/schema/metrics/transform/generic.rs @@ -16,32 +16,12 @@ impl GenericTransformMetrics { #[Object] impl GenericTransformMetrics { - /// Events processed for the current transform - pub async fn processed_events_total(&self) -> Option { - self.0.processed_events_total() - } - - /// Bytes processed for the current transform - pub async fn processed_bytes_total(&self) -> Option { - self.0.processed_bytes_total() - } - - /// Total incoming events for the current transform - pub async fn events_in_total(&self) -> Option { - self.0.events_in_total() - } - /// Total received events for the current transform pub async fn received_events_total(&self) -> Option { self.0.received_events_total() } - /// Total outgoing events for the current transform - pub async fn events_out_total(&self) -> Option { - self.0.events_out_total() - } - - /// Total outgoing events for the current transform + /// Total sent events for the current transform pub async fn sent_events_total(&self) -> Option { self.0.sent_events_total() } diff --git a/src/api/schema/metrics/transform/mod.rs b/src/api/schema/metrics/transform/mod.rs index b408ac490a783..bfdc8a5a08b30 100644 --- a/src/api/schema/metrics/transform/mod.rs +++ b/src/api/schema/metrics/transform/mod.rs @@ -2,28 +2,13 @@ mod generic; use async_graphql::Interface; -use super::{ - EventsInTotal, EventsOutTotal, ProcessedBytesTotal, ProcessedEventsTotal, ReceivedEventsTotal, - SentEventsTotal, -}; +use super::{ReceivedEventsTotal, SentEventsTotal}; use crate::event::Metric; #[derive(Debug, Clone, Interface)] #[graphql( - field(name = "processed_events_total", type = "Option"), - field(name = "processed_bytes_total", type = "Option"), field(name = "received_events_total", type = "Option"), - 
field( - name = "events_in_total", - type = "Option", - deprecation = "Use received_events_total instead" - ), - field(name = "sent_events_total", type = "Option"), - field( - name = "events_out_total", - type = "Option", - deprecation = "Use sent_events_total instead" - ) + field(name = "sent_events_total", type = "Option") )] pub enum TransformMetrics { GenericTransformMetrics(generic::GenericTransformMetrics), diff --git a/src/app.rs b/src/app.rs index d9eb0cac32e2b..917d95fdc41ee 100644 --- a/src/app.rs +++ b/src/app.rs @@ -1,15 +1,14 @@ #![allow(missing_docs)] +use std::{ + collections::HashMap, num::NonZeroUsize, path::PathBuf, process::ExitStatus, time::Duration, +}; + use exitcode::ExitCode; use futures::StreamExt; #[cfg(feature = "enterprise")] use futures_util::future::BoxFuture; use once_cell::race::OnceNonZeroUsize; -use std::{ - collections::HashMap, - num::NonZeroUsize, - path::PathBuf, - time::{Duration, Instant}, -}; +use std::time::Instant; use tokio::{ runtime::{self, Runtime}, sync::mpsc, @@ -41,6 +40,11 @@ use crate::{ trace, }; +#[cfg(unix)] +use std::os::unix::process::ExitStatusExt; +#[cfg(windows)] +use std::os::windows::process::ExitStatusExt; + pub static WORKER_THREADS: OnceNonZeroUsize = OnceNonZeroUsize::new(); use crate::internal_events::{VectorQuit, VectorStarted, VectorStopped}; @@ -72,10 +76,14 @@ impl ApplicationConfig { ) -> Result { let config_paths = opts.config_paths_with_formats(); + let graceful_shutdown_duration = (!opts.no_graceful_shutdown_limit) + .then(|| Duration::from_secs(u64::from(opts.graceful_shutdown_limit_secs))); + let config = load_configs( &config_paths, opts.watch_config, opts.require_healthy, + graceful_shutdown_duration, signal_handler, ) .await?; @@ -158,10 +166,10 @@ impl ApplicationConfig { } impl Application { - pub fn run() { + pub fn run() -> ExitStatus { let (runtime, app) = Self::prepare_start().unwrap_or_else(|code| std::process::exit(code)); - runtime.block_on(app.run()); + runtime.block_on(app.run()) } pub fn prepare_start() -> Result<(Runtime, StartedApplication), ExitCode> { @@ -304,7 +312,7 @@ pub struct StartedApplication { } impl StartedApplication { - pub async fn run(self) { + pub async fn run(self) -> ExitStatus { self.main().await.shutdown().await } @@ -450,7 +458,7 @@ pub struct FinishedApplication { } impl FinishedApplication { - pub async fn shutdown(self) { + pub async fn shutdown(self) -> ExitStatus { let FinishedApplication { signal, mut signal_rx, @@ -468,18 +476,42 @@ impl FinishedApplication { SignalTo::Shutdown => { emit!(VectorStopped); tokio::select! { - _ = topology_controller.stop() => (), // Graceful shutdown finished + _ = topology_controller.stop() => ExitStatus::from_raw({ + #[cfg(windows)] + { + exitcode::OK as u32 + } + #[cfg(unix)] + exitcode::OK + }), // Graceful shutdown finished _ = signal_rx.recv() => { // It is highly unlikely that this event will exit from topology. emit!(VectorQuit); // Dropping the shutdown future will immediately shut the server down + ExitStatus::from_raw({ + #[cfg(windows)] + { + exitcode::UNAVAILABLE as u32 + } + #[cfg(unix)] + exitcode::OK + }) } + } } SignalTo::Quit => { // It is highly unlikely that this event will exit from topology. 
emit!(VectorQuit); drop(topology_controller); + ExitStatus::from_raw({ + #[cfg(windows)] + { + exitcode::UNAVAILABLE as u32 + } + #[cfg(unix)] + exitcode::OK + }) } _ => unreachable!(), } @@ -511,7 +543,7 @@ fn get_log_levels(default: &str) -> String { format!("codec={}", level), format!("vrl={}", level), format!("file_source={}", level), - "tower_limit=trace".to_owned(), + format!("tower_limit={}", level), format!("rdkafka={}", level), format!("buffers={}", level), format!("lapin={}", level), @@ -523,6 +555,7 @@ fn get_log_levels(default: &str) -> String { pub fn build_runtime(threads: Option, thread_name: &str) -> Result { let mut rt_builder = runtime::Builder::new_multi_thread(); + rt_builder.max_blocking_threads(20_000); rt_builder.enable_all().thread_name(thread_name); if let Some(threads) = threads { @@ -547,6 +580,7 @@ pub async fn load_configs( config_paths: &[ConfigPath], watch_config: bool, require_healthy: Option, + graceful_shutdown_duration: Option, signal_handler: &mut SignalHandler, ) -> Result { let config_paths = config::process_paths(config_paths).ok_or(exitcode::CONFIG)?; @@ -566,17 +600,22 @@ pub async fn load_configs( paths = ?config_paths.iter().map(<&PathBuf>::from).collect::>() ); + // config::init_log_schema should be called before initializing sources. + #[cfg(not(feature = "enterprise-tests"))] + config::init_log_schema(&config_paths, true).map_err(handle_config_errors)?; + let mut config = config::load_from_paths_with_provider_and_secrets(&config_paths, signal_handler) .await .map_err(handle_config_errors)?; - #[cfg(not(feature = "enterprise-tests"))] - config::init_log_schema(config.global.log_schema.clone(), true); + + config::init_telemetry(config.global.telemetry.clone(), true); if !config.healthchecks.enabled { info!("Health checks are disabled."); } config.healthchecks.set_require_healthy(require_healthy); + config.graceful_shutdown_duration = graceful_shutdown_duration; Ok(config) } diff --git a/src/aws/auth.rs b/src/aws/auth.rs index ec73bc97984bf..f95256fcf69a1 100644 --- a/src/aws/auth.rs +++ b/src/aws/auth.rs @@ -9,7 +9,10 @@ use aws_config::{ }, sts::AssumeRoleProviderBuilder, }; -use aws_types::{credentials::SharedCredentialsProvider, region::Region, Credentials}; +use aws_credential_types::{ + cache::CredentialsCache, provider::SharedCredentialsProvider, Credentials, +}; +use aws_types::region::Region; use serde_with::serde_as; use vector_common::sensitive_string::SensitiveString; use vector_config::configurable_component; @@ -117,6 +120,7 @@ pub enum AwsAuthentication { /// Relevant when the default credentials chain or `assume_role` is used. #[configurable(metadata(docs::type_unit = "seconds"))] #[configurable(metadata(docs::examples = 30))] + #[configurable(metadata(docs::human_name = "Load Timeout"))] load_timeout_secs: Option, /// Configuration for authenticating with AWS through IMDS. @@ -141,6 +145,7 @@ pub enum AwsAuthentication { /// Relevant when the default credentials chain or `assume_role` is used. #[configurable(metadata(docs::type_unit = "seconds"))] #[configurable(metadata(docs::examples = 30))] + #[configurable(metadata(docs::human_name = "Load Timeout"))] load_timeout_secs: Option, /// Configuration for authenticating with AWS through IMDS. @@ -163,6 +168,28 @@ fn default_profile() -> String { } impl AwsAuthentication { + pub async fn credentials_cache(&self) -> crate::Result { + match self { + AwsAuthentication::Role { + load_timeout_secs, .. + } + | AwsAuthentication::Default { + load_timeout_secs, .. 
+ } => { + let credentials_cache = CredentialsCache::lazy_builder() + .load_timeout( + load_timeout_secs + .map(Duration::from_secs) + .unwrap_or(DEFAULT_LOAD_TIMEOUT), + ) + .into_credentials_cache(); + + Ok(credentials_cache) + } + _ => Ok(CredentialsCache::lazy()), + } + } + pub async fn credentials_provider( &self, service_region: Region, @@ -205,28 +232,20 @@ impl AwsAuthentication { } AwsAuthentication::Role { assume_role, - load_timeout_secs, imds, region, + .. } => { let auth_region = region.clone().map(Region::new).unwrap_or(service_region); let provider = AssumeRoleProviderBuilder::new(assume_role) .region(auth_region.clone()) - .build( - default_credentials_provider(auth_region, *load_timeout_secs, *imds) - .await?, - ); + .build(default_credentials_provider(auth_region, *imds).await?); Ok(SharedCredentialsProvider::new(provider)) } - AwsAuthentication::Default { - load_timeout_secs, - imds, - region, - } => Ok(SharedCredentialsProvider::new( + AwsAuthentication::Default { imds, region, .. } => Ok(SharedCredentialsProvider::new( default_credentials_provider( region.clone().map(Region::new).unwrap_or(service_region), - *load_timeout_secs, *imds, ) .await?, @@ -247,7 +266,6 @@ impl AwsAuthentication { async fn default_credentials_provider( region: Region, - load_timeout_secs: Option, imds: ImdsAuthentication, ) -> crate::Result { let client = imds::Client::builder() @@ -257,16 +275,13 @@ async fn default_credentials_provider( .build() .await?; - let chain = DefaultCredentialsChain::builder() + let credentials_provider = DefaultCredentialsChain::builder() .region(region) .imds_client(client) - .load_timeout( - load_timeout_secs - .map(Duration::from_secs) - .unwrap_or(DEFAULT_LOAD_TIMEOUT), - ); + .build() + .await; - Ok(SharedCredentialsProvider::new(chain.build().await)) + Ok(SharedCredentialsProvider::new(credentials_provider)) } #[cfg(test)] diff --git a/src/aws/mod.rs b/src/aws/mod.rs index 0c3c53e53c1fd..ec0351f82fcc1 100644 --- a/src/aws/mod.rs +++ b/src/aws/mod.rs @@ -2,31 +2,33 @@ pub mod auth; pub mod region; +use std::error::Error; use std::future::Future; use std::pin::Pin; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use std::task::{Context, Poll}; -use std::time::{Duration, SystemTime}; +use std::time::SystemTime; pub use auth::{AwsAuthentication, ImdsAuthentication}; use aws_config::meta::region::ProvideRegion; +use aws_credential_types::provider::{ProvideCredentials, SharedCredentialsProvider}; use aws_sigv4::http_request::{SignableRequest, SigningSettings}; use aws_sigv4::SigningParams; -use aws_smithy_async::rt::sleep::{AsyncSleep, Sleep}; +use aws_smithy_async::rt::sleep::TokioSleep; use aws_smithy_client::bounds::SmithyMiddleware; use aws_smithy_client::erase::{DynConnector, DynMiddleware}; use aws_smithy_client::{Builder, SdkError}; -use aws_smithy_http::callback::BodyCallback; -use aws_smithy_http::endpoint::Endpoint; -use aws_smithy_http::event_stream::BoxError; +use aws_smithy_http::body::{BoxBody, SdkBody}; use aws_smithy_http::operation::{Request, Response}; use aws_smithy_types::retry::RetryConfig; -use aws_types::credentials::{ProvideCredentials, SharedCredentialsProvider}; use aws_types::region::Region; use aws_types::SdkConfig; use bytes::Bytes; +use http::HeaderMap; +use http_body::Body; use once_cell::sync::OnceCell; +use pin_project::pin_project; use regex::RegexSet; pub use region::RegionOrEndpoint; use tower::{Layer, Service, ServiceBuilder}; @@ -42,41 +44,48 @@ pub fn is_retriable_error(error: &SdkError) -> bool { match 
error { SdkError::TimeoutError(_) | SdkError::DispatchFailure(_) => true, SdkError::ConstructionFailure(_) => false, - SdkError::ResponseError { err: _, raw } | SdkError::ServiceError { err: _, raw } => { - // This header is a direct indication that we should retry the request. Eventually it'd - // be nice to actually schedule the retry after the given delay, but for now we just - // check that it contains a positive value. - let retry_header = raw.http().headers().get("x-amz-retry-after").is_some(); - - // Certain 400-level responses will contain an error code indicating that the request - // should be retried. Since we don't retry 400-level responses by default, we'll look - // for these specifically before falling back to more general heuristics. Because AWS - // services use a mix of XML and JSON response bodies and the AWS SDK doesn't give us - // a parsed representation, we resort to a simple string match. - // - // S3: RequestTimeout - // SQS: RequestExpired, ThrottlingException - // ECS: RequestExpired, ThrottlingException - // Kinesis: RequestExpired, ThrottlingException - // Cloudwatch: RequestExpired, ThrottlingException - // - // Now just look for those when it's a client_error - let re = RETRIABLE_CODES.get_or_init(|| { - RegexSet::new(["RequestTimeout", "RequestExpired", "ThrottlingException"]) - .expect("invalid regex") - }); - - let status = raw.http().status(); - let response_body = String::from_utf8_lossy(raw.http().body().bytes().unwrap_or(&[])); - - retry_header - || status.is_server_error() - || status == http::StatusCode::TOO_MANY_REQUESTS - || (status.is_client_error() && re.is_match(response_body.as_ref())) + SdkError::ResponseError(err) => check_response(err.raw()), + SdkError::ServiceError(err) => check_response(err.raw()), + _ => { + warn!("AWS returned unknown error, retrying request."); + true } } } +fn check_response(res: &Response) -> bool { + // This header is a direct indication that we should retry the request. Eventually it'd + // be nice to actually schedule the retry after the given delay, but for now we just + // check that it contains a positive value. + let retry_header = res.http().headers().get("x-amz-retry-after").is_some(); + + // Certain 400-level responses will contain an error code indicating that the request + // should be retried. Since we don't retry 400-level responses by default, we'll look + // for these specifically before falling back to more general heuristics. Because AWS + // services use a mix of XML and JSON response bodies and the AWS SDK doesn't give us + // a parsed representation, we resort to a simple string match. 
+ // + // S3: RequestTimeout + // SQS: RequestExpired, ThrottlingException + // ECS: RequestExpired, ThrottlingException + // Kinesis: RequestExpired, ThrottlingException + // Cloudwatch: RequestExpired, ThrottlingException + // + // Now just look for those when it's a client_error + let re = RETRIABLE_CODES.get_or_init(|| { + RegexSet::new(["RequestTimeout", "RequestExpired", "ThrottlingException"]) + .expect("invalid regex") + }); + + let status = res.http().status(); + let response_body = String::from_utf8_lossy(res.http().body().bytes().unwrap_or(&[])); + + retry_header + || status.is_server_error() + || status == http::StatusCode::TOO_MANY_REQUESTS + || (status.is_client_error() && re.is_match(response_body.as_ref())) +} + pub trait ClientBuilder { type Config; type Client; @@ -84,7 +93,7 @@ pub trait ClientBuilder { fn default_middleware() -> Self::DefaultMiddleware; - fn build(client: aws_smithy_client::Client, config: &aws_types::SdkConfig) -> Self::Client; + fn build(client: aws_smithy_client::Client, config: &SdkConfig) -> Self::Client; } pub async fn create_smithy_client( @@ -99,11 +108,11 @@ pub async fn create_smithy_client( let connector = if proxy.enabled { let proxy = build_proxy_connector(tls_settings, proxy)?; let hyper_client = aws_smithy_client::hyper_ext::Adapter::builder().build(proxy); - aws_smithy_client::erase::DynConnector::new(hyper_client) + DynConnector::new(hyper_client) } else { let tls_connector = build_tls_connector(tls_settings)?; let hyper_client = aws_smithy_client::hyper_ext::Adapter::builder().build(tls_connector); - aws_smithy_client::erase::DynConnector::new(hyper_client) + DynConnector::new(hyper_client) }; let middleware_builder = ServiceBuilder::new() @@ -114,7 +123,7 @@ pub async fn create_smithy_client( let mut client_builder = Builder::new() .connector(connector) .middleware(middleware) - .sleep_impl(Arc::new(TokioSleep)); + .sleep_impl(Arc::new(TokioSleep::new())); client_builder.set_retry_config(Some(retry_config.into())); Ok(client_builder.build()) @@ -135,7 +144,7 @@ pub async fn resolve_region(region: Option) -> crate::Result { pub async fn create_client( auth: &AwsAuthentication, region: Option, - endpoint: Option, + endpoint: Option, proxy: &ProxyConfig, tls_options: &Option, is_sink: bool, @@ -148,12 +157,13 @@ pub async fn create_client( // Build the configuration first. let mut config_builder = SdkConfig::builder() + .credentials_cache(auth.credentials_cache().await?) .credentials_provider(auth.credentials_provider(region.clone()).await?) .region(region.clone()) .retry_config(retry_config.clone()); if let Some(endpoint_override) = endpoint { - config_builder = config_builder.endpoint_resolver(endpoint_override); + config_builder = config_builder.endpoint_url(endpoint_override); } let config = config_builder.build(); @@ -190,15 +200,6 @@ pub async fn sign_request( Ok(()) } -#[derive(Debug)] -pub struct TokioSleep; - -impl AsyncSleep for TokioSleep { - fn sleep(&self, duration: Duration) -> Sleep { - Sleep::new(tokio::time::sleep(duration)) - } -} - /// Layer for capturing the payload size for AWS API client requests and emitting internal telemetry. 
#[derive(Clone)] struct CaptureRequestSize { @@ -239,23 +240,36 @@ where { type Response = S::Response; type Error = S::Error; - type Future = - Pin> + Send>>; + type Future = Pin> + Send>>; fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { self.inner.poll_ready(cx) } - fn call(&mut self, mut req: Request) -> Self::Future { + fn call(&mut self, req: Request) -> Self::Future { // Attach a body callback that will capture the bytes sent by interrogating the body chunks that get read as it // sends the request out over the wire. We'll read the shared atomic counter, which will contain the number of // bytes "read", aka the bytes it actually sent, if and only if we get back a successful response. - let maybe_bytes_sent = self.enabled.then(|| { - let (callback, shared_bytes_sent) = BodyCaptureCallback::new(); - req.http_mut().body_mut().with_callback(Box::new(callback)); + let (req, maybe_bytes_sent) = if self.enabled { + let shared_bytes_sent = Arc::new(AtomicUsize::new(0)); + let (request, properties) = req.into_parts(); + let (parts, body) = request.into_parts(); + + let body = { + let shared_bytes_sent = Arc::clone(&shared_bytes_sent); + + body.map_immutable(move |body| { + let body = MeasuredBody::new(body, Arc::clone(&shared_bytes_sent)); + SdkBody::from_dyn(BoxBody::new(body)) + }) + }; - shared_bytes_sent - }); + let req = Request::from_parts(http::Request::from_parts(parts, body), properties); + + (req, Some(shared_bytes_sent)) + } else { + (req, None) + }; let region = self.region.clone(); let fut = self.inner.call(req); @@ -284,69 +298,48 @@ where } } -struct BodyCaptureCallback { - bytes_sent: usize, +#[pin_project] +struct MeasuredBody { + #[pin] + inner: SdkBody, shared_bytes_sent: Arc, } -impl BodyCaptureCallback { - fn new() -> (Self, Arc) { - let shared_bytes_sent = Arc::new(AtomicUsize::new(0)); - - ( - Self { - bytes_sent: 0, - shared_bytes_sent: Arc::clone(&shared_bytes_sent), - }, +impl MeasuredBody { + fn new(body: SdkBody, shared_bytes_sent: Arc) -> Self { + Self { + inner: body, shared_bytes_sent, - ) + } } } -impl BodyCallback for BodyCaptureCallback { - fn update(&mut self, bytes: &[u8]) -> Result<(), BoxError> { - // This gets called every time a chunk is read from the request body, which includes both static chunks and - // streaming bodies. Just add the chunk's length to our running tally. - self.bytes_sent += bytes.len(); - Ok(()) - } - - fn trailers(&self) -> Result>, BoxError> { - Ok(None) - } - - fn make_new(&self) -> Box { - // We technically don't use retries within the AWS side of the API clients, but we have to satisfy this trait - // method, because `aws_smithy_http` uses the retry layer from `tower`, which clones the request regardless - // before it even executes the first attempt... so there's no reason not to make it technically correct. - Box::new(Self { - bytes_sent: 0, - shared_bytes_sent: Arc::clone(&self.shared_bytes_sent), - }) +impl Body for MeasuredBody { + type Data = Bytes; + type Error = Box; + + fn poll_data( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll>> { + let this = self.project(); + + match this.inner.poll_data(cx) { + Poll::Ready(Some(Ok(data))) => { + this.shared_bytes_sent + .fetch_add(data.len(), Ordering::Release); + Poll::Ready(Some(Ok(data))) + } + Poll::Ready(None) => Poll::Ready(None), + Poll::Ready(Some(Err(e))) => Poll::Ready(Some(Err(e))), + Poll::Pending => Poll::Pending, + } } -} -impl Drop for BodyCaptureCallback { - fn drop(&mut self) { - // This is where we actually emit. 
We specifically emit here, and not in `trailers`, because despite the - // documentation that `trailers` is called after all chunks of the body are successfully read, `hyper` won't - // continue polling a body if it knows it's gotten all the available bytes i.e. it doesn't necessarily drive it - // until `poll_data` returns `None`. This means the only consistent place to know that the body is "done" is - // when it's dropped. - // - // We update our shared atomic counter with the total bytes sent that we accumulated, and it will read the - // atomic if the response indicates that the request was successful. Since we know the body will go out-of-scope - // before a response can possibly be generated, we know the atomic will in turn be updated before it is read. - // - // This design also copes with the fact that, technically, `aws_smithy_client` supports retries and could clone - // this callback for each copy of the request... which it already does at least once per request since the retry - // middleware has to clone the request before trying it. As requests are retried sequentially, only after the - // previous attempt failed, we know that we'll end up in a "last write wins" scenario, so this is still sound. - // - // In the future, we may track every single byte sent in order to generate "raw bytes over the wire, regardless - // of status" metrics, but right now, this is purely "how many bytes have we sent as part of _successful_ - // sends?" - self.shared_bytes_sent - .store(self.bytes_sent, Ordering::Release); + fn poll_trailers( + self: Pin<&mut Self>, + _cx: &mut Context<'_>, + ) -> Poll, Self::Error>> { + Poll::Ready(Ok(None)) } } diff --git a/src/aws/region.rs b/src/aws/region.rs index fa8a45c6f38a0..b9db93a56bf5f 100644 --- a/src/aws/region.rs +++ b/src/aws/region.rs @@ -1,8 +1,4 @@ -use std::str::FromStr; - -use aws_smithy_http::endpoint::Endpoint; use aws_types::region::Region; -use http::Uri; use vector_config::configurable_component; /// Configuration of the region/endpoint to use when interacting with an AWS service. @@ -37,9 +33,8 @@ impl RegionOrEndpoint { } } - pub fn endpoint(&self) -> crate::Result> { - let uri = self.endpoint.as_deref().map(Uri::from_str).transpose()?; - Ok(uri.map(Endpoint::immutable)) + pub fn endpoint(&self) -> Option { + self.endpoint.clone() } pub fn region(&self) -> Option { diff --git a/src/cli.rs b/src/cli.rs index 37d4870c72095..fa90277649252 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,5 +1,5 @@ #![allow(missing_docs)] -use std::path::PathBuf; +use std::{num::NonZeroU64, path::PathBuf}; use clap::{ArgAction, CommandFactory, FromArgMatches, Parser}; @@ -163,6 +163,28 @@ pub struct RootOpts { #[arg(short, long, env = "VECTOR_USER_LOG_RATE_LIMIT", default_value = "10")] pub user_log_rate_limit: u64, + /// Set the duration in seconds to wait for graceful shutdown after SIGINT or SIGTERM are + /// received. After the duration has passed, Vector will force shutdown. To never force + /// shutdown, use `--no-graceful-shutdown-limit`. + #[arg( + long, + default_value = "60", + env = "VECTOR_GRACEFUL_SHUTDOWN_LIMIT_SECS", + group = "graceful-shutdown-limit" + )] + pub graceful_shutdown_limit_secs: NonZeroU64, + + /// Never time out while waiting for graceful shutdown after SIGINT or SIGTERM received. + /// This is useful when you would like for Vector to attempt to send data until terminated + /// by a SIGKILL. Overrides/cannot be set with `--graceful-shutdown-limit-secs`. 
+ #[arg( + long, + default_value = "false", + env = "VECTOR_NO_GRACEFUL_SHUTDOWN_LIMIT", + group = "graceful-shutdown-limit" + )] + pub no_graceful_shutdown_limit: bool, + /// Set runtime allocation tracing #[cfg(feature = "allocation-tracing")] #[arg(long, env = "ALLOCATION_TRACING", default_value = "false")] diff --git a/src/codecs/decoding/decoder.rs b/src/codecs/decoding/decoder.rs index 6f212f450dc83..ecf19c17d715e 100644 --- a/src/codecs/decoding/decoder.rs +++ b/src/codecs/decoding/decoder.rs @@ -122,7 +122,7 @@ mod tests { let reader = StreamReader::new(stream); let decoder = Decoder::new( Framer::NewlineDelimited(NewlineDelimitedDecoder::new()), - Deserializer::Json(JsonDeserializer::new()), + Deserializer::Json(JsonDeserializer::default()), ); let mut stream = FramedRead::new(reader, decoder); diff --git a/src/common/mod.rs b/src/common/mod.rs index 50fa2c509eeb4..557ace9f3cdf2 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -1,6 +1,5 @@ #[cfg(any( feature = "sources-datadog_agent", - feature = "sinks-datadog_archives", feature = "sinks-datadog_events", feature = "sinks-datadog_logs", feature = "sinks-datadog_metrics", diff --git a/src/common/s3.rs b/src/common/s3.rs index 4376908f9c6ea..c8eb80572a412 100644 --- a/src/common/s3.rs +++ b/src/common/s3.rs @@ -12,6 +12,10 @@ impl ClientBuilder for S3ClientBuilder { } fn build(client: aws_smithy_client::Client, config: &aws_types::SdkConfig) -> Self::Client { - aws_sdk_s3::client::Client::with_config(client, config.into()) + let config = aws_sdk_s3::config::Builder::from(config) + .force_path_style(true) + .build(); + + aws_sdk_s3::client::Client::with_config(client, config) } } diff --git a/src/components/validation/mod.rs b/src/components/validation/mod.rs index a7687c33264be..7457cd6548ad9 100644 --- a/src/components/validation/mod.rs +++ b/src/components/validation/mod.rs @@ -6,7 +6,7 @@ mod test_case; pub mod util; mod validators; -use crate::{config::BoxedSource, config::BoxedTransform, sinks::Sinks}; +use crate::config::{BoxedSink, BoxedSource, BoxedTransform}; pub use self::resources::*; #[cfg(feature = "component-validation-runner")] @@ -46,7 +46,7 @@ pub enum ComponentConfiguration { Transform(BoxedTransform), /// A sink component. - Sink(Sinks), + Sink(BoxedSink), } /// Configuration for validating a component. @@ -88,7 +88,7 @@ impl ValidationConfiguration { } /// Creates a new `ValidationConfiguration` for a sink. - pub fn from_sink>( + pub fn from_sink>( component_name: &'static str, config: C, external_resource: Option, diff --git a/src/components/validation/resources/mod.rs b/src/components/validation/resources/mod.rs index 45b87158e1135..940bd90846ad7 100644 --- a/src/components/validation/resources/mod.rs +++ b/src/components/validation/resources/mod.rs @@ -2,7 +2,7 @@ mod event; mod http; use codecs::{ - decoding::{self, DeserializerConfig, NewlineDelimitedDecoderOptions}, + decoding::{self, DeserializerConfig}, encoding::{ self, Framer, FramingConfig, JsonSerializerConfig, SerializerConfig, TextSerializerConfig, }, @@ -141,16 +141,16 @@ fn deserializer_config_to_serializer(config: &DeserializerConfig) -> encoding::S // "bytes" can be a top-level field and we aren't implicitly decoding everything into the // `message` field... but it's close enough for now. DeserializerConfig::Bytes => SerializerConfig::Text(TextSerializerConfig::default()), - DeserializerConfig::Json => SerializerConfig::Json(JsonSerializerConfig::default()), + DeserializerConfig::Json { .. 
} => SerializerConfig::Json(JsonSerializerConfig::default()), // TODO: We need to create an Avro serializer because, certainly, for any source decoding // the data as Avro, we can't possibly send anything else without the source just // immediately barfing. #[cfg(feature = "sources-syslog")] - DeserializerConfig::Syslog => SerializerConfig::Logfmt, + DeserializerConfig::Syslog { .. } => SerializerConfig::Logfmt, DeserializerConfig::Native => SerializerConfig::Native, - DeserializerConfig::NativeJson => SerializerConfig::NativeJson, - DeserializerConfig::Gelf => SerializerConfig::Gelf, - DeserializerConfig::Mezmo(_) => SerializerConfig::Logfmt, // Mz always decodes to logs + DeserializerConfig::NativeJson { .. } => SerializerConfig::NativeJson, + DeserializerConfig::Gelf { .. } => SerializerConfig::Gelf, + DeserializerConfig::Mezmo { .. } => SerializerConfig::Logfmt, // Mz always decodes to logs }; serializer_config @@ -161,20 +161,18 @@ fn deserializer_config_to_serializer(config: &DeserializerConfig) -> encoding::S fn decoder_framing_to_encoding_framer(framing: &decoding::FramingConfig) -> encoding::Framer { let framing_config = match framing { decoding::FramingConfig::Bytes => encoding::FramingConfig::Bytes, - decoding::FramingConfig::CharacterDelimited { - character_delimited, - } => encoding::FramingConfig::CharacterDelimited { - character_delimited: encoding::CharacterDelimitedEncoderOptions { - delimiter: character_delimited.delimiter, - }, - }, - decoding::FramingConfig::LengthDelimited => encoding::FramingConfig::LengthDelimited, - decoding::FramingConfig::NewlineDelimited { .. } => { - encoding::FramingConfig::NewlineDelimited + decoding::FramingConfig::CharacterDelimited(config) => { + encoding::FramingConfig::CharacterDelimited(encoding::CharacterDelimitedEncoderConfig { + character_delimited: encoding::CharacterDelimitedEncoderOptions { + delimiter: config.character_delimited.delimiter, + }, + }) } + decoding::FramingConfig::LengthDelimited => encoding::FramingConfig::LengthDelimited, + decoding::FramingConfig::NewlineDelimited(_) => encoding::FramingConfig::NewlineDelimited, // TODO: There's no equivalent octet counting framer for encoding... although // there's no particular reason that would make it hard to write. - decoding::FramingConfig::OctetCounting { .. } => todo!(), + decoding::FramingConfig::OctetCounting(_) => todo!(), }; framing_config.build() @@ -184,11 +182,11 @@ fn serializer_config_to_deserializer(config: &SerializerConfig) -> decoding::Des let deserializer_config = match config { SerializerConfig::Avro { .. } => todo!(), SerializerConfig::Csv { .. 
} => todo!(), - SerializerConfig::Gelf => DeserializerConfig::Gelf, - SerializerConfig::Json(_) => DeserializerConfig::Json, + SerializerConfig::Gelf => DeserializerConfig::Gelf(Default::default()), + SerializerConfig::Json(_) => DeserializerConfig::Json(Default::default()), SerializerConfig::Logfmt => todo!(), SerializerConfig::Native => DeserializerConfig::Native, - SerializerConfig::NativeJson => DeserializerConfig::NativeJson, + SerializerConfig::NativeJson => DeserializerConfig::NativeJson(Default::default()), SerializerConfig::RawMessage | SerializerConfig::Text(_) => DeserializerConfig::Bytes, }; @@ -198,18 +196,18 @@ fn serializer_config_to_deserializer(config: &SerializerConfig) -> decoding::Des fn encoder_framing_to_decoding_framer(framing: encoding::FramingConfig) -> decoding::Framer { let framing_config = match framing { encoding::FramingConfig::Bytes => decoding::FramingConfig::Bytes, - encoding::FramingConfig::CharacterDelimited { - character_delimited, - } => decoding::FramingConfig::CharacterDelimited { - character_delimited: decoding::CharacterDelimitedDecoderOptions { - delimiter: character_delimited.delimiter, - max_length: None, - }, - }, + encoding::FramingConfig::CharacterDelimited(config) => { + decoding::FramingConfig::CharacterDelimited(decoding::CharacterDelimitedDecoderConfig { + character_delimited: decoding::CharacterDelimitedDecoderOptions { + delimiter: config.character_delimited.delimiter, + max_length: None, + }, + }) + } encoding::FramingConfig::LengthDelimited => decoding::FramingConfig::LengthDelimited, - encoding::FramingConfig::NewlineDelimited => decoding::FramingConfig::NewlineDelimited { - newline_delimited: NewlineDelimitedDecoderOptions::default(), - }, + encoding::FramingConfig::NewlineDelimited => { + decoding::FramingConfig::NewlineDelimited(Default::default()) + } }; framing_config.build() diff --git a/src/components/validation/runner/config.rs b/src/components/validation/runner/config.rs index f526583d5282c..c087fd8bf809a 100644 --- a/src/components/validation/runner/config.rs +++ b/src/components/validation/runner/config.rs @@ -4,8 +4,8 @@ use crate::{ util::GrpcAddress, ComponentConfiguration, ComponentType, ValidationConfiguration, }, - config::{BoxedSource, BoxedTransform, ConfigBuilder}, - sinks::{vector::VectorConfig as VectorSinkConfig, Sinks}, + config::{BoxedSink, BoxedSource, BoxedTransform, ConfigBuilder}, + sinks::vector::VectorConfig as VectorSinkConfig, sources::vector::VectorConfig as VectorSourceConfig, test_util::next_addr, }; @@ -78,7 +78,7 @@ impl TopologyBuilder { } } - fn from_sink(sink: Sinks) -> Self { + fn from_sink(sink: BoxedSink) -> Self { let (input_edge, input_source) = build_input_edge(); let mut config_builder = ConfigBuilder::default(); @@ -130,7 +130,7 @@ fn build_input_edge() -> (InputEdge, impl Into) { (input_edge, input_source) } -fn build_output_edge() -> (OutputEdge, impl Into) { +fn build_output_edge() -> (OutputEdge, impl Into) { let output_listen_addr = GrpcAddress::from(next_addr()); debug!(endpoint = %output_listen_addr, "Creating controlled output edge."); diff --git a/src/components/validation/validators/component_spec/sources.rs b/src/components/validation/validators/component_spec/sources.rs index f3593116b795a..c25b217a399e4 100644 --- a/src/components/validation/validators/component_spec/sources.rs +++ b/src/components/validation/validators/component_spec/sources.rs @@ -1,6 +1,7 @@ use std::fmt::{Display, Formatter}; use bytes::BytesMut; +use vector_common::json_size::JsonSize; use 
vector_core::event::{Event, MetricKind}; use vector_core::EstimatedJsonEncodedSizeOf; @@ -163,7 +164,7 @@ fn validate_component_received_event_bytes_total( } } - let expected_bytes = inputs.iter().fold(0, |acc, i| { + let expected_bytes = inputs.iter().fold(JsonSize::new(0), |acc, i| { if let TestEvent::Passthrough(_) = i { let size = vec![i.clone().into_event()].estimated_json_encoded_size_of(); return acc + size; @@ -179,7 +180,7 @@ fn validate_component_received_event_bytes_total( expected_bytes, ); - if metric_bytes != expected_bytes as f64 { + if JsonSize::new(metric_bytes as usize) != expected_bytes { errs.push(format!( "{}: expected {} bytes, but received {}", SourceMetrics::EventsReceivedBytes, @@ -367,7 +368,7 @@ fn validate_component_sent_event_bytes_total( } } - let mut expected_bytes = 0; + let mut expected_bytes = JsonSize::zero(); for e in outputs { expected_bytes += vec![e].estimated_json_encoded_size_of(); } @@ -379,7 +380,7 @@ fn validate_component_sent_event_bytes_total( expected_bytes, ); - if metric_bytes != expected_bytes as f64 { + if JsonSize::new(metric_bytes as usize) != expected_bytes { errs.push(format!( "{}: expected {} bytes, but received {}.", SourceMetrics::SentEventBytesTotal, diff --git a/src/conditions/vrl.rs b/src/conditions/vrl.rs index 703d72e601ebf..5230058b68260 100644 --- a/src/conditions/vrl.rs +++ b/src/conditions/vrl.rs @@ -6,6 +6,7 @@ use vrl::compiler::{CompilationResult, CompileConfig, Program, TypeState, VrlRun use vrl::diagnostic::Formatter; use vrl::value::Value; +use crate::config::LogNamespace; use crate::event::TargetEvents; use crate::{ conditions::{Condition, Conditional, ConditionalConfig}, @@ -93,12 +94,16 @@ pub struct Vrl { impl Vrl { fn run(&self, event: Event) -> (Event, RuntimeResult) { + let log_namespace = event + .maybe_as_log() + .map(|log| log.namespace()) + .unwrap_or(LogNamespace::Legacy); let mut target = VrlTarget::new(event, self.program.info(), false); // TODO: use timezone from remap config let timezone = TimeZone::default(); let result = Runtime::default().resolve(&mut target, &self.program, &timezone); - let original_event = match target.into_events() { + let original_event = match target.into_events(log_namespace) { TargetEvents::One(event) => event, _ => panic!("Event was modified in a condition. This is an internal compiler error."), }; diff --git a/src/config/builder.rs b/src/config/builder.rs index 7503608ddbf59..77af80511baa1 100644 --- a/src/config/builder.rs +++ b/src/config/builder.rs @@ -1,23 +1,21 @@ use std::collections::BTreeMap; -use std::path::Path; +use std::{path::Path, time::Duration}; use indexmap::IndexMap; use serde_json::Value; use vector_config::configurable_component; use vector_core::config::GlobalOptions; -use crate::{ - enrichment_tables::EnrichmentTables, providers::Providers, secrets::SecretBackends, - sinks::Sinks, -}; +use crate::{enrichment_tables::EnrichmentTables, providers::Providers, secrets::SecretBackends}; #[cfg(feature = "api")] use super::api; #[cfg(feature = "enterprise")] use super::enterprise; use super::{ - compiler, schema, BoxedSource, BoxedTransform, ComponentKey, Config, EnrichmentTableOuter, - HealthcheckOptions, SinkOuter, SourceOuter, TestDefinition, TransformOuter, + compiler, schema, BoxedSink, BoxedSource, BoxedTransform, ComponentKey, Config, + EnrichmentTableOuter, HealthcheckOptions, SinkOuter, SourceOuter, TestDefinition, + TransformOuter, }; /// A complete Vector configuration. 
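The validator changes above replace bare `usize` byte accounting with the `JsonSize` newtype from `vector_common`, folding the expected event sizes into a `JsonSize` and lifting the float read from the metric back into one for comparison. A minimal, self-contained sketch of that pattern, using a stand-in newtype since only `JsonSize::new`, `JsonSize::zero`, `.get()`, and `+` are visible in this diff:

```rust
use std::ops::Add;

/// Stand-in for `vector_common::json_size::JsonSize`, assumed to expose `new`,
/// `zero`, `get`, and addition, which is all the validator code above relies on.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct JsonSize(usize);

impl JsonSize {
    const fn new(size: usize) -> Self {
        Self(size)
    }
    const fn zero() -> Self {
        Self(0)
    }
    const fn get(self) -> usize {
        self.0
    }
}

impl Add for JsonSize {
    type Output = Self;
    fn add(self, rhs: Self) -> Self {
        Self(self.0 + rhs.0)
    }
}

fn main() {
    // Pretend these are the estimated JSON-encoded sizes of three test events.
    let event_sizes = [JsonSize::new(120), JsonSize::new(45), JsonSize::new(88)];

    // Fold into a `JsonSize` instead of a bare integer, as the validator now does...
    let expected_bytes = event_sizes
        .iter()
        .fold(JsonSize::zero(), |acc, size| acc + *size);

    // ...and compare by lifting the float read from the metric into a `JsonSize`
    // rather than casting the expected size down to `f64`.
    let metric_bytes = 253.0_f64;
    assert_eq!(JsonSize::new(metric_bytes as usize), expected_bytes);
    println!("expected {} bytes", expected_bytes.get());
}
```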
@@ -76,6 +74,13 @@ pub struct ConfigBuilder { /// All configured secrets backends. #[serde(default)] pub secret: IndexMap, + + /// The duration in seconds to wait for graceful shutdown after SIGINT or SIGTERM are received. + /// After the duration has passed, Vector will force shutdown. Default value is 60 seconds. This + /// value can be set using a [cli arg](crate::cli::RootOpts::graceful_shutdown_limit_secs). + #[serde(default, skip)] + #[doc(hidden)] + pub graceful_shutdown_duration: Option, } #[derive(::serde::Serialize)] @@ -188,6 +193,7 @@ impl From for ConfigBuilder { transforms, tests, secret, + graceful_shutdown_duration, hash: _, } = config; @@ -218,6 +224,7 @@ impl From for ConfigBuilder { provider: None, tests, secret, + graceful_shutdown_duration, } } } @@ -259,7 +266,12 @@ impl ConfigBuilder { .insert(ComponentKey::from(key.into()), SourceOuter::new(source)); } - pub fn add_sink, S: Into>(&mut self, key: K, inputs: &[&str], sink: S) { + pub fn add_sink, S: Into>( + &mut self, + key: K, + inputs: &[&str], + sink: S, + ) { let inputs = inputs .iter() .map(|value| value.to_string()) diff --git a/src/config/compiler.rs b/src/config/compiler.rs index aef09dd83ed72..d38516b392288 100644 --- a/src/config/compiler.rs +++ b/src/config/compiler.rs @@ -61,6 +61,7 @@ pub fn compile( tests, provider: _, secret, + graceful_shutdown_duration, } = builder; let graph = match Graph::new(&sources, &transforms, &sinks, schema) { @@ -118,6 +119,7 @@ pub fn compile( transforms, tests, secret, + graceful_shutdown_duration, }; config.propagate_acknowledgements()?; diff --git a/src/config/enterprise.rs b/src/config/enterprise.rs index 97075716b3b96..bce787aa7d183 100644 --- a/src/config/enterprise.rs +++ b/src/config/enterprise.rs @@ -833,9 +833,9 @@ mod test { use indexmap::IndexMap; use tokio::time::sleep; use vector_core::config::proxy::ProxyConfig; + use vrl::btreemap; use vrl::compiler::state::ExternalEnv; use vrl::compiler::{compile, compile_with_external, CompileConfig}; - use vrl::value::btreemap; use vrl::value::kind::Collection; use vrl::value::Kind; use wiremock::{matchers, Mock, MockServer, ResponseTemplate}; diff --git a/src/config/graph.rs b/src/config/graph.rs index 859b590257de2..6f4cdb81d4451 100644 --- a/src/config/graph.rs +++ b/src/config/graph.rs @@ -2,8 +2,8 @@ use indexmap::{set::IndexSet, IndexMap}; use std::collections::{HashMap, HashSet, VecDeque}; use super::{ - schema, ComponentKey, DataType, OutputId, SinkConfig, SinkOuter, SourceOuter, SourceOutput, - TransformOuter, TransformOutput, + schema, ComponentKey, DataType, OutputId, SinkOuter, SourceOuter, SourceOutput, TransformOuter, + TransformOutput, }; #[derive(Debug, Clone)] diff --git a/src/config/loading/secret.rs b/src/config/loading/secret.rs index 54c83823e2218..09a32260708e1 100644 --- a/src/config/loading/secret.rs +++ b/src/config/loading/secret.rs @@ -26,7 +26,7 @@ use crate::{ // - "SECRET[backend..secret.name]" will match and capture "backend" and ".secret.name" // - "SECRET[secret_name]" will not match // - "SECRET[.secret.name]" will not match -static COLLECTOR: Lazy = +pub static COLLECTOR: Lazy = Lazy::new(|| Regex::new(r"SECRET\[([[:word:]]+)\.([[:word:].]+)\]").unwrap()); /// Helper type for specifically deserializing secrets backends. 
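With `COLLECTOR` now public, the `SECRET[backend.secret.key]` scanner can be reused outside the loading module. A small, self-contained sketch of what the pattern captures, mirroring the match/non-match cases from the comment above; the backend name `vault` and the config snippet are illustrative placeholders, and `regex`/`once_cell` are the same dependencies the file already uses:

```rust
use once_cell::sync::Lazy;
use regex::Regex;

// Same pattern as `COLLECTOR` above (a `Lazy<Regex>`): capture group 1 is the backend
// name, capture group 2 is the dotted path of the secret within that backend.
static COLLECTOR: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"SECRET\[([[:word:]]+)\.([[:word:].]+)\]").unwrap());

fn main() {
    // Hypothetical config snippet referencing a secret from a backend named "vault".
    let config = r#"token = "SECRET[vault.prod.api_token]""#;

    for caps in COLLECTOR.captures_iter(config) {
        // Prints: backend = vault, key = prod.api_token
        println!("backend = {}, key = {}", &caps[1], &caps[2]);
    }

    // Per the comment above, a reference without a backend component does not match.
    assert!(COLLECTOR.captures("SECRET[secret_name]").is_none());
}
```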
diff --git a/src/config/mod.rs b/src/config/mod.rs index 629b5a94be218..8e1ea09b350c4 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -5,6 +5,7 @@ use std::{ hash::Hash, net::SocketAddr, path::PathBuf, + time::Duration, }; use indexmap::IndexMap; @@ -48,21 +49,33 @@ pub use format::{Format, FormatHint}; pub use id::{ComponentKey, Inputs}; pub use loading::{ load, load_builder_from_paths, load_from_paths, load_from_paths_with_provider_and_secrets, - load_from_str, load_source_from_paths, merge_path_lists, process_paths, CONFIG_PATHS, + load_from_str, load_source_from_paths, merge_path_lists, process_paths, COLLECTOR, + CONFIG_PATHS, }; pub use provider::ProviderConfig; pub use secret::SecretBackend; -pub use sink::{SinkConfig, SinkContext, SinkHealthcheckOptions, SinkOuter}; +pub use sink::{BoxedSink, SinkConfig, SinkContext, SinkHealthcheckOptions, SinkOuter}; pub use source::{BoxedSource, SourceConfig, SourceContext, SourceOuter}; pub use transform::{ get_transform_output_ids, BoxedTransform, TransformConfig, TransformContext, TransformOuter, }; pub use unit_test::{build_unit_tests, build_unit_tests_main, UnitTestResult}; pub use validation::warnings; +pub use vars::{interpolate, ENVIRONMENT_VARIABLE_INTERPOLATION_REGEX}; pub use vector_core::config::{ - init_log_schema, log_schema, proxy::ProxyConfig, LogSchema, OutputId, + init_telemetry, log_schema, proxy::ProxyConfig, telemetry, LogSchema, OutputId, }; +/// Loads Log Schema from configurations and sets global schema. +/// Once this is done, configurations can be correctly loaded using +/// configured log schema defaults. +/// If deny is set, will panic if schema has already been set. +pub fn init_log_schema(config_paths: &[ConfigPath], deny_if_set: bool) -> Result<(), Vec> { + let (builder, _) = load_builder_from_paths(config_paths)?; + vector_core::config::init_log_schema(builder.global.log_schema, deny_if_set); + Ok(()) +} + #[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)] pub enum ConfigPath { File(PathBuf, FormatHint), @@ -103,6 +116,7 @@ pub struct Config { pub enrichment_tables: IndexMap, tests: Vec, secret: IndexMap, + pub graceful_shutdown_duration: Option, } impl Config { @@ -1100,7 +1114,7 @@ mod tests { type = "filter" inputs = ["internal_metrics"] condition = """ - .name == "processed_bytes_total" + .name == "component_received_bytes_total" """ [sinks.out] @@ -1131,7 +1145,7 @@ mod tests { type = "filter" inputs = ["internal_metrics"] condition = """ - .name == "processed_bytes_total" + .name == "component_received_bytes_total" """ [sinks.out] @@ -1310,6 +1324,7 @@ mod resource_tests { proptest! 
{ #[test] fn valid(addr: IpAddr, port1 in specport(), port2 in specport()) { + prop_assume!(port1 != port2); let components = vec![ ("sink_0", vec![tcp(addr, 0)]), ("sink_1", vec![tcp(addr, port1)]), diff --git a/src/config/sink.rs b/src/config/sink.rs index a88e2363eab91..cc52d346b829b 100644 --- a/src/config/sink.rs +++ b/src/config/sink.rs @@ -1,16 +1,48 @@ use crate::mezmo::MezmoContext; +use std::cell::RefCell; + use async_trait::async_trait; -use enum_dispatch::enum_dispatch; +use dyn_clone::DynClone; use serde::Serialize; use vector_buffers::{BufferConfig, BufferType}; -use vector_config::{configurable_component, Configurable, NamedComponent}; +use vector_config::{ + configurable_component, Configurable, GenerateError, Metadata, NamedComponent, +}; +use vector_config_common::attributes::CustomAttribute; +use vector_config_common::schema::{SchemaGenerator, SchemaObject}; use vector_core::{ config::{AcknowledgementsConfig, GlobalOptions, Input}, sink::VectorSink, }; use super::{id::Inputs, schema, ComponentKey, ProxyConfig, Resource}; -use crate::sinks::{util::UriSerde, Healthcheck, Sinks}; +use crate::sinks::{util::UriSerde, Healthcheck}; + +pub type BoxedSink = Box; + +impl Configurable for BoxedSink { + fn referenceable_name() -> Option<&'static str> { + Some("vector::sinks::Sinks") + } + + fn metadata() -> Metadata { + let mut metadata = Metadata::default(); + metadata.set_description("Configurable sinks in Vector."); + metadata.add_custom_attribute(CustomAttribute::kv("docs::enum_tagging", "internal")); + metadata.add_custom_attribute(CustomAttribute::kv("docs::enum_tag_field", "type")); + metadata + } + + fn generate_schema(gen: &RefCell) -> Result { + vector_config::component::SinkDescription::generate_schemas(gen) + } +} + +impl From for BoxedSink { + fn from(value: T) -> Self { + Box::new(value) + } +} /// Fully resolved sink component. #[configurable_component] @@ -50,7 +82,7 @@ where #[serde(flatten)] #[configurable(metadata(docs::hidden))] - pub inner: Sinks, + pub inner: BoxedSink, } impl SinkOuter @@ -60,7 +92,7 @@ where pub fn new(inputs: I, inner: IS) -> SinkOuter where I: IntoIterator, - IS: Into, + IS: Into, { SinkOuter { inputs: Inputs::from_iter(inputs), @@ -171,8 +203,8 @@ impl From for SinkHealthcheckOptions { /// Generalized interface for describing and building sink components. #[async_trait] -#[enum_dispatch] -pub trait SinkConfig: NamedComponent + core::fmt::Debug + Send + Sync { +#[typetag::serde(tag = "type")] +pub trait SinkConfig: DynClone + NamedComponent + core::fmt::Debug + Send + Sync { /// Builds the sink with the given context. 
/// /// If the sink is built successfully, `Ok(...)` is returned containing the sink and the sink's @@ -202,7 +234,9 @@ pub trait SinkConfig: NamedComponent + core::fmt::Debug + Send + Sync { fn acknowledgements(&self) -> &AcknowledgementsConfig; } -#[derive(Debug, Clone)] +dyn_clone::clone_trait_object!(SinkConfig); + +#[derive(Clone, Debug, Default)] pub struct SinkContext { pub healthcheck: SinkHealthcheckOptions, pub globals: GlobalOptions, diff --git a/src/config/source.rs b/src/config/source.rs index aad02abd1dd2b..765653eb022ec 100644 --- a/src/config/source.rs +++ b/src/config/source.rs @@ -144,7 +144,7 @@ impl SourceContext { out: SourceSender, ) -> (Self, crate::shutdown::SourceShutdownCoordinator) { let mut shutdown = crate::shutdown::SourceShutdownCoordinator::default(); - let (shutdown_signal, _) = shutdown.register_source(key); + let (shutdown_signal, _) = shutdown.register_source(key, false); ( Self { key: key.clone(), diff --git a/src/config/transform.rs b/src/config/transform.rs index 9c1c07f020319..748fbc4ca617c 100644 --- a/src/config/transform.rs +++ b/src/config/transform.rs @@ -198,6 +198,9 @@ pub trait TransformConfig: DynClone + NamedComponent + core::fmt::Debug + Send + &self, enrichment_tables: enrichment::TableRegistry, input_definitions: &[(OutputId, schema::Definition)], + + // This only exists for transforms that create logs from non-logs, to know which namespace + // to use, such as `metric_to_log` global_log_namespace: LogNamespace, ) -> Vec; diff --git a/src/config/unit_test/mod.rs b/src/config/unit_test/mod.rs index 9b1b0167aed2e..e11a3536afc53 100644 --- a/src/config/unit_test/mod.rs +++ b/src/config/unit_test/mod.rs @@ -72,24 +72,11 @@ impl UnitTest { } } -/// Loads Log Schema from configurations and sets global schema. -/// Once this is done, configurations can be correctly loaded using -/// configured log schema defaults. -/// If deny is set, will panic if schema has already been set. -fn init_log_schema_from_paths( - config_paths: &[ConfigPath], - deny_if_set: bool, -) -> Result<(), Vec> { - let (builder, _) = config::loading::load_builder_from_paths(config_paths)?; - vector_core::config::init_log_schema(builder.global.log_schema, deny_if_set); - Ok(()) -} - pub async fn build_unit_tests_main( paths: &[ConfigPath], signal_handler: &mut signal::SignalHandler, ) -> Result, Vec> { - init_log_schema_from_paths(paths, false)?; + config::init_log_schema(paths, false)?; let (mut secrets_backends_loader, _) = loading::load_secret_backends_from_paths(paths)?; let (config_builder, _) = if secrets_backends_loader.has_secrets_to_retrieve() { let resolved_secrets = secrets_backends_loader diff --git a/src/config/unit_test/unit_test_components.rs b/src/config/unit_test/unit_test_components.rs index a38e0492d87aa..3166e396a17e4 100644 --- a/src/config/unit_test/unit_test_components.rs +++ b/src/config/unit_test/unit_test_components.rs @@ -135,7 +135,7 @@ pub struct UnitTestSinkResult { } /// Configuration for the `unit_test` sink. 
-#[configurable_component(sink("unit_test"))] +#[configurable_component(sink("unit_test", "Unit test."))] #[derive(Clone, Default, Derivative)] #[derivative(Debug)] pub struct UnitTestSinkConfig { @@ -158,6 +158,7 @@ pub struct UnitTestSinkConfig { impl_generate_config_from_default!(UnitTestSinkConfig); #[async_trait::async_trait] +#[typetag::serde(name = "unit_test")] impl SinkConfig for UnitTestSinkConfig { async fn build(&self, _cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let tx = self.result_tx.lock().await.take(); @@ -272,7 +273,7 @@ impl StreamSink for UnitTestSink { } /// Configuration for the `unit_test_stream` sink. -#[configurable_component(sink("unit_test_stream"))] +#[configurable_component(sink("unit_test_stream", "Unit test stream."))] #[derive(Clone, Default)] pub struct UnitTestStreamSinkConfig { /// Sink that receives the processed events. @@ -297,11 +298,13 @@ impl std::fmt::Debug for UnitTestStreamSinkConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "unit_test_stream")] impl SinkConfig for UnitTestStreamSinkConfig { async fn build(&self, _cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let sink = self.sink.lock().await.take().unwrap(); let healthcheck = future::ok(()).boxed(); + #[allow(deprecated)] Ok((VectorSink::from_event_sink(sink), healthcheck)) } diff --git a/src/config/vars.rs b/src/config/vars.rs index 3b923a9687bfe..73316884cebf3 100644 --- a/src/config/vars.rs +++ b/src/config/vars.rs @@ -1,7 +1,25 @@ use std::collections::HashMap; +use once_cell::sync::Lazy; use regex::{Captures, Regex}; +// Environment variable names can have any characters from the Portable Character Set other +// than NUL. However, for Vector's interpolation, we are closer to what a shell supports which +// is solely of uppercase letters, digits, and the '_' (that is, the `[:word:]` regex class). +// In addition to these characters, we allow `.` as this commonly appears in environment +// variable names when they come from a Java properties file. +// +// https://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html +pub static ENVIRONMENT_VARIABLE_INTERPOLATION_REGEX: Lazy = Lazy::new(|| { + Regex::new( + r"(?x) + \$\$| + \$([[:word:].]+)| + \$\{([[:word:].]+)(?:(:?-|:?\?)([^}]*))?\}", + ) + .unwrap() +}); + /// (result, warnings) pub fn interpolate( input: &str, @@ -10,22 +28,7 @@ pub fn interpolate( let mut errors = Vec::new(); let mut warnings = Vec::new(); - // Environment variable names can have any characters from the Portable Character Set other - // than NUL. However, for Vector's interpolation, we are closer to what a shell supports which - // is solely of uppercase letters, digits, and the '_' (that is, the `[:word:]` regex class). - // In addition to these characters, we allow `.` as this commonly appears in environment - // variable names when they come from a Java properties file. 
- // - // https://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html - let re = Regex::new( - r"(?x) - \$\$| - \$([[:word:].]+)| - \$\{([[:word:].]+)(?:(:?-|:?\?)([^}]*))?\}", - ) - .unwrap(); - - let interpolated = re + let interpolated = ENVIRONMENT_VARIABLE_INTERPOLATION_REGEX .replace_all(input, |caps: &Captures<'_>| { let flags = caps.get(3).map(|m| m.as_str()).unwrap_or_default(); let def_or_err = caps.get(4).map(|m| m.as_str()).unwrap_or_default(); diff --git a/src/http.rs b/src/http.rs index cac31cb89af31..faebc98533542 100644 --- a/src/http.rs +++ b/src/http.rs @@ -61,10 +61,12 @@ impl UserLoggingError for HttpError { } pub type HttpClientFuture = >>::Future; +type HttpProxyConnector = ProxyConnector>; pub struct HttpClient { - client: Client>, B>, + client: Client, user_agent: HeaderValue, + proxy_connector: HttpProxyConnector, } impl HttpClient @@ -85,14 +87,18 @@ where proxy_config: &ProxyConfig, client_builder: &mut client::Builder, ) -> Result, HttpError> { - let proxy = build_proxy_connector(tls_settings.into(), proxy_config)?; - let client = client_builder.build(proxy); + let proxy_connector = build_proxy_connector(tls_settings.into(), proxy_config)?; + let client = client_builder.build(proxy_connector.clone()); let version = crate::get_version(); let user_agent = HeaderValue::from_str(&format!("Mezmo/{}", version)) .expect("Invalid header value for version!"); - Ok(HttpClient { client, user_agent }) + Ok(HttpClient { + client, + user_agent, + proxy_connector, + }) } pub fn send( @@ -103,6 +109,7 @@ where let _enter = span.enter(); default_request_headers(&mut request, &self.user_agent); + self.maybe_add_proxy_headers(&mut request); emit!(http_client::AboutToSendHttpRequest { request: &request }); @@ -143,6 +150,17 @@ where Box::pin(fut) } + + fn maybe_add_proxy_headers(&self, request: &mut Request) { + if let Some(proxy_headers) = self.proxy_connector.http_headers(request.uri()) { + for (k, v) in proxy_headers { + let request_headers = request.headers_mut(); + if !request_headers.contains_key(k) { + request_headers.insert(k, v.into()); + } + } + } + } } pub fn build_proxy_connector( @@ -224,6 +242,7 @@ impl Clone for HttpClient { Self { client: self.client.clone(), user_agent: self.user_agent.clone(), + proxy_connector: self.proxy_connector.clone(), } } } diff --git a/src/internal_events/apache_metrics.rs b/src/internal_events/apache_metrics.rs index ac09f4f71ada5..0e42463f960e8 100644 --- a/src/internal_events/apache_metrics.rs +++ b/src/internal_events/apache_metrics.rs @@ -1,6 +1,9 @@ use metrics::counter; -use vector_common::internal_event::{error_stage, error_type}; +use vector_common::{ + internal_event::{error_stage, error_type}, + json_size::JsonSize, +}; use vector_core::internal_event::InternalEvent; use super::prelude::http_error_code; @@ -8,12 +11,13 @@ use crate::sources::apache_metrics; #[derive(Debug)] pub struct ApacheMetricsEventsReceived<'a> { - pub byte_size: usize, + pub byte_size: JsonSize, pub count: usize, pub endpoint: &'a str, } impl<'a> InternalEvent for ApacheMetricsEventsReceived<'a> { + // ## skip check-duplicate-events ## fn emit(self) { trace!(message = "Events received.", count = %self.count, byte_size = %self.byte_size, endpoint = %self.endpoint); counter!( @@ -21,13 +25,9 @@ impl<'a> InternalEvent for ApacheMetricsEventsReceived<'a> { "endpoint" => self.endpoint.to_owned(), ); counter!( - "component_received_event_bytes_total", self.byte_size as u64, + "component_received_event_bytes_total", self.byte_size.get() as u64, 
"endpoint" => self.endpoint.to_owned(), ); - counter!( - "events_in_total", self.count as u64, - "uri" => self.endpoint.to_owned(), - ); } } diff --git a/src/internal_events/aws_ecs_metrics.rs b/src/internal_events/aws_ecs_metrics.rs index bb394b0f491db..2ae8081697a66 100644 --- a/src/internal_events/aws_ecs_metrics.rs +++ b/src/internal_events/aws_ecs_metrics.rs @@ -1,14 +1,17 @@ use std::borrow::Cow; use metrics::counter; -use vector_common::internal_event::{error_stage, error_type}; +use vector_common::{ + internal_event::{error_stage, error_type}, + json_size::JsonSize, +}; use vector_core::internal_event::InternalEvent; use super::prelude::{http_error_code, hyper_error_code}; #[derive(Debug)] pub struct AwsEcsMetricsEventsReceived<'a> { - pub byte_size: usize, + pub byte_size: JsonSize, pub count: usize, pub endpoint: &'a str, } @@ -27,11 +30,9 @@ impl<'a> InternalEvent for AwsEcsMetricsEventsReceived<'a> { "endpoint" => self.endpoint.to_string(), ); counter!( - "component_received_event_bytes_total", self.byte_size as u64, + "component_received_event_bytes_total", self.byte_size.get() as u64, "endpoint" => self.endpoint.to_string(), ); - // deprecated - counter!("events_in_total", self.count as u64); } } diff --git a/src/internal_events/batch.rs b/src/internal_events/batch.rs index 3bd9f6ab6eafd..1044d1cca82ab 100644 --- a/src/internal_events/batch.rs +++ b/src/internal_events/batch.rs @@ -14,8 +14,9 @@ pub struct LargeEventDroppedError { impl InternalEvent for LargeEventDroppedError { fn emit(self) { + let reason = "Event larger than batch max_bytes."; error!( - message = "Event larger than batch max_bytes.", + message = reason, batch_max_bytes = %self.max_length, length = %self.length, error_type = error_type::CONDITION_FAILED, @@ -28,14 +29,6 @@ impl InternalEvent for LargeEventDroppedError { "error_type" => error_type::CONDITION_FAILED, "stage" => error_stage::SENDING, ); - emit!(ComponentEventsDropped:: { - count: 1, - reason: "Event larger than batch max_bytes." 
- }); - // deprecated - counter!( - "events_discarded_total", 1, - "reason" => "oversized", - ); + emit!(ComponentEventsDropped:: { count: 1, reason }); } } diff --git a/src/internal_events/common.rs b/src/internal_events/common.rs index fafa280df099e..0bf94f54ae25a 100644 --- a/src/internal_events/common.rs +++ b/src/internal_events/common.rs @@ -83,7 +83,6 @@ const STREAM_CLOSED: &str = "stream_closed"; #[derive(Debug)] pub struct StreamClosedError { - pub error: crate::source_sender::ClosedError, pub count: usize, } diff --git a/src/internal_events/conditions.rs b/src/internal_events/conditions.rs index 4b88685d32edf..31ddeecc60bae 100644 --- a/src/internal_events/conditions.rs +++ b/src/internal_events/conditions.rs @@ -22,7 +22,5 @@ impl<'a> InternalEvent for VrlConditionExecutionError<'a> { "error_type" => error_type::SCRIPT_FAILED, "stage" => error_stage::PROCESSING, ); - // deprecated - counter!("processing_errors_total", 1); } } diff --git a/src/internal_events/datadog_metrics.rs b/src/internal_events/datadog_metrics.rs index 792d8496f041d..c4daf1d3ce7f8 100644 --- a/src/internal_events/datadog_metrics.rs +++ b/src/internal_events/datadog_metrics.rs @@ -7,19 +7,17 @@ use vector_common::internal_event::{ }; #[derive(Debug)] -pub struct DatadogMetricsEncodingError { - pub error_message: &'static str, +pub struct DatadogMetricsEncodingError<'a> { + pub reason: &'a str, pub error_code: &'static str, pub dropped_events: usize, } -impl InternalEvent for DatadogMetricsEncodingError { +impl<'a> InternalEvent for DatadogMetricsEncodingError<'a> { fn emit(self) { - let reason = "Failed to encode Datadog metrics."; error!( - message = reason, - error = %self.error_message, - error_code = %self.error_code, + message = self.reason, + error_code = self.error_code, error_type = error_type::ENCODER_FAILED, intentional = "false", stage = error_stage::PROCESSING, @@ -35,7 +33,7 @@ impl InternalEvent for DatadogMetricsEncodingError { if self.dropped_events > 0 { emit!(ComponentEventsDropped:: { count: self.dropped_events, - reason, + reason: self.reason, }); } } diff --git a/src/internal_events/dedupe.rs b/src/internal_events/dedupe.rs index ef8525cb6f3fd..caf0dd9c204e1 100644 --- a/src/internal_events/dedupe.rs +++ b/src/internal_events/dedupe.rs @@ -1,5 +1,4 @@ use crate::emit; -use metrics::counter; use vector_core::internal_event::{ComponentEventsDropped, InternalEvent, INTENTIONAL}; #[derive(Debug)] @@ -13,6 +12,5 @@ impl InternalEvent for DedupeEventsDropped { count: self.count, reason: "Events have been found in cache for deduplication.", }); - counter!("events_discarded_total", self.count as u64); // Deprecated } } diff --git a/src/internal_events/docker_logs.rs b/src/internal_events/docker_logs.rs index f02ce627a3310..0447624b01630 100644 --- a/src/internal_events/docker_logs.rs +++ b/src/internal_events/docker_logs.rs @@ -1,12 +1,15 @@ use bollard::errors::Error; use chrono::ParseError; use metrics::counter; -use vector_common::internal_event::{error_stage, error_type}; +use vector_common::{ + internal_event::{error_stage, error_type}, + json_size::JsonSize, +}; use vector_core::internal_event::InternalEvent; #[derive(Debug)] pub struct DockerLogsEventsReceived<'a> { - pub byte_size: usize, + pub byte_size: JsonSize, pub container_id: &'a str, pub container_name: &'a str, } @@ -24,12 +27,7 @@ impl InternalEvent for DockerLogsEventsReceived<'_> { "container_name" => self.container_name.to_owned() ); counter!( - "component_received_event_bytes_total", self.byte_size as u64, - 
"container_name" => self.container_name.to_owned() - ); - // deprecated - counter!( - "events_in_total", 1, + "component_received_event_bytes_total", self.byte_size.get() as u64, "container_name" => self.container_name.to_owned() ); } diff --git a/src/internal_events/exec.rs b/src/internal_events/exec.rs index 99f41f143d490..4fd3461a1f822 100644 --- a/src/internal_events/exec.rs +++ b/src/internal_events/exec.rs @@ -3,8 +3,9 @@ use std::time::Duration; use crate::emit; use metrics::{counter, histogram}; use tokio::time::error::Elapsed; -use vector_common::internal_event::{ - error_stage, error_type, ComponentEventsDropped, UNINTENTIONAL, +use vector_common::{ + internal_event::{error_stage, error_type, ComponentEventsDropped, UNINTENTIONAL}, + json_size::JsonSize, }; use vector_core::internal_event::InternalEvent; @@ -14,7 +15,7 @@ use super::prelude::io_error_code; pub struct ExecEventsReceived<'a> { pub count: usize, pub command: &'a str, - pub byte_size: usize, + pub byte_size: JsonSize, } impl InternalEvent for ExecEventsReceived<'_> { @@ -22,7 +23,7 @@ impl InternalEvent for ExecEventsReceived<'_> { trace!( message = "Events received.", count = self.count, - byte_size = self.byte_size, + byte_size = self.byte_size.get(), command = %self.command, ); counter!( @@ -30,12 +31,7 @@ impl InternalEvent for ExecEventsReceived<'_> { "command" => self.command.to_owned(), ); counter!( - "component_received_event_bytes_total", self.byte_size as u64, - "command" => self.command.to_owned(), - ); - // deprecated - counter!( - "events_in_total", self.count as u64, + "component_received_event_bytes_total", self.byte_size.get() as u64, "command" => self.command.to_owned(), ); } @@ -65,13 +61,6 @@ impl InternalEvent for ExecFailedError<'_> { "error_code" => io_error_code(&self.error), "stage" => error_stage::RECEIVING, ); - // deprecated - counter!( - "processing_errors_total", 1, - "command" => self.command.to_owned(), - "error_type" => error_type::COMMAND_FAILED, - "stage" => error_stage::RECEIVING, - ); } } @@ -99,13 +88,6 @@ impl InternalEvent for ExecTimeoutError<'_> { "error_type" => error_type::TIMED_OUT, "stage" => error_stage::RECEIVING, ); - // deprecated - counter!( - "processing_errors_total", 1, - "command" => self.command.to_owned(), - "error_type" => error_type::TIMED_OUT, - "stage" => error_stage::RECEIVING, - ); } } @@ -216,14 +198,6 @@ impl InternalEvent for ExecFailedToSignalChildError<'_> { "error_type" => error_type::COMMAND_FAILED, "stage" => error_stage::RECEIVING, ); - // deprecated - counter!( - "processing_errors_total", 1, - "command_code" => format!("{:?}", self.command.as_std()), - "error" => self.error.to_error_code(), - "error_type" => error_type::COMMAND_FAILED, - "stage" => error_stage::RECEIVING, - ); } } diff --git a/src/internal_events/file.rs b/src/internal_events/file.rs index 2e3cb17344fb9..aedac3d74afb2 100644 --- a/src/internal_events/file.rs +++ b/src/internal_events/file.rs @@ -6,6 +6,7 @@ use crate::emit; #[cfg(any(feature = "sources-file", feature = "sources-kubernetes_logs"))] pub use self::source::*; + use vector_common::internal_event::{error_stage, error_type}; #[derive(Debug)] @@ -86,7 +87,10 @@ mod source { use super::{FileOpen, InternalEvent}; use crate::emit; - use vector_common::internal_event::{error_stage, error_type}; + use vector_common::{ + internal_event::{error_stage, error_type}, + json_size::JsonSize, + }; #[derive(Debug)] pub struct FileBytesReceived<'a> { @@ -114,7 +118,7 @@ mod source { pub struct FileEventsReceived<'a> { pub count: 
usize, pub file: &'a str, - pub byte_size: usize, + pub byte_size: JsonSize, } impl InternalEvent for FileEventsReceived<'_> { @@ -125,16 +129,12 @@ mod source { byte_size = %self.byte_size, file = %self.file ); - counter!( - "events_in_total", self.count as u64, - "file" => self.file.to_owned(), - ); counter!( "component_received_events_total", self.count as u64, "file" => self.file.to_owned(), ); counter!( - "component_received_event_bytes_total", self.byte_size as u64, + "component_received_event_bytes_total", self.byte_size.get() as u64, "file" => self.file.to_owned(), ); } diff --git a/src/internal_events/filter.rs b/src/internal_events/filter.rs index 91df569bd1cdb..44d3c607f4f1f 100644 --- a/src/internal_events/filter.rs +++ b/src/internal_events/filter.rs @@ -1,4 +1,3 @@ -use metrics::{register_counter, Counter}; use vector_common::internal_event::{ComponentEventsDropped, Count, Registered, INTENTIONAL}; use crate::register; @@ -9,11 +8,9 @@ vector_common::registered_event! ( = register!(ComponentEventsDropped::::from( "Events matched filter condition." )), - events_discarded: Counter = register_counter!("events_discarded_total"), } fn emit(&self, data: Count) { self.events_dropped.emit(data); - self.events_discarded.increment(data.0 as u64); } ); diff --git a/src/internal_events/fluent.rs b/src/internal_events/fluent.rs index a325dc969eeac..9fbcdc0031e5f 100644 --- a/src/internal_events/fluent.rs +++ b/src/internal_events/fluent.rs @@ -13,7 +13,6 @@ impl InternalEvent for FluentMessageReceived { fn emit(self) { trace!(message = "Received fluent message.", byte_size = %self.byte_size); counter!("component_received_events_total", 1); - counter!("events_in_total", 1); } } diff --git a/src/internal_events/http.rs b/src/internal_events/http.rs index a1879c2379bcb..5243cf47628f5 100644 --- a/src/internal_events/http.rs +++ b/src/internal_events/http.rs @@ -3,7 +3,10 @@ use std::error::Error; use metrics::{counter, histogram}; use vector_core::internal_event::InternalEvent; -use vector_common::internal_event::{error_stage, error_type}; +use vector_common::{ + internal_event::{error_stage, error_type}, + json_size::JsonSize, +}; #[derive(Debug)] pub struct HttpBytesReceived<'a> { @@ -31,7 +34,7 @@ impl InternalEvent for HttpBytesReceived<'_> { #[derive(Debug)] pub struct HttpEventsReceived<'a> { pub count: usize, - pub byte_size: usize, + pub byte_size: JsonSize, pub http_path: &'a str, pub protocol: &'static str, } @@ -54,11 +57,10 @@ impl InternalEvent for HttpEventsReceived<'_> { ); counter!( "component_received_event_bytes_total", - self.byte_size as u64, + self.byte_size.get() as u64, "http_path" => self.http_path.to_string(), "protocol" => self.protocol, ); - counter!("events_in_total", self.count as u64); } } diff --git a/src/internal_events/http_client_source.rs b/src/internal_events/http_client_source.rs index 6d9d6a1e1c3e8..b5eb27e8a3fd9 100644 --- a/src/internal_events/http_client_source.rs +++ b/src/internal_events/http_client_source.rs @@ -1,12 +1,15 @@ use metrics::counter; -use vector_common::internal_event::{error_stage, error_type}; +use vector_common::{ + internal_event::{error_stage, error_type}, + json_size::JsonSize, +}; use vector_core::internal_event::InternalEvent; use super::prelude::http_error_code; #[derive(Debug)] pub struct HttpClientEventsReceived { - pub byte_size: usize, + pub byte_size: JsonSize, pub count: usize, pub url: String, } @@ -24,14 +27,9 @@ impl InternalEvent for HttpClientEventsReceived { "uri" => self.url.clone(), ); counter!( - 
"component_received_event_bytes_total", self.byte_size as u64, + "component_received_event_bytes_total", self.byte_size.get() as u64, "uri" => self.url.clone(), ); - // deprecated - counter!( - "events_in_total", self.count as u64, - "uri" => self.url, - ); } } diff --git a/src/internal_events/internal_logs.rs b/src/internal_events/internal_logs.rs index f78df4e2d32fd..5d6637bfec986 100644 --- a/src/internal_events/internal_logs.rs +++ b/src/internal_events/internal_logs.rs @@ -1,4 +1,5 @@ use metrics::counter; +use vector_common::json_size::JsonSize; use vector_core::internal_event::InternalEvent; #[derive(Debug)] @@ -18,7 +19,7 @@ impl InternalEvent for InternalLogsBytesReceived { #[derive(Debug)] pub struct InternalLogsEventsReceived { - pub byte_size: usize, + pub byte_size: JsonSize, pub count: usize, } @@ -28,7 +29,7 @@ impl InternalEvent for InternalLogsEventsReceived { counter!("component_received_events_total", self.count as u64); counter!( "component_received_event_bytes_total", - self.byte_size as u64 + self.byte_size.get() as u64 ); } } diff --git a/src/internal_events/kafka.rs b/src/internal_events/kafka.rs index 92afe66b98a53..57c25d905c60f 100644 --- a/src/internal_events/kafka.rs +++ b/src/internal_events/kafka.rs @@ -1,7 +1,10 @@ use metrics::{counter, gauge}; use vector_core::{internal_event::InternalEvent, update_counter}; -use vector_common::internal_event::{error_stage, error_type}; +use vector_common::{ + internal_event::{error_stage, error_type}, + json_size::JsonSize, +}; #[derive(Debug)] pub struct KafkaBytesReceived<'a> { @@ -32,7 +35,7 @@ impl<'a> InternalEvent for KafkaBytesReceived<'a> { #[derive(Debug)] pub struct KafkaEventsReceived<'a> { - pub byte_size: usize, + pub byte_size: JsonSize, pub count: usize, pub topic: &'a str, pub partition: i32, @@ -50,12 +53,10 @@ impl<'a> InternalEvent for KafkaEventsReceived<'a> { counter!("component_received_events_total", self.count as u64, "topic" => self.topic.to_string(), "partition" => self.partition.to_string()); counter!( "component_received_event_bytes_total", - self.byte_size as u64, + self.byte_size.get() as u64, "topic" => self.topic.to_string(), "partition" => self.partition.to_string(), ); - // deprecated - counter!("events_in_total", self.count as u64); } } @@ -106,8 +107,6 @@ impl InternalEvent for KafkaReadError { "error_type" => error_type::READER_FAILED, "stage" => error_stage::RECEIVING, ); - // deprecated - counter!("events_failed_total", 1); } } diff --git a/src/internal_events/kubernetes_logs.rs b/src/internal_events/kubernetes_logs.rs index 02aefa9bbe374..aff0109295078 100644 --- a/src/internal_events/kubernetes_logs.rs +++ b/src/internal_events/kubernetes_logs.rs @@ -3,14 +3,15 @@ use vector_core::internal_event::InternalEvent; use crate::emit; use crate::event::Event; -use vector_common::internal_event::{ - error_stage, error_type, ComponentEventsDropped, UNINTENTIONAL, +use vector_common::{ + internal_event::{error_stage, error_type, ComponentEventsDropped, UNINTENTIONAL}, + json_size::JsonSize, }; #[derive(Debug)] pub struct KubernetesLogsEventsReceived<'a> { pub file: &'a str, - pub byte_size: usize, + pub byte_size: JsonSize, pub pod_info: Option, } @@ -34,16 +35,14 @@ impl InternalEvent for KubernetesLogsEventsReceived<'_> { let pod_namespace = pod_info.namespace; counter!("component_received_events_total", 1, "pod_name" => pod_name.clone(), "pod_namespace" => pod_namespace.clone()); - counter!("component_received_event_bytes_total", self.byte_size as u64, "pod_name" => pod_name.clone(), 
"pod_namespace" => pod_namespace.clone()); - counter!("events_in_total", 1, "pod_name" => pod_name, "pod_namespace" => pod_namespace); + counter!("component_received_event_bytes_total", self.byte_size.get() as u64, "pod_name" => pod_name, "pod_namespace" => pod_namespace); } None => { counter!("component_received_events_total", 1); counter!( "component_received_event_bytes_total", - self.byte_size as u64 + self.byte_size.get() as u64 ); - counter!("events_in_total", 1); } } } diff --git a/src/internal_events/log_to_metric.rs b/src/internal_events/log_to_metric.rs index df9027a40b234..1925dd135efba 100644 --- a/src/internal_events/log_to_metric.rs +++ b/src/internal_events/log_to_metric.rs @@ -30,11 +30,6 @@ impl<'a> InternalEvent for LogToMetricFieldNullError<'a> { "stage" => error_stage::PROCESSING, "null_field" => self.field.to_string(), ); - // deprecated - counter!( - "processing_errors_total", 1, - "error_type" => "field_null", - ); emit!(ComponentEventsDropped:: { count: 1, reason }) } @@ -64,11 +59,6 @@ impl<'a> InternalEvent for LogToMetricParseFloatError<'a> { "stage" => error_stage::PROCESSING, "field" => self.field.to_string(), ); - // deprecated - counter!( - "processing_errors_total", 1, - "error_type" => "parse_error", - ); emit!(ComponentEventsDropped:: { count: 1, reason }) } diff --git a/src/internal_events/loki.rs b/src/internal_events/loki.rs index 3d3da61786552..eff93d7a7a429 100644 --- a/src/internal_events/loki.rs +++ b/src/internal_events/loki.rs @@ -1,35 +1,58 @@ use crate::emit; use metrics::counter; +use vector_common::internal_event::{error_stage, error_type}; use vector_core::internal_event::{ComponentEventsDropped, InternalEvent, INTENTIONAL}; #[derive(Debug)] -pub struct LokiEventUnlabeled; +pub struct LokiEventUnlabeledError; -impl InternalEvent for LokiEventUnlabeled { +impl InternalEvent for LokiEventUnlabeledError { fn emit(self) { - // Deprecated - counter!("processing_errors_total", 1, - "error_type" => "unlabeled_event"); + error!( + message = "Event had no labels. 
Adding default `agent` label.", + error_code = "unlabeled_event", + error_type = error_type::CONDITION_FAILED, + stage = error_stage::PROCESSING, + internal_log_rate_limit = true, + ); + + counter!( + "component_errors_total", 1, + "error_code" => "unlabeled_event", + "error_type" => error_type::CONDITION_FAILED, + "stage" => error_stage::PROCESSING, + ); } } #[derive(Debug)] -pub struct LokiOutOfOrderEventDropped { +pub struct LokiOutOfOrderEventDroppedError { pub count: usize, } -impl InternalEvent for LokiOutOfOrderEventDropped { +impl InternalEvent for LokiOutOfOrderEventDroppedError { fn emit(self) { + let reason = "Dropping out-of-order event(s)."; + + error!( + message = reason, + error_code = "out_of_order", + error_type = error_type::CONDITION_FAILED, + stage = error_stage::PROCESSING, + internal_log_rate_limit = true, + ); + emit!(ComponentEventsDropped:: { count: self.count, - reason: "out_of_order", + reason, }); - // Deprecated - counter!("events_discarded_total", self.count as u64, - "reason" => "out_of_order"); - counter!("processing_errors_total", 1, - "error_type" => "out_of_order"); + counter!( + "component_errors_total", 1, + "error_code" => "out_of_order", + "error_type" => error_type::CONDITION_FAILED, + "stage" => error_stage::PROCESSING, + ); } } @@ -47,9 +70,5 @@ impl InternalEvent for LokiOutOfOrderEventRewritten { internal_log_rate_limit = true, ); counter!("rewritten_timestamp_events_total", self.count as u64); - - // Deprecated - counter!("processing_errors_total", 1, - "error_type" => "out_of_order"); } } diff --git a/src/internal_events/lua.rs b/src/internal_events/lua.rs index 54491c1602967..302e908d490c1 100644 --- a/src/internal_events/lua.rs +++ b/src/internal_events/lua.rs @@ -43,8 +43,6 @@ impl InternalEvent for LuaScriptError { count: 1, reason: "Error in lua script.", }); - // deprecated - counter!("processing_errors_total", 1); } } @@ -70,12 +68,6 @@ impl InternalEvent for LuaBuildError { "error_type" => error_type::SCRIPT_FAILED, "stage" => error_stage:: PROCESSING, ); - emit!(ComponentEventsDropped:: { - count: 1, - reason: "Error in lua build.", - }); - // deprecated - counter!("processing_errors_total", 1); emit!(ComponentEventsDropped:: { count: 1, reason }) } diff --git a/src/internal_events/metric_to_log.rs b/src/internal_events/metric_to_log.rs index fe75b1252353f..14463782d3adc 100644 --- a/src/internal_events/metric_to_log.rs +++ b/src/internal_events/metric_to_log.rs @@ -27,8 +27,6 @@ impl InternalEvent for MetricToLogSerializeError { "error_type" => error_type::ENCODER_FAILED, "stage" => error_stage::PROCESSING, ); - // deprecated - counter!("processing_errors_total", 1, "error_type" => "failed_serialize"); emit!(ComponentEventsDropped:: { count: 1, reason }) } diff --git a/src/internal_events/mod.rs b/src/internal_events/mod.rs index d714c81531a3d..4d7c020b4f7ec 100644 --- a/src/internal_events/mod.rs +++ b/src/internal_events/mod.rs @@ -266,24 +266,13 @@ pub(crate) use self::tag_cardinality_limit::*; #[cfg(feature = "transforms-throttle")] pub(crate) use self::throttle::*; -#[cfg(all( - any( - feature = "sinks-socket", - feature = "sinks-statsd", - feature = "sources-dnstap", - feature = "sources-metrics", - feature = "sources-statsd", - feature = "sources-syslog", - feature = "sources-socket" - ), - unix -))] +#[cfg(unix)] pub(crate) use self::unix::*; #[cfg(feature = "sinks-websocket")] pub(crate) use self::websocket::*; #[cfg(windows)] pub(crate) use self::windows::*; -pub(crate) use self::{ +pub use self::{ adaptive_concurrency::*, 
batch::*, common::*, conditions::*, encoding_transcode::*, heartbeat::*, open::*, process::*, socket::*, tcp::*, template::*, udp::*, }; diff --git a/src/internal_events/mongodb_metrics.rs b/src/internal_events/mongodb_metrics.rs index d5038e54a5f47..1e749dc5ba8c2 100644 --- a/src/internal_events/mongodb_metrics.rs +++ b/src/internal_events/mongodb_metrics.rs @@ -2,21 +2,25 @@ use metrics::counter; use mongodb::{bson, error::Error as MongoError}; use vector_core::internal_event::InternalEvent; -use vector_common::internal_event::{error_stage, error_type}; +use vector_common::{ + internal_event::{error_stage, error_type}, + json_size::JsonSize, +}; #[derive(Debug)] pub struct MongoDbMetricsEventsReceived<'a> { pub count: usize, - pub byte_size: usize, + pub byte_size: JsonSize, pub endpoint: &'a str, } impl<'a> InternalEvent for MongoDbMetricsEventsReceived<'a> { + // ## skip check-duplicate-events ## fn emit(self) { trace!( message = "Events received.", count = self.count, - byte_size = self.byte_size, + byte_size = self.byte_size.get(), endpoint = self.endpoint, ); counter!( @@ -24,12 +28,7 @@ impl<'a> InternalEvent for MongoDbMetricsEventsReceived<'a> { "endpoint" => self.endpoint.to_owned(), ); counter!( - "component_received_event_bytes_total", self.byte_size as u64, - "endpoint" => self.endpoint.to_owned(), - ); - // deprecated - counter!( - "events_in_total", self.count as u64, + "component_received_event_bytes_total", self.byte_size.get() as u64, "endpoint" => self.endpoint.to_owned(), ); } diff --git a/src/internal_events/nginx_metrics.rs b/src/internal_events/nginx_metrics.rs index d8db19fcb7c60..eb5adcf8485d1 100644 --- a/src/internal_events/nginx_metrics.rs +++ b/src/internal_events/nginx_metrics.rs @@ -2,11 +2,14 @@ use metrics::counter; use vector_core::internal_event::InternalEvent; use crate::sources::nginx_metrics::parser::ParseError; -use vector_common::internal_event::{error_stage, error_type}; +use vector_common::{ + internal_event::{error_stage, error_type}, + json_size::JsonSize, +}; #[derive(Debug)] pub struct NginxMetricsEventsReceived<'a> { - pub byte_size: usize, + pub byte_size: JsonSize, pub count: usize, pub endpoint: &'a str, } @@ -24,12 +27,7 @@ impl<'a> InternalEvent for NginxMetricsEventsReceived<'a> { "endpoint" => self.endpoint.to_owned(), ); counter!( - "component_received_event_bytes_total", self.byte_size as u64, - "endpoint" => self.endpoint.to_owned(), - ); - // deprecated - counter!( - "events_in_total", self.count as u64, + "component_received_event_bytes_total", self.byte_size.get() as u64, "endpoint" => self.endpoint.to_owned(), ); } diff --git a/src/internal_events/parser.rs b/src/internal_events/parser.rs index 739d33199202e..07826e3b6f155 100644 --- a/src/internal_events/parser.rs +++ b/src/internal_events/parser.rs @@ -42,8 +42,6 @@ impl InternalEvent for ParserMatchError<'_> { "error_type" => error_type::CONDITION_FAILED, "stage" => error_stage::PROCESSING, ); - // deprecated - counter!("processing_errors_total", 1, "error_type" => "failed_match"); } } @@ -75,8 +73,6 @@ impl InternalEvent for ParserMissingFieldError<'_, DROP_ "stage" => error_stage::PROCESSING, "field" => self.field.to_string(), ); - // deprecated - counter!("processing_errors_total", 1, "error_type" => "missing_field"); if DROP_EVENT { emit!(ComponentEventsDropped:: { count: 1, reason }); @@ -108,8 +104,6 @@ impl<'a> InternalEvent for ParserConversionError<'a> { "stage" => error_stage::PROCESSING, "name" => self.name.to_string(), ); - // deprecated - 
counter!("processing_errors_total", 1, "error_type" => "type_conversion_failed"); } } diff --git a/src/internal_events/remap.rs b/src/internal_events/remap.rs index e17f159c403b6..666a1a59a0674 100644 --- a/src/internal_events/remap.rs +++ b/src/internal_events/remap.rs @@ -34,8 +34,6 @@ impl InternalEvent for RemapMappingError { reason: "Mapping failed with event.", }); } - // deprecated - counter!("processing_errors_total", 1); } } diff --git a/src/internal_events/sample.rs b/src/internal_events/sample.rs index 04cb631801483..cded9a1a13343 100644 --- a/src/internal_events/sample.rs +++ b/src/internal_events/sample.rs @@ -1,5 +1,4 @@ use crate::emit; -use metrics::counter; use vector_core::internal_event::{ComponentEventsDropped, InternalEvent, INTENTIONAL}; #[derive(Debug)] @@ -7,7 +6,6 @@ pub struct SampleEventDiscarded; impl InternalEvent for SampleEventDiscarded { fn emit(self) { - counter!("events_discarded_total", 1); // Deprecated. emit!(ComponentEventsDropped:: { count: 1, reason: "Sample discarded." diff --git a/src/internal_events/sematext_metrics.rs b/src/internal_events/sematext_metrics.rs index 6d9c1dc155ef6..fea41b796c9f1 100644 --- a/src/internal_events/sematext_metrics.rs +++ b/src/internal_events/sematext_metrics.rs @@ -29,8 +29,6 @@ impl<'a> InternalEvent for SematextMetricsInvalidMetricError<'a> { "error_type" => error_type::ENCODER_FAILED, "stage" => error_stage::PROCESSING, ); - // deprecated - counter!("processing_errors_total", 1); emit!(ComponentEventsDropped:: { count: 1, reason }); } diff --git a/src/internal_events/socket.rs b/src/internal_events/socket.rs index 468701a1fa194..daa03a27991b5 100644 --- a/src/internal_events/socket.rs +++ b/src/internal_events/socket.rs @@ -1,5 +1,8 @@ use metrics::counter; -use vector_common::internal_event::{error_stage, error_type}; +use vector_common::{ + internal_event::{error_stage, error_type}, + json_size::JsonSize, +}; use vector_core::internal_event::{ComponentEventsDropped, InternalEvent, UNINTENTIONAL}; use crate::emit; @@ -13,7 +16,7 @@ pub enum SocketMode { } impl SocketMode { - const fn as_str(self) -> &'static str { + pub const fn as_str(self) -> &'static str { match self { Self::Tcp => "tcp", Self::Udp => "udp", @@ -45,7 +48,7 @@ impl InternalEvent for SocketBytesReceived { #[derive(Debug)] pub struct SocketEventsReceived { pub mode: SocketMode, - pub byte_size: usize, + pub byte_size: JsonSize, pub count: usize, } @@ -55,13 +58,11 @@ impl InternalEvent for SocketEventsReceived { trace!( message = "Events received.", count = self.count, - byte_size = self.byte_size, + byte_size = self.byte_size.get(), %mode, ); counter!("component_received_events_total", self.count as u64, "mode" => mode); - counter!("component_received_event_bytes_total", self.byte_size as u64, "mode" => mode); - // deprecated - counter!("events_in_total", self.count as u64, "mode" => mode); + counter!("component_received_event_bytes_total", self.byte_size.get() as u64, "mode" => mode); } } @@ -90,14 +91,14 @@ impl InternalEvent for SocketBytesSent { pub struct SocketEventsSent { pub mode: SocketMode, pub count: u64, - pub byte_size: usize, + pub byte_size: JsonSize, } impl InternalEvent for SocketEventsSent { fn emit(self) { - trace!(message = "Events sent.", count = %self.count, byte_size = %self.byte_size); + trace!(message = "Events sent.", count = %self.count, byte_size = %self.byte_size.get()); counter!("component_sent_events_total", self.count, "mode" => self.mode.as_str()); - counter!("component_sent_event_bytes_total", self.byte_size as 
u64, "mode" => self.mode.as_str()); + counter!("component_sent_event_bytes_total", self.byte_size.get() as u64, "mode" => self.mode.as_str()); } } diff --git a/src/internal_events/statsd_sink.rs b/src/internal_events/statsd_sink.rs index 248cddf8cfd5f..20766f3376aad 100644 --- a/src/internal_events/statsd_sink.rs +++ b/src/internal_events/statsd_sink.rs @@ -10,7 +10,7 @@ use vector_common::internal_event::{ #[derive(Debug)] pub struct StatsdInvalidMetricError<'a> { pub value: &'a MetricValue, - pub kind: &'a MetricKind, + pub kind: MetricKind, } impl<'a> InternalEvent for StatsdInvalidMetricError<'a> { @@ -31,8 +31,6 @@ impl<'a> InternalEvent for StatsdInvalidMetricError<'a> { "error_type" => error_type::ENCODER_FAILED, "stage" => error_stage::PROCESSING, ); - // deprecated - counter!("processing_errors_total", 1); emit!(ComponentEventsDropped:: { reason, count: 1 }); } diff --git a/src/internal_events/template.rs b/src/internal_events/template.rs index b1265d5b83924..b9ecd702d0dca 100644 --- a/src/internal_events/template.rs +++ b/src/internal_events/template.rs @@ -19,32 +19,33 @@ impl<'a> InternalEvent for TemplateRenderingError<'a> { } msg.push('.'); - error!( - message = %msg, - error = %self.error, - error_type = error_type::TEMPLATE_FAILED, - stage = error_stage::PROCESSING, - internal_log_rate_limit = true, - ); - - counter!( - "component_errors_total", 1, - "error_type" => error_type::TEMPLATE_FAILED, - "stage" => error_stage::PROCESSING, - ); - - // deprecated - counter!("processing_errors_total", 1, - "error_type" => "render_error"); - if self.drop_event { + error!( + message = %msg, + error = %self.error, + error_type = error_type::TEMPLATE_FAILED, + stage = error_stage::PROCESSING, + internal_log_rate_limit = true, + ); + + counter!( + "component_errors_total", 1, + "error_type" => error_type::TEMPLATE_FAILED, + "stage" => error_stage::PROCESSING, + ); + emit!(ComponentEventsDropped:: { count: 1, reason: "Failed to render template.", }); - - // deprecated - counter!("events_discarded_total", 1); + } else { + warn!( + message = %msg, + error = %self.error, + error_type = error_type::TEMPLATE_FAILED, + stage = error_stage::PROCESSING, + internal_log_rate_limit = true, + ); } } } diff --git a/src/internal_events/throttle.rs b/src/internal_events/throttle.rs index 18f42e2f4a802..17ceeaac95590 100644 --- a/src/internal_events/throttle.rs +++ b/src/internal_events/throttle.rs @@ -9,7 +9,15 @@ pub(crate) struct ThrottleEventDiscarded { impl InternalEvent for ThrottleEventDiscarded { fn emit(self) { - debug!(message = "Rate limit exceeded.", key = ?self.key); // Deprecated. + // TODO: Technically, the Component Specification states that the discarded events metric + // must _only_ have the `intentional` tag, in addition to the core tags like + // `component_kind`, etc, and nothing else. + // + // That doesn't give us the leeway to specify which throttle bucket the events are being + // discarded for... but including the key/bucket as a tag does seem useful and so I wonder + // if we should change the specification wording? Sort of a similar situation to the + // `error_code` tag for the component errors metric, where it's meant to be optional and + // only specified when relevant. 
counter!( "events_discarded_total", 1, "key" => self.key, diff --git a/src/internal_events/unix.rs b/src/internal_events/unix.rs index a74c2b7e2c12b..2f004ec38b826 100644 --- a/src/internal_events/unix.rs +++ b/src/internal_events/unix.rs @@ -90,6 +90,34 @@ impl InternalEvent for UnixSocketSendError<'_, E> { } } +#[derive(Debug)] +pub struct UnixSendIncompleteError { + pub data_size: usize, + pub sent: usize, +} + +impl InternalEvent for UnixSendIncompleteError { + fn emit(self) { + let reason = "Could not send all data in one Unix datagram."; + error!( + message = reason, + data_size = self.data_size, + sent = self.sent, + dropped = self.data_size - self.sent, + error_type = error_type::WRITER_FAILED, + stage = error_stage::SENDING, + internal_log_rate_limit = true, + ); + counter!( + "component_errors_total", 1, + "error_type" => error_type::WRITER_FAILED, + "stage" => error_stage::SENDING, + ); + + emit!(ComponentEventsDropped:: { count: 1, reason }); + } +} + #[derive(Debug)] pub struct UnixSocketFileDeleteError<'a> { pub path: &'a Path, diff --git a/src/lib.rs b/src/lib.rs index cd52b86412209..a9d0a1d828dfc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,17 +18,9 @@ #![deny(clippy::disallowed_methods)] // [nursery] mark some functions as verboten #![deny(clippy::missing_const_for_fn)] // [nursery] valuable to the optimizer, but may produce false positives #![allow( - clippy::arc_with_non_send_sync, - clippy::default_constructed_unit_structs, clippy::explicit_iter_loop, - clippy::incorrect_clone_impl_on_copy_type, - clippy::manual_range_patterns, - clippy::missing_fields_in_debug, clippy::missing_panics_doc, clippy::needless_lifetimes, - clippy::needless_pub_self, - clippy::needless_raw_string_hashes, - clippy::non_minimal_cfg, clippy::redundant_closure_call, clippy::redundant_pattern_matching, clippy::useless_conversion, @@ -100,6 +92,7 @@ pub mod line_agg; pub mod list; #[cfg(any(feature = "sources-nats", feature = "sinks-nats"))] pub(crate) mod nats; +pub mod net; #[allow(unreachable_pub)] pub(crate) mod proto; pub mod providers; @@ -129,7 +122,6 @@ pub mod trace; #[allow(unreachable_pub)] pub mod transforms; pub mod types; -pub mod udp; pub mod unit_test; pub(crate) mod utilization; pub mod validate; diff --git a/src/main.rs b/src/main.rs index 1859eff381c06..66818155ab9ea 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,8 +3,10 @@ extern crate vector; use vector::app::Application; +use std::process::ExitCode; + #[cfg(unix)] -fn main() { +fn main() -> ExitCode { #[cfg(feature = "allocation-tracing")] { use crate::vector::internal_telemetry::allocations::{ @@ -35,14 +37,17 @@ fn main() { } } - Application::run(); + let exit_code = Application::run().code().unwrap_or(exitcode::UNAVAILABLE) as u8; + ExitCode::from(exit_code) } #[cfg(windows)] -pub fn main() { +pub fn main() -> ExitCode { // We need to be able to run vector in User Interactive mode. We first try // to run vector as a service. If we fail, we consider that we are in // interactive mode and then fallback to console mode. 
See // https://docs.microsoft.com/en-us/dotnet/api/system.environment.userinteractive?redirectedfrom=MSDN&view=netcore-3.1#System_Environment_UserInteractive - vector::vector_windows::run().unwrap_or_else(|_| Application::run()); + let exit_code = vector::vector_windows::run() + .unwrap_or_else(|_| Application::run().code().unwrap_or(exitcode::UNAVAILABLE)); + ExitCode::from(exit_code as u8) } diff --git a/src/net.rs b/src/net.rs new file mode 100644 index 0000000000000..f62abfa32974a --- /dev/null +++ b/src/net.rs @@ -0,0 +1,52 @@ +//! Networking-related helper functions. + +use std::{io, time::Duration}; + +use socket2::{SockRef, TcpKeepalive}; +use tokio::net::TcpStream; + +/// Sets the receive buffer size for a socket. +/// +/// This is the equivalent of setting the `SO_RCVBUF` socket setting directly. +/// +/// # Errors +/// +/// If there is an error setting the receive buffer size on the given socket, or if the value given +/// as the socket is not a valid socket, an error variant will be returned explaining the underlying +/// I/O error. +pub fn set_receive_buffer_size<'s, S>(socket: &'s S, size: usize) -> io::Result<()> +where + SockRef<'s>: From<&'s S>, +{ + SockRef::from(socket).set_recv_buffer_size(size) +} + +/// Sets the send buffer size for a socket. +/// +/// This is the equivalent of setting the `SO_SNDBUF` socket setting directly. +/// +/// # Errors +/// +/// If there is an error setting the send buffer size on the given socket, or if the value given +/// as the socket is not a valid socket, an error variant will be returned explaining the underlying +/// I/O error. +pub fn set_send_buffer_size<'s, S>(socket: &'s S, size: usize) -> io::Result<()> +where + SockRef<'s>: From<&'s S>, +{ + SockRef::from(socket).set_send_buffer_size(size) +} + +/// Sets the TCP keepalive behavior on a socket. +/// +/// This is the equivalent of setting the `SO_KEEPALIVE` and `TCP_KEEPALIVE` socket settings +/// directly. +/// +/// # Errors +/// +/// If there is an error with either enabling keepalive probes or setting the TCP keepalive idle +/// timeout on the given socket, an error variant will be returned explaining the underlying I/O +/// error. 
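For orientation, a hypothetical caller of the helpers in the new `src/net.rs` module (the body of the TCP keepalive setter continues just below) could look like the following. The `vector::net` path assumes the `pub mod net;` export added to `src/lib.rs` in this diff, and the buffer sizes and idle time are illustrative values only:

```rust
use std::time::Duration;
use tokio::net::TcpListener;

async fn accept_and_tune(listener: &TcpListener) -> std::io::Result<()> {
    let (stream, _peer) = listener.accept().await?;

    // SO_RCVBUF / SO_SNDBUF, then SO_KEEPALIVE with a TCP keepalive idle time.
    vector::net::set_receive_buffer_size(&stream, 256 * 1024)?;
    vector::net::set_send_buffer_size(&stream, 256 * 1024)?;
    vector::net::set_keepalive(&stream, Duration::from_secs(60))?;

    Ok(())
}
```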
+pub fn set_keepalive(socket: &TcpStream, ttl: Duration) -> io::Result<()> { + SockRef::from(socket).set_tcp_keepalive(&TcpKeepalive::new().with_time(ttl)) +} diff --git a/src/proto.rs b/src/proto.rs deleted file mode 100644 index b77e94c30f793..0000000000000 --- a/src/proto.rs +++ /dev/null @@ -1,5 +0,0 @@ -#[cfg(any(feature = "sources-vector", feature = "sinks-vector"))] -use crate::event::proto as event; - -#[cfg(any(feature = "sources-vector", feature = "sinks-vector"))] -pub mod vector; diff --git a/src/proto/mod.rs b/src/proto/mod.rs new file mode 100644 index 0000000000000..efa1728fb6988 --- /dev/null +++ b/src/proto/mod.rs @@ -0,0 +1,19 @@ +#[cfg(any(feature = "sources-vector", feature = "sinks-vector"))] +use crate::event::proto as event; + +#[cfg(any(feature = "sources-vector", feature = "sinks-vector"))] +pub mod vector; + +#[cfg(feature = "sinks-datadog_metrics")] +pub mod fds { + use once_cell::sync::OnceCell; + use prost_reflect::DescriptorPool; + + pub fn protobuf_descriptors() -> &'static DescriptorPool { + static PROTOBUF_FDS: OnceCell = OnceCell::new(); + PROTOBUF_FDS.get_or_init(|| { + DescriptorPool::decode(include_bytes!(concat!(env!("OUT_DIR"), "/protobuf-fds.bin")).as_ref()) + .expect("should not fail to decode protobuf file descriptor set generated from build script") + }) + } +} diff --git a/src/sinks/amqp/config.rs b/src/sinks/amqp/config.rs index 3af34e943d7b2..b266bb1892370 100644 --- a/src/sinks/amqp/config.rs +++ b/src/sinks/amqp/config.rs @@ -1,17 +1,8 @@ //! Configuration functionality for the `AMQP` sink. -use crate::{ - amqp::AmqpConfig, - codecs::EncodingConfig, - config::{DataType, GenerateConfig, Input, SinkConfig, SinkContext}, - sinks::{Healthcheck, VectorSink}, - template::Template, -}; +use crate::{amqp::AmqpConfig, sinks::prelude::*}; use codecs::TextSerializerConfig; -use futures::FutureExt; use lapin::{types::ShortString, BasicProperties}; use std::sync::Arc; -use vector_config::configurable_component; -use vector_core::config::AcknowledgementsConfig; use super::sink::AmqpSink; @@ -45,7 +36,10 @@ impl AmqpPropertiesConfig { /// Configuration for the `amqp` sink. /// /// Supports AMQP version 0.9.1 -#[configurable_component(sink("amqp"))] +#[configurable_component(sink( + "amqp", + "Send events to AMQP 0.9.1 compatible brokers like RabbitMQ." +))] #[derive(Clone, Debug)] pub struct AmqpSinkConfig { /// The exchange to publish messages to. @@ -98,6 +92,7 @@ impl GenerateConfig for AmqpSinkConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "amqp")] impl SinkConfig for AmqpSinkConfig { async fn build(&self, _cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let sink = AmqpSink::new(self.clone()).await?; diff --git a/src/sinks/amqp/encoder.rs b/src/sinks/amqp/encoder.rs index 6e86828c82923..d3d449811372f 100644 --- a/src/sinks/amqp/encoder.rs +++ b/src/sinks/amqp/encoder.rs @@ -1,8 +1,5 @@ //! Encoding for the `AMQP` sink. 
-use crate::{ - event::Event, - sinks::util::encoding::{write_all, Encoder}, -}; +use crate::sinks::prelude::*; use bytes::BytesMut; use std::io; use tokio_util::codec::Encoder as _; @@ -13,7 +10,7 @@ pub(super) struct AmqpEncoder { pub(super) transformer: crate::codecs::Transformer, } -impl Encoder for AmqpEncoder { +impl encoding::Encoder for AmqpEncoder { fn encode_input(&self, mut input: Event, writer: &mut dyn io::Write) -> io::Result { let mut body = BytesMut::new(); self.transformer.transform(&mut input); diff --git a/src/sinks/amqp/integration_tests.rs b/src/sinks/amqp/integration_tests.rs index ba8a9ee557022..08db9c6e46b27 100644 --- a/src/sinks/amqp/integration_tests.rs +++ b/src/sinks/amqp/integration_tests.rs @@ -72,7 +72,7 @@ async fn amqp_happy_path() { .await .unwrap(); - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let (sink, healthcheck) = config.build(cx).await.unwrap(); healthcheck.await.expect("Health check failed"); @@ -153,7 +153,7 @@ async fn amqp_round_trip() { .await .unwrap(); - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let (amqp_sink, healthcheck) = config.build(cx).await.unwrap(); healthcheck.await.expect("Health check failed"); diff --git a/src/sinks/amqp/request_builder.rs b/src/sinks/amqp/request_builder.rs index 313b37626aec8..13aaeab81cfcd 100644 --- a/src/sinks/amqp/request_builder.rs +++ b/src/sinks/amqp/request_builder.rs @@ -1,20 +1,10 @@ //! Request builder for the `AMQP` sink. //! Responsible for taking the event (which includes rendered template values) and turning //! it into the raw bytes and other data needed to send the request to `AMQP`. -use crate::{ - event::Event, - sinks::util::{ - metadata::RequestMetadataBuilder, request_builder::EncodeResult, Compression, - RequestBuilder, - }, -}; +use crate::sinks::prelude::*; use bytes::Bytes; use lapin::BasicProperties; use std::io; -use vector_common::{ - finalization::{EventFinalizers, Finalizable}, - request_metadata::RequestMetadata, -}; use super::{encoder::AmqpEncoder, service::AmqpRequest, sink::AmqpEvent}; @@ -52,7 +42,7 @@ impl RequestBuilder for AmqpRequestBuilder { &self, mut input: AmqpEvent, ) -> (Self::Metadata, RequestMetadataBuilder, Self::Events) { - let builder = RequestMetadataBuilder::from_events(&input); + let builder = RequestMetadataBuilder::from_event(&input.event); let metadata = AmqpMetadata { exchange: input.exchange, diff --git a/src/sinks/amqp/service.rs b/src/sinks/amqp/service.rs index 000c753ec97e2..42ccf467e5692 100644 --- a/src/sinks/amqp/service.rs +++ b/src/sinks/amqp/service.rs @@ -1,6 +1,9 @@ //! The main tower service that takes the request created by the request builder //! and sends it to `AMQP`. -use crate::internal_events::sink::{AmqpAcknowledgementError, AmqpDeliveryError}; +use crate::{ + internal_events::sink::{AmqpAcknowledgementError, AmqpDeliveryError}, + sinks::prelude::*, +}; use bytes::Bytes; use futures::future::BoxFuture; use lapin::{options::BasicPublishOptions, BasicProperties}; @@ -9,13 +12,6 @@ use std::{ sync::Arc, task::{Context, Poll}, }; -use tower::Service; -use vector_common::{ - finalization::{EventFinalizers, EventStatus, Finalizable}, - internal_event::CountByteSize, - request_metadata::{MetaDescriptive, RequestMetadata}, -}; -use vector_core::stream::DriverResponse; /// The request contains the data to send to `AMQP` together /// with the information need to route the message. 
@@ -55,14 +51,19 @@ impl Finalizable for AmqpRequest { } impl MetaDescriptive for AmqpRequest { - fn get_metadata(&self) -> RequestMetadata { - self.metadata + fn get_metadata(&self) -> &RequestMetadata { + &self.metadata + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.metadata } } /// A successful response from `AMQP`. pub(super) struct AmqpResponse { byte_size: usize, + json_size: GroupedCountByteSize, } impl DriverResponse for AmqpResponse { @@ -70,8 +71,8 @@ impl DriverResponse for AmqpResponse { EventStatus::Delivered } - fn events_sent(&self) -> CountByteSize { - CountByteSize(1, self.byte_size) + fn events_sent(&self) -> &GroupedCountByteSize { + &self.json_size } fn bytes_sent(&self) -> Option { @@ -128,14 +129,20 @@ impl Service for AmqpService { Ok(result) => match result.await { Ok(lapin::publisher_confirm::Confirmation::Nack(_)) => { warn!("Received Negative Acknowledgement from AMQP server."); - Ok(AmqpResponse { byte_size }) + Ok(AmqpResponse { + json_size: req.metadata.into_events_estimated_json_encoded_byte_size(), + byte_size, + }) } Err(error) => { // TODO: In due course the caller could emit these on error. emit!(AmqpAcknowledgementError { error: &error }); Err(AmqpError::AmqpAcknowledgementFailed { error }) } - Ok(_) => Ok(AmqpResponse { byte_size }), + Ok(_) => Ok(AmqpResponse { + json_size: req.metadata.into_events_estimated_json_encoded_byte_size(), + byte_size, + }), }, Err(error) => { // TODO: In due course the caller could emit these on error. diff --git a/src/sinks/amqp/sink.rs b/src/sinks/amqp/sink.rs index 333b325dbeab4..287b002b935f2 100644 --- a/src/sinks/amqp/sink.rs +++ b/src/sinks/amqp/sink.rs @@ -1,18 +1,9 @@ //! The sink for the `AMQP` sink that wires together the main stream that takes the //! event and sends it to `AMQP`. -use crate::{ - codecs::Transformer, event::Event, internal_events::TemplateRenderingError, - sinks::util::builder::SinkBuilderExt, template::Template, -}; -use async_trait::async_trait; -use futures::StreamExt; -use futures_util::stream::BoxStream; +use crate::sinks::prelude::*; use lapin::{options::ConfirmSelectOptions, BasicProperties}; use serde::Serialize; use std::sync::Arc; -use tower::ServiceBuilder; -use vector_buffers::EventCount; -use vector_core::{sink::StreamSink, ByteSizeOf, EstimatedJsonEncodedSizeOf}; use super::{ config::{AmqpPropertiesConfig, AmqpSinkConfig}, @@ -35,25 +26,6 @@ pub(super) struct AmqpEvent { pub(super) properties: BasicProperties, } -impl EventCount for AmqpEvent { - fn event_count(&self) -> usize { - // An AmqpEvent represents one event. - 1 - } -} - -impl ByteSizeOf for AmqpEvent { - fn allocated_bytes(&self) -> usize { - self.event.size_of() - } -} - -impl EstimatedJsonEncodedSizeOf for AmqpEvent { - fn estimated_json_encoded_size_of(&self) -> usize { - self.event.estimated_json_encoded_size_of() - } -} - pub(super) struct AmqpSink { pub(super) channel: Arc, exchange: Template, diff --git a/src/sinks/appsignal/mod.rs b/src/sinks/appsignal/mod.rs index 3007c6089513b..7b10fe02f7a8f 100644 --- a/src/sinks/appsignal/mod.rs +++ b/src/sinks/appsignal/mod.rs @@ -45,7 +45,7 @@ enum FinishError { } /// Configuration for the `appsignal` sink. -#[configurable_component(sink("appsignal"))] +#[configurable_component(sink("appsignal", "Send events to AppSignal."))] #[derive(Clone, Debug, Default)] pub struct AppsignalSinkConfig { /// The URI for the AppSignal API to send data to. 
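The `AmqpResponse` change above is representative of the wider pattern in this diff: `DriverResponse::events_sent` now returns a borrowed `GroupedCountByteSize` instead of constructing a `CountByteSize` on the fly. A hedged sketch of a response type under the new signature; only the trait methods are taken from the diff, the surrounding names are illustrative:

```rust
use vector_common::{finalization::EventStatus, request_metadata::GroupedCountByteSize};
use vector_core::stream::DriverResponse;

// Illustrative response type; a real sink would also carry whatever the
// service needs (status codes, raw byte counts, and so on).
struct ExampleResponse {
    events_byte_size: GroupedCountByteSize,
}

impl DriverResponse for ExampleResponse {
    fn event_status(&self) -> EventStatus {
        EventStatus::Delivered
    }

    // The grouped size is computed once, typically from the request metadata,
    // and borrowed here rather than rebuilt on every call.
    fn events_sent(&self) -> &GroupedCountByteSize {
        &self.events_byte_size
    }
}
```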
@@ -106,6 +106,7 @@ impl SinkBatchSettings for AppsignalDefaultBatchSettings { impl_generate_config_from_default!(AppsignalSinkConfig); #[async_trait::async_trait] +#[typetag::serde(name = "appsignal")] impl SinkConfig for AppsignalSinkConfig { async fn build( &self, @@ -137,6 +138,7 @@ impl SinkConfig for AppsignalSinkConfig { ) .boxed(); + #[allow(deprecated)] Ok((super::VectorSink::from_event_sink(sink), healthcheck)) } @@ -250,7 +252,7 @@ mod test { .expect("config should be valid"); config.endpoint = mock_endpoint.to_string(); - let context = SinkContext::new_test(); + let context = SinkContext::default(); let (sink, _healthcheck) = config.build(context).await.unwrap(); let event = Event::Log(LogEvent::from("simple message")); diff --git a/src/sinks/aws_cloudwatch_logs/config.rs b/src/sinks/aws_cloudwatch_logs/config.rs index 52833704533ce..8d8f7795f299a 100644 --- a/src/sinks/aws_cloudwatch_logs/config.rs +++ b/src/sinks/aws_cloudwatch_logs/config.rs @@ -49,7 +49,10 @@ impl ClientBuilder for CloudwatchLogsClientBuilder { } /// Configuration for the `aws_cloudwatch_logs` sink. -#[configurable_component(sink("aws_cloudwatch_logs"))] +#[configurable_component(sink( + "aws_cloudwatch_logs", + "Publish log events to AWS CloudWatch Logs." +))] #[derive(Clone, Debug)] #[serde(deny_unknown_fields)] pub struct CloudwatchLogsSinkConfig { @@ -136,7 +139,7 @@ impl CloudwatchLogsSinkConfig { create_client::( &self.auth, self.region.region(), - self.region.endpoint()?, + self.region.endpoint(), proxy, &self.tls, true, @@ -161,6 +164,7 @@ impl CloudwatchLogsSinkConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "aws_cloudwatch_logs")] impl SinkConfig for CloudwatchLogsSinkConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let batcher_settings = self.batch.into_batcher_settings()?; diff --git a/src/sinks/aws_cloudwatch_logs/integration_tests.rs b/src/sinks/aws_cloudwatch_logs/integration_tests.rs index a8b85e8428504..c0dbf0204260a 100644 --- a/src/sinks/aws_cloudwatch_logs/integration_tests.rs +++ b/src/sinks/aws_cloudwatch_logs/integration_tests.rs @@ -1,12 +1,10 @@ use std::convert::TryFrom; -use std::str::FromStr; use aws_sdk_cloudwatchlogs::Client as CloudwatchLogsClient; -use aws_sdk_cloudwatchlogs::{Endpoint, Region}; +use aws_sdk_cloudwatchlogs::Region; use chrono::Duration; use codecs::TextSerializerConfig; use futures::{stream, StreamExt}; -use http::Uri; use similar_asserts::assert_eq; use super::*; @@ -53,7 +51,7 @@ async fn cloudwatch_insert_log_event() { acknowledgements: Default::default(), }; - let (sink, _) = config.build(SinkContext::new_test()).await.unwrap(); + let (sink, _) = config.build(SinkContext::default()).await.unwrap(); let timestamp = chrono::Utc::now(); @@ -103,7 +101,7 @@ async fn cloudwatch_insert_log_events_sorted() { acknowledgements: Default::default(), }; - let (sink, _) = config.build(SinkContext::new_test()).await.unwrap(); + let (sink, _) = config.build(SinkContext::default()).await.unwrap(); let timestamp = chrono::Utc::now() - Duration::days(1); @@ -178,7 +176,7 @@ async fn cloudwatch_insert_out_of_range_timestamp() { acknowledgements: Default::default(), }; - let (sink, _) = config.build(SinkContext::new_test()).await.unwrap(); + let (sink, _) = config.build(SinkContext::default()).await.unwrap(); let now = chrono::Utc::now(); @@ -257,7 +255,7 @@ async fn cloudwatch_dynamic_group_and_stream_creation() { acknowledgements: Default::default(), }; - let (sink, _) = 
config.build(SinkContext::new_test()).await.unwrap(); + let (sink, _) = config.build(SinkContext::default()).await.unwrap(); let timestamp = chrono::Utc::now(); @@ -312,7 +310,7 @@ async fn cloudwatch_insert_log_event_batched() { acknowledgements: Default::default(), }; - let (sink, _) = config.build(SinkContext::new_test()).await.unwrap(); + let (sink, _) = config.build(SinkContext::default()).await.unwrap(); let timestamp = chrono::Utc::now(); @@ -362,7 +360,7 @@ async fn cloudwatch_insert_log_event_partitioned() { acknowledgements: Default::default(), }; - let (sink, _) = config.build(SinkContext::new_test()).await.unwrap(); + let (sink, _) = config.build(SinkContext::default()).await.unwrap(); let timestamp = chrono::Utc::now(); @@ -461,10 +459,7 @@ async fn cloudwatch_healthcheck() { async fn create_client_test() -> CloudwatchLogsClient { let auth = AwsAuthentication::test_auth(); let region = Some(Region::new("localstack")); - let watchlogs_address = watchlogs_address(); - let endpoint = Some(Endpoint::immutable( - Uri::from_str(&watchlogs_address).unwrap(), - )); + let endpoint = Some(watchlogs_address()); let proxy = ProxyConfig::default(); create_client::(&auth, region, endpoint, &proxy, &None, true) diff --git a/src/sinks/aws_cloudwatch_logs/request.rs b/src/sinks/aws_cloudwatch_logs/request.rs index 64a41948c3f7f..c8f85d399dd69 100644 --- a/src/sinks/aws_cloudwatch_logs/request.rs +++ b/src/sinks/aws_cloudwatch_logs/request.rs @@ -14,8 +14,9 @@ use aws_sdk_cloudwatchlogs::model::InputLogEvent; use aws_sdk_cloudwatchlogs::output::{DescribeLogStreamsOutput, PutLogEventsOutput}; use aws_sdk_cloudwatchlogs::types::SdkError; use aws_sdk_cloudwatchlogs::Client as CloudwatchLogsClient; -use aws_smithy_http::operation::{Operation, Request}; use futures::{future::BoxFuture, FutureExt}; +use http::header::HeaderName; +use http::HeaderValue; use indexmap::IndexMap; use tokio::sync::oneshot; @@ -103,9 +104,9 @@ impl Future for CloudwatchFuture { let response = match ready!(fut.poll_unpin(cx)) { Ok(response) => response, Err(err) => { - if let SdkError::ServiceError { err, raw: _ } = &err { + if let SdkError::ServiceError(inner) = &err { if let DescribeLogStreamsErrorKind::ResourceNotFoundException(_) = - err.kind + inner.err().kind { if self.create_missing_group { info!("Log group provided does not exist; creating a new one."); @@ -150,8 +151,8 @@ impl Future for CloudwatchFuture { Ok(_) => {} Err(err) => { let resource_already_exists = match &err { - SdkError::ServiceError { err, raw: _ } => matches!( - err.kind, + SdkError::ServiceError(inner) => matches!( + inner.err().kind, CreateLogGroupErrorKind::ResourceAlreadyExistsException(_) ), _ => false, @@ -175,8 +176,8 @@ impl Future for CloudwatchFuture { Ok(_) => {} Err(err) => { let resource_already_exists = match &err { - SdkError::ServiceError { err, raw: _ } => matches!( - err.kind, + SdkError::ServiceError(inner) => matches!( + inner.err().kind, CreateLogStreamErrorKind::ResourceAlreadyExistsException(_) ), _ => false, @@ -229,39 +230,34 @@ impl Client { let group_name = self.group_name.clone(); let stream_name = self.stream_name.clone(); let headers = self.headers.clone(); + Box::pin(async move { // #12760 this is a relatively convoluted way of changing the headers of a request // about to be sent. https://github.com/awslabs/aws-sdk-rust/issues/537 should // eventually make this better. 
- let op = PutLogEvents::builder() + let mut op = PutLogEvents::builder() .set_log_events(Some(log_events)) .set_sequence_token(sequence_token) .log_group_name(group_name) .log_stream_name(stream_name) .build() - .map_err(|err| SdkError::ConstructionFailure(err.into()))? + .map_err(SdkError::construction_failure)? .make_operation(cw_client.conf()) .await - .map_err(|err| SdkError::ConstructionFailure(err.into()))?; + .map_err(SdkError::construction_failure)?; - let (req, parts) = op.into_request_response(); - let (mut body, props) = req.into_parts(); for (header, value) in headers.iter() { let owned_header = header.clone(); let owned_value = value.clone(); - body.headers_mut().insert( - http::header::HeaderName::from_bytes(owned_header.as_bytes()) - .map_err(|err| SdkError::ConstructionFailure(err.into()))?, - http::HeaderValue::from_str(owned_value.as_str()) - .map_err(|err| SdkError::ConstructionFailure(err.into()))?, + op.request_mut().headers_mut().insert( + HeaderName::from_bytes(owned_header.as_bytes()) + .map_err(SdkError::construction_failure)?, + HeaderValue::from_str(owned_value.as_str()) + .map_err(SdkError::construction_failure)?, ); } - client - .call(Operation::from_parts( - Request::from_parts(body, props), - parts, - )) - .await + + client.call(op).await }) } diff --git a/src/sinks/aws_cloudwatch_logs/request_builder.rs b/src/sinks/aws_cloudwatch_logs/request_builder.rs index 0d2b63fa3322d..edbf4a752233c 100644 --- a/src/sinks/aws_cloudwatch_logs/request_builder.rs +++ b/src/sinks/aws_cloudwatch_logs/request_builder.rs @@ -39,8 +39,12 @@ impl Finalizable for CloudwatchRequest { } impl MetaDescriptive for CloudwatchRequest { - fn get_metadata(&self) -> RequestMetadata { - self.metadata + fn get_metadata(&self) -> &RequestMetadata { + &self.metadata + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.metadata } } @@ -87,7 +91,7 @@ impl CloudwatchRequestBuilder { self.transformer.transform(&mut event); let mut message_bytes = BytesMut::new(); - let builder = RequestMetadataBuilder::from_events(&event); + let builder = RequestMetadataBuilder::from_event(&event); if self.encoder.encode(event, &mut message_bytes).is_err() { // The encoder handles internal event emission for Error and EventsDropped. 
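The `MetaDescriptive` change repeated throughout this diff, a borrowed `get_metadata` plus a new `metadata_mut`, is what lets services take ownership of the metadata at call time and convert it into the grouped JSON byte size reported by the driver response. A minimal sketch of that call-site pattern, assuming a request type shaped like the ones above (names outside the trait are illustrative):

```rust
use vector_common::request_metadata::{GroupedCountByteSize, MetaDescriptive, RequestMetadata};

// Illustrative request type; real requests also carry events, keys, finalizers, etc.
struct ExampleRequest {
    metadata: RequestMetadata,
}

impl MetaDescriptive for ExampleRequest {
    fn get_metadata(&self) -> &RequestMetadata {
        &self.metadata
    }

    fn metadata_mut(&mut self) -> &mut RequestMetadata {
        &mut self.metadata
    }
}

fn grouped_size(mut req: ExampleRequest) -> GroupedCountByteSize {
    // Same shape as the CloudWatch and Kinesis services in this diff: take the
    // metadata out of the request, then consume it into the grouped byte size
    // that the response later returns from `events_sent`.
    let metadata = std::mem::take(req.metadata_mut());
    metadata.into_events_estimated_json_encoded_byte_size()
}
```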
diff --git a/src/sinks/aws_cloudwatch_logs/retry.rs b/src/sinks/aws_cloudwatch_logs/retry.rs index f3a03e48645e4..c089f532f5dd0 100644 --- a/src/sinks/aws_cloudwatch_logs/retry.rs +++ b/src/sinks/aws_cloudwatch_logs/retry.rs @@ -32,11 +32,13 @@ impl RetryLogic for CloudwatchRetryLogic { type Error = CloudwatchError; type Response = T; + // TODO this match may not be necessary given the logic in `is_retriable_error()` #[allow(clippy::cognitive_complexity)] // long, but just a hair over our limit fn is_retriable_error(&self, error: &Self::Error) -> bool { match error { CloudwatchError::Put(err) => { - if let SdkError::ServiceError { err, raw: _ } = err { + if let SdkError::ServiceError(inner) = err { + let err = inner.err(); if let PutLogEventsErrorKind::ServiceUnavailableException(_) = err.kind { return true; } @@ -44,7 +46,8 @@ impl RetryLogic for CloudwatchRetryLogic { is_retriable_error(err) } CloudwatchError::Describe(err) => { - if let SdkError::ServiceError { err, raw: _ } = err { + if let SdkError::ServiceError(inner) = err { + let err = inner.err(); if let DescribeLogStreamsErrorKind::ServiceUnavailableException(_) = err.kind { return true; } @@ -52,7 +55,8 @@ impl RetryLogic for CloudwatchRetryLogic { is_retriable_error(err) } CloudwatchError::CreateStream(err) => { - if let SdkError::ServiceError { err, raw: _ } = err { + if let SdkError::ServiceError(inner) = err { + let err = inner.err(); if let CreateLogStreamErrorKind::ServiceUnavailableException(_) = err.kind { return true; } @@ -66,7 +70,7 @@ impl RetryLogic for CloudwatchRetryLogic { #[cfg(test)] mod test { - use aws_sdk_cloudwatchlogs::error::{PutLogEventsError, PutLogEventsErrorKind}; + use aws_sdk_cloudwatchlogs::error::PutLogEventsError; use aws_sdk_cloudwatchlogs::types::SdkError; use aws_smithy_http::body::SdkBody; use aws_smithy_http::operation::Response; @@ -89,13 +93,10 @@ mod test { *http_response.status_mut() = http::StatusCode::BAD_REQUEST; let raw = Response::new(http_response); - let err = CloudwatchError::Put(SdkError::ServiceError { - err: PutLogEventsError::new( - PutLogEventsErrorKind::Unhandled(Box::new(meta_err.clone())), - meta_err, - ), + let err = CloudwatchError::Put(SdkError::service_error( + PutLogEventsError::unhandled(meta_err), raw, - }); + )); assert!(retry_logic.is_retriable_error(&err)); } } diff --git a/src/sinks/aws_cloudwatch_logs/service.rs b/src/sinks/aws_cloudwatch_logs/service.rs index 8bebe320b8715..f5341986d1ab5 100644 --- a/src/sinks/aws_cloudwatch_logs/service.rs +++ b/src/sinks/aws_cloudwatch_logs/service.rs @@ -4,6 +4,16 @@ use std::{ task::{ready, Context, Poll}, }; +use crate::sinks::prelude::{ + MezmoLoggingService, SinkContext, UserLoggingError, UserLoggingResponse, +}; +use crate::sinks::{ + aws_cloudwatch_logs::{ + config::CloudwatchLogsSinkConfig, request, retry::CloudwatchRetryLogic, + sink::BatchCloudwatchRequest, CloudwatchKey, + }, + util::{retries::FixedRetryPolicy, EncodedLength, TowerRequestConfig, TowerRequestSettings}, +}; use aws_sdk_cloudwatchlogs::error::{ CreateLogGroupError, CreateLogStreamError, DescribeLogStreamsError, PutLogEventsError, }; @@ -22,24 +32,12 @@ use tower::{ timeout::Timeout, Service, ServiceBuilder, ServiceExt, }; -use vector_common::request_metadata::MetaDescriptive; -use vector_core::{internal_event::CountByteSize, stream::DriverResponse}; -use vrl::value::Value; - -use crate::{ - config::SinkContext, - event::EventStatus, - mezmo::user_trace::{MezmoLoggingService, UserLoggingError, UserLoggingResponse}, - sinks::{ - aws_cloudwatch_logs::{ 
- config::CloudwatchLogsSinkConfig, request, retry::CloudwatchRetryLogic, - sink::BatchCloudwatchRequest, CloudwatchKey, - }, - util::{ - retries::FixedRetryPolicy, EncodedLength, TowerRequestConfig, TowerRequestSettings, - }, - }, +use vector_common::{ + finalization::EventStatus, + request_metadata::{GroupedCountByteSize, MetaDescriptive}, }; +use vector_core::stream::DriverResponse; +use vrl::value::Value; type Svc = Buffer< ConcurrencyLimit< @@ -140,7 +138,7 @@ where E: SdkErrorWithMessage, { match err { - SdkError::ServiceError { err, raw: _ } => err.inner_message().map(Into::into), + SdkError::ServiceError(inner) => inner.err().inner_message().map(Into::into), _ => None, // Other errors are not user-facing } } @@ -152,8 +150,7 @@ impl UserLoggingResponse for CloudwatchInnerResponse {} #[derive(Debug)] pub struct CloudwatchResponse { - events_count: usize, - events_byte_size: usize, + events_byte_size: GroupedCountByteSize, } impl crate::sinks::util::sink::Response for CloudwatchResponse { @@ -171,8 +168,8 @@ impl DriverResponse for CloudwatchResponse { EventStatus::Delivered } - fn events_sent(&self) -> CountByteSize { - CountByteSize(self.events_count, self.events_byte_size) + fn events_sent(&self) -> &GroupedCountByteSize { + &self.events_byte_size } } @@ -212,9 +209,9 @@ impl Service for CloudwatchLogsPartitionSvc { Poll::Ready(Ok(())) } - fn call(&mut self, req: BatchCloudwatchRequest) -> Self::Future { - let events_count = req.get_metadata().event_count(); - let events_byte_size = req.get_metadata().events_byte_size(); + fn call(&mut self, mut req: BatchCloudwatchRequest) -> Self::Future { + let metadata = std::mem::take(req.metadata_mut()); + let events_byte_size = metadata.into_events_estimated_json_encoded_byte_size(); let key = req.key; let events = req @@ -259,10 +256,7 @@ impl Service for CloudwatchLogsPartitionSvc { }; svc.oneshot(events) - .map_ok(move |_x| CloudwatchResponse { - events_count, - events_byte_size, - }) + .map_ok(move |_x| CloudwatchResponse { events_byte_size }) .map_err(Into::into) .boxed() } diff --git a/src/sinks/aws_cloudwatch_logs/sink.rs b/src/sinks/aws_cloudwatch_logs/sink.rs index a1546e8135687..3c320ad6236e7 100644 --- a/src/sinks/aws_cloudwatch_logs/sink.rs +++ b/src/sinks/aws_cloudwatch_logs/sink.rs @@ -51,8 +51,9 @@ where }) .batched_partitioned(CloudwatchPartitioner, batcher_settings) .map(|(key, events)| { - let metadata = - RequestMetadata::from_batch(events.iter().map(|req| req.get_metadata())); + let metadata = RequestMetadata::from_batch( + events.iter().map(|req| req.get_metadata().clone()), + ); BatchCloudwatchRequest { key, @@ -80,8 +81,12 @@ impl Finalizable for BatchCloudwatchRequest { } impl MetaDescriptive for BatchCloudwatchRequest { - fn get_metadata(&self) -> RequestMetadata { - self.metadata + fn get_metadata(&self) -> &RequestMetadata { + &self.metadata + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.metadata } } diff --git a/src/sinks/aws_cloudwatch_metrics/integration_tests.rs b/src/sinks/aws_cloudwatch_metrics/integration_tests.rs index f1044672e9aff..25c4f6648704e 100644 --- a/src/sinks/aws_cloudwatch_metrics/integration_tests.rs +++ b/src/sinks/aws_cloudwatch_metrics/integration_tests.rs @@ -35,7 +35,7 @@ async fn cloudwatch_metrics_healthcheck() { #[tokio::test] async fn cloudwatch_metrics_put_data() { - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let config = config(); let client = config.create_client(&cx.globals.proxy).await.unwrap(); let sink = 
CloudWatchMetricsSvc::new(config, client, cx).unwrap(); @@ -94,7 +94,7 @@ async fn cloudwatch_metrics_put_data() { #[tokio::test] async fn cloudwatch_metrics_namespace_partitioning() { - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let config = config(); let client = config.create_client(&cx.globals.proxy).await.unwrap(); let sink = CloudWatchMetricsSvc::new(config, client, cx).unwrap(); diff --git a/src/sinks/aws_cloudwatch_metrics/mod.rs b/src/sinks/aws_cloudwatch_metrics/mod.rs index 378f87e712fd3..683ec252e7145 100644 --- a/src/sinks/aws_cloudwatch_metrics/mod.rs +++ b/src/sinks/aws_cloudwatch_metrics/mod.rs @@ -15,7 +15,7 @@ use futures_util::{future, future::BoxFuture}; use std::task::{Context, Poll}; use tower::Service; use vector_config::configurable_component; -use vector_core::{sink::VectorSink, EstimatedJsonEncodedSizeOf}; +use vector_core::{sink::VectorSink, ByteSizeOf, EstimatedJsonEncodedSizeOf}; use vrl::value::Value; use crate::{ @@ -49,7 +49,10 @@ impl SinkBatchSettings for CloudWatchMetricsDefaultBatchSettings { } /// Configuration for the `aws_cloudwatch_metrics` sink. -#[configurable_component(sink("aws_cloudwatch_metrics"))] +#[configurable_component(sink( + "aws_cloudwatch_metrics", + "Publish metric events to AWS CloudWatch Metrics." +))] #[derive(Clone, Debug, Default)] #[serde(deny_unknown_fields)] pub struct CloudWatchMetricsSinkConfig { @@ -123,6 +126,7 @@ impl ClientBuilder for CloudwatchMetricsClientBuilder { } #[async_trait::async_trait] +#[typetag::serde(name = "aws_cloudwatch_metrics")] impl SinkConfig for CloudWatchMetricsSinkConfig { async fn build( &self, @@ -171,7 +175,7 @@ impl CloudWatchMetricsSinkConfig { create_client::( &self.auth, region, - self.region.endpoint()?, + self.region.endpoint(), proxy, &self.tls, true, @@ -221,7 +225,7 @@ type CloudwatchMetricsError = SdkError; impl UserLoggingError for CloudwatchMetricsError { fn log_msg(&self) -> Option { match &self { - SdkError::ServiceError { err, raw: _ } => err.message().map(Into::into), + SdkError::ServiceError(inner) => inner.err().message().map(Into::into), _ => None, // Other errors are not user-facing } } @@ -256,7 +260,8 @@ impl CloudWatchMetricsSvc { .sink_map_err(|error| error!(message = "Fatal CloudwatchMetrics sink error.", %error)) .with_flat_map(move |event: Event| { stream::iter({ - let byte_size = event.estimated_json_encoded_size_of(); + let byte_size = event.allocated_bytes(); + let json_byte_size = event.estimated_json_encoded_size_of(); normalizer.normalize(event.into_metric()).map(|mut metric| { let namespace = metric .take_namespace() @@ -265,11 +270,13 @@ impl CloudWatchMetricsSvc { Ok(EncodedEvent::new( PartitionInnerBuffer::new(metric, namespace), byte_size, + json_byte_size, )) }) }) }); + #[allow(deprecated)] Ok(VectorSink::from_event_sink(sink)) } diff --git a/src/sinks/aws_kinesis/config.rs b/src/sinks/aws_kinesis/config.rs index bcafe9e17e05b..45070b6d65fbf 100644 --- a/src/sinks/aws_kinesis/config.rs +++ b/src/sinks/aws_kinesis/config.rs @@ -1,20 +1,13 @@ use std::marker::PhantomData; -use tower::ServiceBuilder; -use vector_config::configurable_component; -use vector_core::{ - config::{DataType, Input}, - sink::VectorSink, - stream::BatcherSettings, -}; +use vector_core::stream::BatcherSettings; use crate::{ aws::{AwsAuthentication, RegionOrEndpoint}, - codecs::{Encoder, EncodingConfig}, - config::{AcknowledgementsConfig, SinkContext}, - mezmo::user_trace::{MezmoLoggingService, UserLoggingError}, - sinks::util::{retries::RetryLogic, 
Compression, ServiceBuilderExt, TowerRequestConfig}, - tls::TlsConfig, + sinks::{ + prelude::*, + util::{retries::RetryLogic, TowerRequestConfig}, + }, }; use aws_sdk_firehose::types::SdkError; @@ -60,6 +53,11 @@ pub struct KinesisSinkBaseConfig { #[serde(default)] pub auth: AwsAuthentication, + /// Whether or not to retry successful requests containing partial failures. + #[serde(default)] + #[configurable(metadata(docs::advanced))] + pub request_retry_partial: bool, + #[configurable(derived)] #[serde( default, @@ -80,12 +78,13 @@ impl KinesisSinkBaseConfig { } /// Builds an aws_kinesis sink. -pub async fn build_sink( +pub fn build_sink( config: &KinesisSinkBaseConfig, partition_key_field: Option, batch_settings: BatcherSettings, client: C, cx: SinkContext, + retry_logic: RT, ) -> crate::Result where C: SendRecord + Clone + Send + Sync + 'static, @@ -102,7 +101,7 @@ where let region = config.region.region(); let service = ServiceBuilder::new() - .settings::>(request_limits, RT::default()) + .settings::>(request_limits, retry_logic) .service(MezmoLoggingService::new( KinesisService:: { client, diff --git a/src/sinks/aws_kinesis/firehose/config.rs b/src/sinks/aws_kinesis/firehose/config.rs index abfc18e8f89dc..7c6956450096e 100644 --- a/src/sinks/aws_kinesis/firehose/config.rs +++ b/src/sinks/aws_kinesis/firehose/config.rs @@ -8,6 +8,7 @@ use futures::FutureExt; use snafu::Snafu; use vector_config::configurable_component; +use crate::sinks::util::retries::RetryAction; use crate::{ aws::{create_client, is_retriable_error, ClientBuilder}, config::{AcknowledgementsConfig, GenerateConfig, Input, ProxyConfig, SinkConfig, SinkContext}, @@ -65,7 +66,10 @@ impl SinkBatchSettings for KinesisFirehoseDefaultBatchSettings { } /// Configuration for the `aws_kinesis_firehose` sink. -#[configurable_component(sink("aws_kinesis_firehose"))] +#[configurable_component(sink( + "aws_kinesis_firehose", + "Publish logs to AWS Kinesis Data Firehose topics." 
+))] #[derive(Clone, Debug)] pub struct KinesisFirehoseSinkConfig { #[serde(flatten)] @@ -108,7 +112,7 @@ impl KinesisFirehoseSinkConfig { create_client::( &self.base.auth, self.base.region.region(), - self.base.region.endpoint()?, + self.base.region.endpoint(), proxy, &self.base.tls, true, @@ -118,6 +122,7 @@ impl KinesisFirehoseSinkConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "aws_kinesis_firehose")] impl SinkConfig for KinesisFirehoseSinkConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let client = self.create_client(&cx.proxy).await?; @@ -142,8 +147,10 @@ impl SinkConfig for KinesisFirehoseSinkConfig { batch_settings, KinesisFirehoseClient { client }, cx, - ) - .await?; + KinesisRetryLogic { + retry_partial: self.base.request_retry_partial, + }, + )?; Ok((sink, healthcheck)) } @@ -168,18 +175,29 @@ impl GenerateConfig for KinesisFirehoseSinkConfig { } #[derive(Clone, Default)] -struct KinesisRetryLogic; +struct KinesisRetryLogic { + retry_partial: bool, +} impl RetryLogic for KinesisRetryLogic { type Error = SdkError; type Response = KinesisResponse; fn is_retriable_error(&self, error: &Self::Error) -> bool { - if let SdkError::ServiceError { err, raw: _ } = error { - if let PutRecordBatchErrorKind::ServiceUnavailableException(_) = err.kind { + if let SdkError::ServiceError(inner) = error { + if let PutRecordBatchErrorKind::ServiceUnavailableException(_) = inner.err().kind { return true; } } is_retriable_error(error) } + + fn should_retry_response(&self, response: &Self::Response) -> RetryAction { + if response.failure_count > 0 && self.retry_partial { + let msg = format!("partial error count {}", response.failure_count); + RetryAction::Retry(msg.into()) + } else { + RetryAction::Successful + } + } } diff --git a/src/sinks/aws_kinesis/firehose/integration_tests.rs b/src/sinks/aws_kinesis/firehose/integration_tests.rs index 8a46c57f83e14..648d5082a1ad7 100644 --- a/src/sinks/aws_kinesis/firehose/integration_tests.rs +++ b/src/sinks/aws_kinesis/firehose/integration_tests.rs @@ -57,11 +57,12 @@ async fn firehose_put_records() { tls: None, auth: Default::default(), acknowledgements: Default::default(), + request_retry_partial: Default::default(), }; let config = KinesisFirehoseSinkConfig { batch, base }; - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let (sink, _) = config.build(cx).await.unwrap(); @@ -138,7 +139,7 @@ async fn firehose_client() -> aws_sdk_firehose::Client { create_client::( &auth, region_endpoint.region(), - region_endpoint.endpoint().unwrap(), + region_endpoint.endpoint(), &proxy, &None, true, @@ -157,7 +158,7 @@ async fn ensure_elasticsearch_domain(domain_name: String) -> String { .await .unwrap(), ) - .endpoint_resolver(test_region_endpoint().endpoint().unwrap().unwrap()) + .endpoint_url(test_region_endpoint().endpoint().unwrap()) .region(test_region_endpoint().region()) .build(), ); diff --git a/src/sinks/aws_kinesis/firehose/mod.rs b/src/sinks/aws_kinesis/firehose/mod.rs index 484568847af66..e3871594ebad2 100644 --- a/src/sinks/aws_kinesis/firehose/mod.rs +++ b/src/sinks/aws_kinesis/firehose/mod.rs @@ -26,7 +26,7 @@ pub type KinesisClient = Client; impl UserLoggingError for SdkError { fn log_msg(&self) -> Option { match &self { - SdkError::ServiceError { err, raw: _ } => err.message().map(Into::into), + SdkError::ServiceError(inner) => inner.err().message().map(Into::into), _ => None, // Other errors are not user-facing } } diff --git a/src/sinks/aws_kinesis/firehose/record.rs 
b/src/sinks/aws_kinesis/firehose/record.rs index 49d1ee821f5c3..114d487558558 100644 --- a/src/sinks/aws_kinesis/firehose/record.rs +++ b/src/sinks/aws_kinesis/firehose/record.rs @@ -1,8 +1,11 @@ +use aws_sdk_firehose::output::PutRecordBatchOutput; use aws_sdk_firehose::types::{Blob, SdkError}; use bytes::Bytes; use tracing::Instrument; -use super::{KinesisClient, KinesisError, KinesisRecord, Record, SendRecord}; +use crate::sinks::prelude::*; + +use super::{KinesisClient, KinesisError, KinesisRecord, KinesisResponse, Record, SendRecord}; #[derive(Clone)] pub struct KinesisFirehoseRecord { @@ -46,7 +49,15 @@ impl SendRecord for KinesisFirehoseClient { type T = KinesisRecord; type E = KinesisError; - async fn send(&self, records: Vec, stream_name: String) -> Option> { + async fn send( + &self, + records: Vec, + stream_name: String, + ) -> Result> { + let rec_count = records.len(); + let total_size = records.iter().fold(0, |acc, record| { + acc + record.data().map(|v| v.as_ref().len()).unwrap_or_default() + }); self.client .put_record_batch() .set_records(Some(records)) @@ -54,6 +65,9 @@ impl SendRecord for KinesisFirehoseClient { .send() .instrument(info_span!("request").or_current()) .await - .err() + .map(|output: PutRecordBatchOutput| KinesisResponse { + failure_count: output.failed_put_count().unwrap_or(0) as usize, + events_byte_size: CountByteSize(rec_count, JsonSize::new(total_size)).into(), + }) } } diff --git a/src/sinks/aws_kinesis/firehose/tests.rs b/src/sinks/aws_kinesis/firehose/tests.rs index a15fb47a4e794..bb6b94729d344 100644 --- a/src/sinks/aws_kinesis/firehose/tests.rs +++ b/src/sinks/aws_kinesis/firehose/tests.rs @@ -33,12 +33,13 @@ async fn check_batch_size() { request: Default::default(), tls: None, auth: Default::default(), + request_retry_partial: false, acknowledgements: Default::default(), }; let config = KinesisFirehoseSinkConfig { batch, base }; - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let res = config.build(cx).await; assert_eq!( @@ -62,12 +63,13 @@ async fn check_batch_events() { request: Default::default(), tls: None, auth: Default::default(), + request_retry_partial: false, acknowledgements: Default::default(), }; let config = KinesisFirehoseSinkConfig { batch, base }; - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let res = config.build(cx).await; assert_eq!( diff --git a/src/sinks/aws_kinesis/record.rs b/src/sinks/aws_kinesis/record.rs index 03ad11c710416..a244f028cb78d 100644 --- a/src/sinks/aws_kinesis/record.rs +++ b/src/sinks/aws_kinesis/record.rs @@ -2,6 +2,7 @@ use async_trait::async_trait; use aws_smithy_client::SdkError; use bytes::Bytes; +use super::KinesisResponse; /// An AWS Kinesis record type primarily to store the underlying aws crates' actual record `T`, and /// to abstract the encoded length calculation. pub trait Record { @@ -24,5 +25,9 @@ pub trait SendRecord { type E; /// Sends the records. 
- async fn send(&self, records: Vec, stream_name: String) -> Option>; + async fn send( + &self, + records: Vec, + stream_name: String, + ) -> Result>; } diff --git a/src/sinks/aws_kinesis/request_builder.rs b/src/sinks/aws_kinesis/request_builder.rs index 1491d59b08ee7..0483dd01e318b 100644 --- a/src/sinks/aws_kinesis/request_builder.rs +++ b/src/sinks/aws_kinesis/request_builder.rs @@ -53,8 +53,12 @@ impl MetaDescriptive for KinesisRequest where R: Record, { - fn get_metadata(&self) -> RequestMetadata { - self.metadata + fn get_metadata(&self) -> &RequestMetadata { + &self.metadata + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.metadata } } @@ -102,7 +106,7 @@ where partition_key: processed_event.metadata.partition_key, }; let event = Event::from(processed_event.event); - let builder = RequestMetadataBuilder::from_events(&event); + let builder = RequestMetadataBuilder::from_event(&event); (kinesis_metadata, builder, event) } diff --git a/src/sinks/aws_kinesis/service.rs b/src/sinks/aws_kinesis/service.rs index 42ea5a951e8d1..e7c2476cb7d1a 100644 --- a/src/sinks/aws_kinesis/service.rs +++ b/src/sinks/aws_kinesis/service.rs @@ -5,17 +5,12 @@ use std::{ use aws_smithy_client::SdkError; use aws_types::region::Region; -use futures::future::BoxFuture; -use tower::Service; -use vector_common::request_metadata::MetaDescriptive; -use vector_core::{internal_event::CountByteSize, stream::DriverResponse}; use super::{ record::{Record, SendRecord}, sink::BatchKinesisRequest, }; -use crate::event::EventStatus; -use crate::mezmo::user_trace::UserLoggingResponse; +use crate::{event::EventStatus, sinks::prelude::*}; pub struct KinesisService { pub client: C, @@ -42,8 +37,8 @@ where #[derive(Debug)] pub struct KinesisResponse { - count: usize, - events_byte_size: usize, + pub(crate) failure_count: usize, + pub(crate) events_byte_size: GroupedCountByteSize, } impl UserLoggingResponse for KinesisResponse {} @@ -53,8 +48,8 @@ impl DriverResponse for KinesisResponse { EventStatus::Delivered } - fn events_sent(&self) -> CountByteSize { - CountByteSize(self.count, self.events_byte_size) + fn events_sent(&self) -> &GroupedCountByteSize { + &self.events_byte_size } } @@ -75,9 +70,9 @@ where } // Emission of internal events for errors and dropped events is handled upstream by the caller. - fn call(&mut self, requests: BatchKinesisRequest) -> Self::Future { - let events_byte_size = requests.get_metadata().events_byte_size(); - let count = requests.get_metadata().event_count(); + fn call(&mut self, mut requests: BatchKinesisRequest) -> Self::Future { + let metadata = std::mem::take(requests.metadata_mut()); + let events_byte_size = metadata.into_events_estimated_json_encoded_byte_size(); let records = requests .events @@ -89,16 +84,10 @@ where let stream_name = self.stream_name.clone(); Box::pin(async move { - // Returning a Result (a trait that implements Try) is not a stable feature, - // so instead we have to explicitly check for error and return. 
- // https://github.com/rust-lang/rust/issues/84277 - if let Some(e) = client.send(records, stream_name).await { - return Err(e); - } - - Ok(KinesisResponse { - count, - events_byte_size, + client.send(records, stream_name).await.map(|mut r| { + // augment the response + r.events_byte_size = events_byte_size; + r }) }) } diff --git a/src/sinks/aws_kinesis/sink.rs b/src/sinks/aws_kinesis/sink.rs index 0f74320ad4be1..0341c0e8244d6 100644 --- a/src/sinks/aws_kinesis/sink.rs +++ b/src/sinks/aws_kinesis/sink.rs @@ -1,22 +1,13 @@ use std::{borrow::Cow, fmt::Debug, marker::PhantomData, num::NonZeroUsize}; -use async_trait::async_trait; -use futures::{future, stream::BoxStream, StreamExt}; use rand::random; -use tower::Service; -use vector_common::{ - finalization::{EventFinalizers, Finalizable}, - request_metadata::{MetaDescriptive, RequestMetadata}, -}; -use vector_core::{ - partition::Partitioner, - stream::{BatcherSettings, DriverResponse}, -}; use crate::{ - event::{Event, LogEvent}, internal_events::{AwsKinesisStreamNoPartitionKeyError, SinkRequestBuildError}, - sinks::util::{processed_event::ProcessedEvent, SinkBuilderExt, StreamSink}, + sinks::{ + prelude::*, + util::{processed_event::ProcessedEvent, StreamSink}, + }, }; use super::{ @@ -78,8 +69,9 @@ where self.batch_settings, ) .map(|(key, events)| { - let metadata = - RequestMetadata::from_batch(events.iter().map(|req| req.get_metadata())); + let metadata = RequestMetadata::from_batch( + events.iter().map(|req| req.get_metadata().clone()), + ); BatchKinesisRequest { key, events, @@ -168,7 +160,7 @@ where partition_key: self.key.partition_key.clone(), }, events: self.events.to_vec(), - metadata: self.metadata, + metadata: self.metadata.clone(), } } } @@ -186,8 +178,12 @@ impl MetaDescriptive for BatchKinesisRequest where R: Record + Clone, { - fn get_metadata(&self) -> RequestMetadata { - self.metadata + fn get_metadata(&self) -> &RequestMetadata { + &self.metadata + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.metadata } } diff --git a/src/sinks/aws_kinesis/streams/config.rs b/src/sinks/aws_kinesis/streams/config.rs index 5c02e11726c6d..0e03ea3f526fa 100644 --- a/src/sinks/aws_kinesis/streams/config.rs +++ b/src/sinks/aws_kinesis/streams/config.rs @@ -6,6 +6,7 @@ use futures::FutureExt; use snafu::Snafu; use vector_config::{component::GenerateConfig, configurable_component}; +use crate::sinks::util::retries::RetryAction; use crate::{ aws::{create_client, is_retriable_error, ClientBuilder}, config::{AcknowledgementsConfig, Input, ProxyConfig, SinkConfig, SinkContext}, @@ -66,7 +67,10 @@ impl SinkBatchSettings for KinesisDefaultBatchSettings { } /// Configuration for the `aws_kinesis_streams` sink. -#[configurable_component(sink("aws_kinesis_streams"))] +#[configurable_component(sink( + "aws_kinesis_streams", + "Publish logs to AWS Kinesis Streams topics." 
+))] #[derive(Clone, Debug)] pub struct KinesisStreamsSinkConfig { #[serde(flatten)] @@ -115,7 +119,7 @@ impl KinesisStreamsSinkConfig { create_client::( &self.base.auth, self.base.region.region(), - self.base.region.endpoint()?, + self.base.region.endpoint(), proxy, &self.base.tls, true, @@ -125,6 +129,7 @@ impl KinesisStreamsSinkConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "aws_kinesis_streams")] impl SinkConfig for KinesisStreamsSinkConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let client = self.create_client(&cx.proxy).await?; @@ -149,8 +154,10 @@ impl SinkConfig for KinesisStreamsSinkConfig { batch_settings, KinesisStreamClient { client }, cx, - ) - .await?; + KinesisRetryLogic { + retry_partial: self.base.request_retry_partial, + }, + )?; Ok((sink, healthcheck)) } @@ -175,26 +182,38 @@ impl GenerateConfig for KinesisStreamsSinkConfig { } } #[derive(Default, Clone)] -struct KinesisRetryLogic; +struct KinesisRetryLogic { + retry_partial: bool, +} impl RetryLogic for KinesisRetryLogic { type Error = SdkError; type Response = KinesisResponse; fn is_retriable_error(&self, error: &Self::Error) -> bool { - if let SdkError::ServiceError { err, raw: _ } = error { + if let SdkError::ServiceError(inner) = error { // Note that if the request partially fails (records sent to one // partition fail but the others do not, for example), Vector // does not retry. This line only covers a failure for the entire // request. // // https://github.com/vectordotdev/vector/issues/359 - if let PutRecordsErrorKind::ProvisionedThroughputExceededException(_) = err.kind { + if let PutRecordsErrorKind::ProvisionedThroughputExceededException(_) = inner.err().kind + { return true; } } is_retriable_error(error) } + + fn should_retry_response(&self, response: &Self::Response) -> RetryAction { + if response.failure_count > 0 && self.retry_partial { + let msg = format!("partial error count {}", response.failure_count); + RetryAction::Retry(msg.into()) + } else { + RetryAction::Successful + } + } } #[cfg(test)] diff --git a/src/sinks/aws_kinesis/streams/integration_tests.rs b/src/sinks/aws_kinesis/streams/integration_tests.rs index a9a66804e3729..a800ff2b2960a 100644 --- a/src/sinks/aws_kinesis/streams/integration_tests.rs +++ b/src/sinks/aws_kinesis/streams/integration_tests.rs @@ -52,7 +52,7 @@ fn kinesis_address() -> String { // base, // }; // -// let cx = SinkContext::new_test(); +// let cx = SinkContext::default(); // // let sink = config.build(cx).await.unwrap().0; // @@ -98,6 +98,7 @@ async fn kinesis_put_records_without_partition_key() { tls: Default::default(), auth: Default::default(), acknowledgements: Default::default(), + request_retry_partial: Default::default(), }; let config = KinesisStreamsSinkConfig { @@ -106,7 +107,7 @@ async fn kinesis_put_records_without_partition_key() { base, }; - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let sink = config.build(cx).await.unwrap().0; @@ -174,7 +175,7 @@ async fn client() -> aws_sdk_kinesis::Client { create_client::( &auth, region.region(), - region.endpoint().unwrap(), + region.endpoint(), &proxy, &None, true, diff --git a/src/sinks/aws_kinesis/streams/mod.rs b/src/sinks/aws_kinesis/streams/mod.rs index 9094718712d6e..8b1ef88ba17d3 100644 --- a/src/sinks/aws_kinesis/streams/mod.rs +++ b/src/sinks/aws_kinesis/streams/mod.rs @@ -25,7 +25,7 @@ pub type KinesisClient = Client; impl UserLoggingError for SdkError { fn log_msg(&self) -> Option { match &self { - 
SdkError::ServiceError { err, raw: _ } => err.message().map(Into::into), + SdkError::ServiceError(inner) => inner.err().message().map(Into::into), _ => None, // Other errors are not user-facing } } diff --git a/src/sinks/aws_kinesis/streams/record.rs b/src/sinks/aws_kinesis/streams/record.rs index 67eba50d9aff2..c7eebe4f6e0a7 100644 --- a/src/sinks/aws_kinesis/streams/record.rs +++ b/src/sinks/aws_kinesis/streams/record.rs @@ -1,8 +1,11 @@ +use aws_sdk_kinesis::output::PutRecordsOutput; use aws_sdk_kinesis::types::{Blob, SdkError}; use bytes::Bytes; use tracing::Instrument; -use super::{KinesisClient, KinesisError, KinesisRecord, Record, SendRecord}; +use crate::sinks::prelude::*; + +use super::{KinesisClient, KinesisError, KinesisRecord, KinesisResponse, Record, SendRecord}; #[derive(Clone)] pub struct KinesisStreamRecord { @@ -62,7 +65,15 @@ impl SendRecord for KinesisStreamClient { type T = KinesisRecord; type E = KinesisError; - async fn send(&self, records: Vec, stream_name: String) -> Option> { + async fn send( + &self, + records: Vec, + stream_name: String, + ) -> Result> { + let rec_count = records.len(); + let total_size = records.iter().fold(0, |acc, record| { + acc + record.data().map(|v| v.as_ref().len()).unwrap_or_default() + }); self.client .put_records() .set_records(Some(records)) @@ -70,6 +81,9 @@ impl SendRecord for KinesisStreamClient { .send() .instrument(info_span!("request").or_current()) .await - .err() + .map(|output: PutRecordsOutput| KinesisResponse { + failure_count: output.failed_record_count().unwrap_or(0) as usize, + events_byte_size: CountByteSize(rec_count, JsonSize::new(total_size)).into(), + }) } } diff --git a/src/sinks/aws_s3/config.rs b/src/sinks/aws_s3/config.rs index b4315a2847911..25296f5bc4a24 100644 --- a/src/sinks/aws_s3/config.rs +++ b/src/sinks/aws_s3/config.rs @@ -34,7 +34,10 @@ use crate::{ }; /// Configuration for the `aws_s3` sink. -#[configurable_component(sink("aws_s3"))] +#[configurable_component(sink( + "aws_s3", + "Store observability events in the AWS S3 object storage system." +))] #[derive(Clone, Debug)] #[serde(deny_unknown_fields)] pub struct S3SinkConfig { @@ -84,6 +87,7 @@ pub struct S3SinkConfig { /// This ensures there are no name collisions, and can be useful in high-volume workloads where /// object keys must be unique. #[serde(default = "crate::serde::default_true")] + #[configurable(metadata(docs::human_name = "Append UUID to Filename"))] pub filename_append_uuid: bool, /// The filename extension to use in the object key. 
@@ -166,6 +170,7 @@ impl GenerateConfig for S3SinkConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "aws_s3")] impl SinkConfig for S3SinkConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let service = self.create_service(&cx.proxy).await?; diff --git a/src/sinks/aws_s3/integration_tests.rs b/src/sinks/aws_s3/integration_tests.rs index 17d54ffa710ba..c8d176c85415b 100644 --- a/src/sinks/aws_s3/integration_tests.rs +++ b/src/sinks/aws_s3/integration_tests.rs @@ -51,7 +51,7 @@ pub fn s3_address() -> String { #[tokio::test] async fn s3_insert_message_into_with_flat_key_prefix() { - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let bucket = uuid::Uuid::new_v4().to_string(); @@ -85,7 +85,7 @@ async fn s3_insert_message_into_with_flat_key_prefix() { #[tokio::test] async fn s3_insert_message_into_with_folder_key_prefix() { - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let bucket = uuid::Uuid::new_v4().to_string(); @@ -119,7 +119,7 @@ async fn s3_insert_message_into_with_folder_key_prefix() { #[tokio::test] async fn s3_insert_message_into_with_ssekms_key_id() { - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let bucket = uuid::Uuid::new_v4().to_string(); @@ -156,7 +156,7 @@ async fn s3_insert_message_into_with_ssekms_key_id() { #[tokio::test] async fn s3_rotate_files_after_the_buffer_size_is_reached() { - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let bucket = uuid::Uuid::new_v4().to_string(); @@ -213,7 +213,7 @@ async fn s3_gzip() { // to 1000, and using gzip compression. We test to ensure that all of the keys we end up // writing represent the sum total of the lines: we expect 3 batches, each of which should // have 1000 lines. - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let bucket = uuid::Uuid::new_v4().to_string(); @@ -252,13 +252,58 @@ async fn s3_gzip() { assert_eq!(lines, response_lines); } +#[tokio::test] +async fn s3_zstd() { + // Here, we're creating a bunch of events, approximately 3000, while setting our batch size + // to 1000, and using zstd compression. We test to ensure that all of the keys we end up + // writing represent the sum total of the lines: we expect 3 batches, each of which should + // have 1000 lines. 
+ let cx = SinkContext::default(); + + let bucket = uuid::Uuid::new_v4().to_string(); + + create_bucket(&bucket, false).await; + + let batch_size = 1_000; + let batch_multiplier = 3; + let config = S3SinkConfig { + compression: Compression::zstd_default(), + filename_time_format: "%s%f".into(), + ..config(&bucket, batch_size) + }; + + let prefix = config.key_prefix.clone(); + let service = config.create_service(&cx.globals.proxy).await.unwrap(); + let sink = config.build_processor(service, cx).unwrap(); + + let (lines, events, receiver) = make_events_batch(100, batch_size * batch_multiplier); + run_and_assert_sink_compliance(sink, events, &AWS_SINK_TAGS).await; + assert_eq!(receiver.await, BatchStatus::Delivered); + + let keys = get_keys(&bucket, prefix).await; + assert_eq!(keys.len(), batch_multiplier); + + let mut response_lines: Vec = Vec::new(); + let mut key_stream = stream::iter(keys); + while let Some(key) = key_stream.next().await { + assert!(key.ends_with(".log.zst")); + + let obj = get_object(&bucket, key).await; + assert_eq!(obj.content_encoding, Some("zstd".to_string())); + + response_lines.append(&mut get_zstd_lines(obj).await); + } + + assert_eq!(lines, response_lines); +} + // NOTE: this test doesn't actually validate anything because localstack // doesn't enforce the required Content-MD5 header on the request for // buckets with object lock enabled // https://github.com/localstack/localstack/issues/4166 #[tokio::test] async fn s3_insert_message_into_object_lock() { - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let bucket = uuid::Uuid::new_v4().to_string(); @@ -312,7 +357,7 @@ async fn s3_insert_message_into_object_lock() { #[tokio::test] async fn acknowledges_failures() { - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let bucket = uuid::Uuid::new_v4().to_string(); @@ -363,6 +408,80 @@ async fn s3_healthchecks_invalid_bucket() { .is_err()); } +#[tokio::test] +async fn s3_flush_on_exhaustion() { + let cx = SinkContext::default(); + + let bucket = uuid::Uuid::new_v4().to_string(); + create_bucket(&bucket, false).await; + + // batch size of ten events, timeout of ten seconds + let config = { + let mut batch = BatchConfig::default(); + batch.max_events = Some(10); + batch.timeout_secs = Some(10.0); + + S3SinkConfig { + bucket: bucket.to_string(), + key_prefix: random_string(10) + "/date=%F", + filename_time_format: default_filename_time_format(), + filename_append_uuid: true, + filename_extension: None, + options: S3Options::default(), + region: RegionOrEndpoint::with_both("minio", s3_address()), + encoding: (None::, TextSerializerConfig::default()).into(), + compression: Compression::None, + batch, + request: TowerRequestConfig::default(), + tls: Default::default(), + auth: Default::default(), + acknowledgements: Default::default(), + } + }; + let prefix = config.key_prefix.clone(); + let service = config.create_service(&cx.globals.proxy).await.unwrap(); + let sink = config.build_processor(service, cx).unwrap(); + + let (lines, _events) = random_lines_with_stream(100, 2, None); // only generate two events (less than batch size) + + let events = lines.clone().into_iter().enumerate().map(|(i, line)| { + let mut e = LogEvent::from(line); + let i = if i < 10 { + 1 + } else if i < 20 { + 2 + } else { + 3 + }; + e.insert("i", i.to_string()); + Event::from(e) + }); + + // Here, we validate that the s3 sink flushes when its source stream is exhausted + // by giving it a number of inputs less than the batch size, verifying that the + // 
outputs for the in-flight batch are flushed. By timing out in 3 seconds with a + // flush period of ten seconds, we verify that the flush is triggered *at stream + // completion* and not because of periodic flushing. + assert!(tokio::time::timeout( + Duration::from_secs(3), + run_and_assert_sink_compliance(sink, stream::iter(events), &AWS_SINK_TAGS) + ) + .await + .is_ok()); + + let keys = get_keys(&bucket, prefix).await; + assert_eq!(keys.len(), 1); + + let mut response_lines: Vec = Vec::new(); + let mut key_stream = stream::iter(keys); + while let Some(key) = key_stream.next().await { + let obj = get_object(&bucket, key).await; + response_lines.append(&mut get_lines(obj).await); + } + + assert_eq!(lines, response_lines); // if all events are received, and lines.len() < batch size, then a flush was performed. +} + async fn client() -> S3Client { let auth = AwsAuthentication::test_auth(); let region = RegionOrEndpoint::with_both("minio", s3_address()); @@ -371,7 +490,7 @@ async fn client() -> S3Client { create_client::( &auth, region.region(), - region.endpoint().unwrap(), + region.endpoint(), &proxy, &tls_options, true, @@ -428,7 +547,7 @@ pub async fn create_bucket(bucket: &str, object_lock_enabled: bool) { { Ok(_) => {} Err(err) => match err { - SdkError::ServiceError { err, raw: _ } => match err.kind { + SdkError::ServiceError(inner) => match &inner.err().kind { CreateBucketErrorKind::BucketAlreadyOwnedByYou(_) => {} err => panic!("Failed to create bucket: {:?}", err), }, @@ -483,6 +602,13 @@ async fn get_gzipped_lines(obj: GetObjectOutput) -> Vec { buf_read.lines().map(|l| l.unwrap()).collect() } +async fn get_zstd_lines(obj: GetObjectOutput) -> Vec { + let body = get_object_output_body(obj).await; + let decoder = zstd::Decoder::new(body).expect("zstd decoder initialization failed"); + let buf_read = BufReader::new(decoder); + buf_read.lines().map(|l| l.unwrap()).collect() +} + async fn get_object_output_body(obj: GetObjectOutput) -> impl std::io::Read { obj.body.collect().await.unwrap().reader() } diff --git a/src/sinks/aws_s3/integration_tests_mezmo.rs b/src/sinks/aws_s3/integration_tests_mezmo.rs index b6a6aeea153e1..d19968e0a7fcc 100644 --- a/src/sinks/aws_s3/integration_tests_mezmo.rs +++ b/src/sinks/aws_s3/integration_tests_mezmo.rs @@ -4,7 +4,7 @@ use crate::mezmo::reshape_log_event_by_message; use assay::assay; use bytes::Bytes; use codecs::decoding::format::Deserializer; -use codecs::decoding::format::JsonDeserializerConfig; +use codecs::decoding::format::{JsonDeserializerConfig, JsonDeserializerOptions}; use codecs::{encoding::FramingConfig, JsonSerializerConfig, MetricTagValues}; use futures::Stream; use similar_asserts::assert_eq; @@ -65,7 +65,7 @@ async fn s3_message_objects_are_reshaped() { let response_lines = get_lines(obj).await; let input = Bytes::from(response_lines[0].clone()); - let deserializer = JsonDeserializerConfig::new().build(); + let deserializer = JsonDeserializerConfig::new(JsonDeserializerOptions::default()).build(); let got_events = deserializer.parse(input, LogNamespace::Vector).unwrap(); // Loop to assert results for 2 reasons: @@ -119,7 +119,7 @@ async fn s3_message_objects_not_reshaped_because_of_env() { let response_lines = get_lines(obj).await; let input = Bytes::from(response_lines[0].clone()); - let deserializer = JsonDeserializerConfig::new().build(); + let deserializer = JsonDeserializerConfig::new(JsonDeserializerOptions::default()).build(); let got_events = deserializer.parse(input, LogNamespace::Vector).unwrap(); // The `message` property 
should still exist diff --git a/src/sinks/aws_sqs/config.rs b/src/sinks/aws_sqs/config.rs index a12bc43de5d52..e8339a1a64ad9 100644 --- a/src/sinks/aws_sqs/config.rs +++ b/src/sinks/aws_sqs/config.rs @@ -32,7 +32,10 @@ pub(super) enum BuildError { } /// Configuration for the `aws_sqs` sink. -#[configurable_component(sink("aws_sqs"))] +#[configurable_component(sink( + "aws_sqs", + "Publish observability events to AWS Simple Queue Service topics." +))] #[derive(Clone, Debug)] #[serde(deny_unknown_fields)] pub struct SqsSinkConfig { @@ -111,6 +114,7 @@ impl GenerateConfig for SqsSinkConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "aws_sqs")] impl SinkConfig for SqsSinkConfig { async fn build( &self, @@ -149,7 +153,7 @@ impl SqsSinkConfig { create_client::( &self.auth, self.region.region(), - self.region.endpoint()?, + self.region.endpoint(), proxy, &self.tls, true, diff --git a/src/sinks/aws_sqs/integration_tests.rs b/src/sinks/aws_sqs/integration_tests.rs index 6c20a7167c6f3..c2f5176dd50f6 100644 --- a/src/sinks/aws_sqs/integration_tests.rs +++ b/src/sinks/aws_sqs/integration_tests.rs @@ -1,11 +1,9 @@ #![cfg(all(test, feature = "aws-sqs-integration-tests"))] use std::collections::HashMap; -use std::str::FromStr; -use aws_sdk_sqs::{model::QueueAttributeName, Client as SqsClient, Endpoint, Region}; +use aws_sdk_sqs::{model::QueueAttributeName, Client as SqsClient, Region}; use codecs::TextSerializerConfig; -use http::Uri; use tokio::time::{sleep, Duration}; use super::{config::SqsSinkConfig, sink::SqsSink}; @@ -32,7 +30,7 @@ async fn create_test_client() -> SqsClient { create_client::( &auth, Some(Region::new("localstack")), - Some(Endpoint::immutable(Uri::from_str(&endpoint).unwrap())), + Some(endpoint), &proxy, &None, true, diff --git a/src/sinks/aws_sqs/request_builder.rs b/src/sinks/aws_sqs/request_builder.rs index 22e1340be1a1f..34f54b5c1c0a2 100644 --- a/src/sinks/aws_sqs/request_builder.rs +++ b/src/sinks/aws_sqs/request_builder.rs @@ -93,7 +93,7 @@ impl RequestBuilder for SqsRequestBuilder { None => None, }; - let builder = RequestMetadataBuilder::from_events(&event); + let builder = RequestMetadataBuilder::from_event(&event); let sqs_metadata = SqsMetadata { finalizers: event.take_finalizers(), @@ -130,7 +130,7 @@ pub(crate) struct SendMessageEntry { pub message_deduplication_id: Option, pub queue_url: String, finalizers: EventFinalizers, - metadata: RequestMetadata, + pub metadata: RequestMetadata, } impl ByteSizeOf for SendMessageEntry { @@ -154,7 +154,11 @@ impl Finalizable for SendMessageEntry { } impl MetaDescriptive for SendMessageEntry { - fn get_metadata(&self) -> RequestMetadata { - self.metadata + fn get_metadata(&self) -> &RequestMetadata { + &self.metadata + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.metadata } } diff --git a/src/sinks/aws_sqs/service.rs b/src/sinks/aws_sqs/service.rs index eb6ace3ea287d..dfb6ab10116fa 100644 --- a/src/sinks/aws_sqs/service.rs +++ b/src/sinks/aws_sqs/service.rs @@ -4,12 +4,11 @@ use aws_sdk_sqs::{error::SendMessageError, types::SdkError, Client as SqsClient} use futures::{future::BoxFuture, TryFutureExt}; use tower::Service; use tracing::Instrument; -use vector_core::{ - event::EventStatus, internal_event::CountByteSize, stream::DriverResponse, ByteSizeOf, -}; use vrl::value::Value; use crate::mezmo::user_trace::{UserLoggingError, UserLoggingResponse}; +use vector_common::request_metadata::GroupedCountByteSize; +use vector_core::{event::EventStatus, stream::DriverResponse, ByteSizeOf}; use 
super::request_builder::SendMessageEntry; @@ -47,7 +46,13 @@ impl Service for SqsService { .set_message_deduplication_id(entry.message_deduplication_id) .queue_url(entry.queue_url) .send() - .map_ok(|_| SendMessageResponse { byte_size }) + .map_ok(|_| SendMessageResponse { + byte_size, + json_byte_size: entry + .metadata + .events_estimated_json_encoded_byte_size() + .clone(), + }) .instrument(info_span!("request").or_current()) .await }) @@ -57,6 +62,7 @@ impl Service for SqsService { #[derive(Debug)] pub(crate) struct SendMessageResponse { byte_size: usize, + json_byte_size: GroupedCountByteSize, } impl DriverResponse for SendMessageResponse { @@ -64,8 +70,12 @@ impl DriverResponse for SendMessageResponse { EventStatus::Delivered } - fn events_sent(&self) -> CountByteSize { - CountByteSize(1, self.byte_size) + fn events_sent(&self) -> &GroupedCountByteSize { + &self.json_byte_size + } + + fn bytes_sent(&self) -> Option { + Some(self.byte_size) } } @@ -73,7 +83,7 @@ impl UserLoggingResponse for SendMessageResponse {} impl UserLoggingError for SdkError { fn log_msg(&self) -> Option { match &self { - SdkError::ServiceError { err, raw: _ } => err.message().map(Into::into), + SdkError::ServiceError(inner) => inner.err().message().map(Into::into), _ => None, // Other errors are not user-facing } } diff --git a/src/sinks/axiom.rs b/src/sinks/axiom.rs index a8a73a52f7c40..54fa54025430a 100644 --- a/src/sinks/axiom.rs +++ b/src/sinks/axiom.rs @@ -16,7 +16,7 @@ use crate::{ static CLOUD_URL: &str = "https://api.axiom.co"; /// Configuration for the `axiom` sink. -#[configurable_component(sink("axiom"))] +#[configurable_component(sink("axiom", "Deliver log events to Axiom."))] #[derive(Clone, Debug, Default)] pub struct AxiomConfig { /// URI of the Axiom endpoint to send data to. @@ -77,6 +77,7 @@ impl GenerateConfig for AxiomConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "axiom")] impl SinkConfig for AxiomConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let mut request = self.request.clone(); @@ -161,7 +162,7 @@ mod integration_tests { assert!(!token.is_empty(), "$AXIOM_TOKEN required"); let dataset = env::var("AXIOM_DATASET").unwrap(); - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let config = AxiomConfig { url: Some(url.clone()), diff --git a/src/sinks/azure_blob/config.rs b/src/sinks/azure_blob/config.rs index 90ac2d1b87c0d..52bd2282be970 100644 --- a/src/sinks/azure_blob/config.rs +++ b/src/sinks/azure_blob/config.rs @@ -27,7 +27,10 @@ use crate::{ }; /// Configuration for the `azure_blob` sink. -#[configurable_component(sink("azure_blob"))] +#[configurable_component(sink( + "azure_blob", + "Store your observability data in Azure Blob Storage." 
+))] #[derive(Clone, Debug)] #[serde(deny_unknown_fields)] pub struct AzureBlobSinkConfig { @@ -166,6 +169,7 @@ impl GenerateConfig for AzureBlobSinkConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "azure_blob")] impl SinkConfig for AzureBlobSinkConfig { async fn build(&self, cx: SinkContext) -> Result<(VectorSink, Healthcheck)> { let client = azure_common::config::build_client( diff --git a/src/sinks/azure_blob/request_builder.rs b/src/sinks/azure_blob/request_builder.rs index f3e85cc5a8e7c..ae8f8770381a5 100644 --- a/src/sinks/azure_blob/request_builder.rs +++ b/src/sinks/azure_blob/request_builder.rs @@ -3,7 +3,7 @@ use chrono::Utc; use codecs::encoding::Framer; use uuid::Uuid; use vector_common::request_metadata::RequestMetadata; -use vector_core::ByteSizeOf; +use vector_core::EstimatedJsonEncodedSizeOf; use crate::{ codecs::{Encoder, Transformer}, @@ -51,7 +51,7 @@ impl RequestBuilder<(String, Vec)> for AzureBlobRequestOptions { let azure_metadata = AzureBlobMetadata { partition_key, count: events.len(), - byte_size: events.size_of(), + byte_size: events.estimated_json_encoded_size_of(), finalizers, }; @@ -106,6 +106,7 @@ impl Compression { Self::None => "text/plain", Self::Gzip(_) => "application/gzip", Self::Zlib(_) => "application/zlib", + Self::Zstd(_) => "application/zstd", } } } diff --git a/src/sinks/azure_common/config.rs b/src/sinks/azure_common/config.rs index 97172e15e89a2..414772906fb0a 100644 --- a/src/sinks/azure_common/config.rs +++ b/src/sinks/azure_common/config.rs @@ -8,8 +8,11 @@ use bytes::Bytes; use futures::FutureExt; use http::StatusCode; use snafu::Snafu; -use vector_common::request_metadata::{MetaDescriptive, RequestMetadata}; -use vector_core::{internal_event::CountByteSize, stream::DriverResponse}; +use vector_common::{ + json_size::JsonSize, + request_metadata::{GroupedCountByteSize, MetaDescriptive, RequestMetadata}, +}; +use vector_core::stream::DriverResponse; use vrl::value::Value; use crate::{ @@ -36,8 +39,12 @@ impl Finalizable for AzureBlobRequest { } impl MetaDescriptive for AzureBlobRequest { - fn get_metadata(&self) -> RequestMetadata { - self.request_metadata + fn get_metadata(&self) -> &RequestMetadata { + &self.request_metadata + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.request_metadata } } @@ -45,7 +52,7 @@ impl MetaDescriptive for AzureBlobRequest { pub struct AzureBlobMetadata { pub partition_key: String, pub count: usize, - pub byte_size: usize, + pub byte_size: JsonSize, pub finalizers: EventFinalizers, } @@ -65,8 +72,7 @@ impl RetryLogic for AzureBlobRetryLogic { #[derive(Debug)] pub struct AzureBlobResponse { pub inner: PutBlockBlobResponse, - pub count: usize, - pub events_byte_size: usize, + pub events_byte_size: GroupedCountByteSize, pub byte_size: usize, } @@ -75,8 +81,8 @@ impl DriverResponse for AzureBlobResponse { EventStatus::Delivered } - fn events_sent(&self) -> CountByteSize { - CountByteSize(self.count, self.events_byte_size) + fn events_sent(&self) -> &GroupedCountByteSize { + &self.events_byte_size } fn bytes_sent(&self) -> Option { diff --git a/src/sinks/azure_common/mod.rs b/src/sinks/azure_common/mod.rs index f3e4ff73ebe40..4d1c931977f03 100644 --- a/src/sinks/azure_common/mod.rs +++ b/src/sinks/azure_common/mod.rs @@ -1,3 +1,3 @@ -pub(crate) mod config; -pub(crate) mod service; -pub(crate) mod sink; +pub mod config; +pub mod service; +pub mod sink; diff --git a/src/sinks/azure_common/service.rs b/src/sinks/azure_common/service.rs index d61e960fde26e..11709058920c9 100644 --- 
a/src/sinks/azure_common/service.rs +++ b/src/sinks/azure_common/service.rs @@ -58,8 +58,9 @@ impl Service for AzureBlobService { result.map(|inner| AzureBlobResponse { inner, - count: request.metadata.count, - events_byte_size: request.metadata.byte_size, + events_byte_size: request + .request_metadata + .into_events_estimated_json_encoded_byte_size(), byte_size, }) } diff --git a/src/sinks/azure_monitor_logs.rs b/src/sinks/azure_monitor_logs.rs index 74b8e824a85da..59759defdb525 100644 --- a/src/sinks/azure_monitor_logs.rs +++ b/src/sinks/azure_monitor_logs.rs @@ -39,7 +39,10 @@ fn default_host() -> String { } /// Configuration for the `azure_monitor_logs` sink. -#[configurable_component(sink("azure_monitor_logs"))] +#[configurable_component(sink( + "azure_monitor_logs", + "Publish log events to the Azure Monitor Logs service." +))] #[derive(Clone, Debug)] #[serde(deny_unknown_fields)] pub struct AzureMonitorLogsConfig { @@ -177,6 +180,7 @@ const SHARED_KEY: &str = "SharedKey"; const API_VERSION: &str = "2016-04-01"; #[async_trait::async_trait] +#[typetag::serde(name = "azure_monitor_logs")] impl SinkConfig for AzureMonitorLogsConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let batch_settings = self @@ -205,6 +209,7 @@ impl SinkConfig for AzureMonitorLogsConfig { ) .sink_map_err(|error| error!(message = "Fatal azure_monitor_logs sink error.", %error)); + #[allow(deprecated)] Ok((VectorSink::from_event_sink(sink), healthcheck)) } @@ -455,7 +460,7 @@ mod tests { default_headers: HeaderMap::new(), }; - let context = SinkContext::new_test(); + let context = SinkContext::default(); let client = HttpClient::new(None, &context.proxy).expect("should not fail to create HTTP client"); @@ -473,6 +478,7 @@ mod tests { .sink_map_err(|error| error!(message = "Fatal azure_monitor_logs sink error.", %error)); let event = Event::Log(LogEvent::from("simple message")); + #[allow(deprecated)] run_and_assert_sink_compliance( VectorSink::from_event_sink(sink), stream::once(ready(event)), @@ -613,7 +619,7 @@ mod tests { "#, ) .unwrap(); - if config.build(SinkContext::new_test()).await.is_ok() { + if config.build(SinkContext::default()).await.is_ok() { panic!("config.build failed to error"); } } @@ -653,7 +659,7 @@ mod tests { "#, ) .unwrap(); - if config.build(SinkContext::new_test()).await.is_ok() { + if config.build(SinkContext::default()).await.is_ok() { panic!("config.build failed to error"); } } diff --git a/src/sinks/blackhole/config.rs b/src/sinks/blackhole/config.rs index e5d878a5434bf..d2d69e75cc44f 100644 --- a/src/sinks/blackhole/config.rs +++ b/src/sinks/blackhole/config.rs @@ -15,7 +15,10 @@ const fn default_print_interval_secs() -> Duration { /// Configuration for the `blackhole` sink. #[serde_as] -#[configurable_component(sink("blackhole"))] +#[configurable_component(sink( + "blackhole", + "Send observability events nowhere, which can be useful for debugging purposes." 
+))] #[derive(Clone, Debug, Derivative)] #[serde(deny_unknown_fields, default)] #[derivative(Default)] @@ -26,6 +29,7 @@ pub struct BlackholeConfig { #[derivative(Default(value = "default_print_interval_secs()"))] #[serde(default = "default_print_interval_secs")] #[serde_as(as = "serde_with::DurationSeconds")] + #[configurable(metadata(docs::human_name = "Print Interval"))] #[configurable(metadata(docs::examples = 10))] pub print_interval_secs: Duration, @@ -45,6 +49,7 @@ pub struct BlackholeConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "blackhole")] impl SinkConfig for BlackholeConfig { async fn build(&self, _cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let sink = BlackholeSink::new(self.clone()); diff --git a/src/sinks/blackhole/sink.rs b/src/sinks/blackhole/sink.rs index 92f90377931b1..09fb5f6353d94 100644 --- a/src/sinks/blackhole/sink.rs +++ b/src/sinks/blackhole/sink.rs @@ -92,10 +92,10 @@ impl StreamSink for BlackholeSink { _ = self.total_events.fetch_add(events.len(), Ordering::AcqRel); _ = self .total_raw_bytes - .fetch_add(message_len, Ordering::AcqRel); + .fetch_add(message_len.get(), Ordering::AcqRel); events_sent.emit(CountByteSize(events.len(), message_len)); - bytes_sent.emit(ByteSize(message_len)); + bytes_sent.emit(ByteSize(message_len.get())); } // Notify the reporting task to shutdown. diff --git a/src/sinks/clickhouse/config.rs b/src/sinks/clickhouse/config.rs index a5ae3763f2ad5..0efffb97ef3ce 100644 --- a/src/sinks/clickhouse/config.rs +++ b/src/sinks/clickhouse/config.rs @@ -1,23 +1,20 @@ -use vector_config::configurable_component; +use http::{Request, StatusCode, Uri}; +use hyper::Body; +use super::{ + service::{ClickhouseRetryLogic, ClickhouseService}, + sink::ClickhouseSink, +}; use crate::{ - codecs::Transformer, - config::{AcknowledgementsConfig, Input, SinkConfig, SinkContext}, - http::Auth, + http::{get_http_scheme_from_uri, Auth, HttpClient, MaybeAuth}, sinks::{ - util::{ - BatchConfig, Compression, RealtimeSizeBasedDefaultBatchSettings, TowerRequestConfig, - UriSerde, - }, - Healthcheck, VectorSink, + prelude::*, + util::{RealtimeSizeBasedDefaultBatchSettings, UriSerde}, }, - tls::TlsConfig, }; -use super::http_sink::build_http_sink; - /// Configuration for the `clickhouse` sink. 
-#[configurable_component(sink("clickhouse"))] +#[configurable_component(sink("clickhouse", "Deliver log data to a ClickHouse database."))] #[derive(Clone, Debug, Default)] #[serde(deny_unknown_fields)] pub struct ClickhouseConfig { @@ -79,11 +76,44 @@ pub struct ClickhouseConfig { impl_generate_config_from_default!(ClickhouseConfig); #[async_trait::async_trait] +#[typetag::serde(name = "clickhouse")] impl SinkConfig for ClickhouseConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { - // later we can build different sink(http, native) here - // according to the clickhouseConfig - build_http_sink(self, cx).await + let endpoint = self.endpoint.with_default_parts().uri; + let protocol = get_http_scheme_from_uri(&endpoint); + + let auth = self.auth.choose_one(&self.endpoint.auth)?; + + let tls_settings = TlsSettings::from_options(&self.tls)?; + let client = HttpClient::new(tls_settings, &cx.proxy)?; + + let service = ClickhouseService::new( + client.clone(), + auth.clone(), + &endpoint, + self.database.as_deref(), + self.table.as_str(), + self.skip_unknown_fields, + self.date_time_best_effort, + )?; + + let request_limits = self.request.unwrap_with(&Default::default()); + let service = ServiceBuilder::new() + .settings(request_limits, ClickhouseRetryLogic::default()) + .service(service); + + let batch_settings = self.batch.into_batcher_settings()?; + let sink = ClickhouseSink::new( + batch_settings, + self.compression, + self.encoding.clone(), + service, + protocol, + ); + + let healthcheck = Box::pin(healthcheck(client, endpoint, auth)); + + Ok((VectorSink::from_event_streamsink(sink), healthcheck)) } fn input(&self) -> Input { @@ -94,3 +124,30 @@ impl SinkConfig for ClickhouseConfig { &self.acknowledgements } } + +async fn healthcheck(client: HttpClient, endpoint: Uri, auth: Option) -> crate::Result<()> { + // TODO: check if table exists? 
+ let uri = format!("{}/?query=SELECT%201", endpoint); + let mut request = Request::get(uri).body(Body::empty()).unwrap(); + + if let Some(auth) = auth { + auth.apply(&mut request); + } + + let response = client.send(request).await?; + + match response.status() { + StatusCode::OK => Ok(()), + status => Err(HealthcheckError::UnexpectedStatus { status }.into()), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn generate_config() { + crate::test_util::test_generate_config::(); + } +} diff --git a/src/sinks/clickhouse/http_sink.rs b/src/sinks/clickhouse/http_sink.rs deleted file mode 100644 index ca8a52f702112..0000000000000 --- a/src/sinks/clickhouse/http_sink.rs +++ /dev/null @@ -1,250 +0,0 @@ -use bytes::{BufMut, Bytes, BytesMut}; -use futures::{FutureExt, SinkExt}; -use http::{Request, StatusCode, Uri}; -use hyper::Body; -use snafu::ResultExt; - -use super::ClickhouseConfig; -use crate::{ - codecs::Transformer, - config::SinkContext, - event::Event, - http::{HttpClient, HttpError, MaybeAuth}, - sinks::{ - util::{ - http::{BatchedHttpSink, HttpEventEncoder, HttpRetryLogic, HttpSink}, - retries::{RetryAction, RetryLogic}, - Buffer, TowerRequestConfig, - }, - Healthcheck, HealthcheckError, UriParseSnafu, VectorSink, - }, - tls::TlsSettings, -}; - -pub(crate) async fn build_http_sink( - cfg: &ClickhouseConfig, - cx: SinkContext, -) -> crate::Result<(VectorSink, Healthcheck)> { - let batch = cfg.batch.into_batch_settings()?; - let request = cfg.request.unwrap_with(&TowerRequestConfig::default()); - let tls_settings = TlsSettings::from_options(&cfg.tls)?; - let client = HttpClient::new(tls_settings, &cx.proxy)?; - - let config = ClickhouseConfig { - auth: cfg.auth.choose_one(&cfg.endpoint.auth)?, - ..cfg.clone() - }; - - let sink = BatchedHttpSink::with_logic( - config.clone(), - Buffer::new(batch.size, cfg.compression), - ClickhouseRetryLogic::default(), - request, - batch.timeout, - client.clone(), - cx, - ) - .sink_map_err(|error| error!(message = "Fatal clickhouse sink error.", %error)); - - let healthcheck = healthcheck(client, config).boxed(); - - Ok((VectorSink::from_event_sink(sink), healthcheck)) -} - -pub struct ClickhouseEventEncoder { - transformer: Transformer, -} - -impl HttpEventEncoder for ClickhouseEventEncoder { - fn encode_event(&mut self, mut event: Event) -> Option { - self.transformer.transform(&mut event); - let log = event.into_log(); - - let mut body = crate::serde::json::to_bytes(&log).expect("Events should be valid json!"); - body.put_u8(b'\n'); - - Some(body) - } -} - -#[async_trait::async_trait] -impl HttpSink for ClickhouseConfig { - type Input = BytesMut; - type Output = BytesMut; - type Encoder = ClickhouseEventEncoder; - - fn build_encoder(&self) -> Self::Encoder { - ClickhouseEventEncoder { - transformer: self.encoding.clone(), - } - } - - async fn build_request(&self, events: Self::Output) -> crate::Result> { - let database = if let Some(database) = &self.database { - database.as_str() - } else { - "default" - }; - - let uri = set_uri_query( - &self.endpoint.with_default_parts().uri, - database, - &self.table, - self.skip_unknown_fields, - self.date_time_best_effort, - ) - .expect("Unable to encode uri"); - - let mut builder = Request::post(&uri).header("Content-Type", "application/x-ndjson"); - - if let Some(ce) = self.compression.content_encoding() { - builder = builder.header("Content-Encoding", ce); - } - - let mut request = builder.body(events.freeze()).unwrap(); - - if let Some(auth) = &self.auth { - auth.apply(&mut request); - } - - 
Ok(request) - } -} - -async fn healthcheck(client: HttpClient, config: ClickhouseConfig) -> crate::Result<()> { - // TODO: check if table exists? - let uri = format!("{}/?query=SELECT%201", config.endpoint.with_default_parts()); - let mut request = Request::get(uri).body(Body::empty()).unwrap(); - - if let Some(auth) = &config.auth { - auth.apply(&mut request); - } - - let response = client.send(request).await?; - - match response.status() { - StatusCode::OK => Ok(()), - status => Err(HealthcheckError::UnexpectedStatus { status }.into()), - } -} - -fn set_uri_query( - uri: &Uri, - database: &str, - table: &str, - skip_unknown: bool, - date_time_best_effort: bool, -) -> crate::Result { - let query = url::form_urlencoded::Serializer::new(String::new()) - .append_pair( - "query", - format!( - "INSERT INTO \"{}\".\"{}\" FORMAT JSONEachRow", - database, - table.replace('\"', "\\\"") - ) - .as_str(), - ) - .finish(); - - let mut uri = uri.to_string(); - if !uri.ends_with('/') { - uri.push('/'); - } - uri.push_str("?input_format_import_nested_json=1&"); - if skip_unknown { - uri.push_str("input_format_skip_unknown_fields=1&"); - } - if date_time_best_effort { - uri.push_str("date_time_input_format=best_effort&") - } - uri.push_str(query.as_str()); - - uri.parse::() - .context(UriParseSnafu) - .map_err(Into::into) -} - -#[derive(Debug, Default, Clone)] -struct ClickhouseRetryLogic { - inner: HttpRetryLogic, -} - -impl RetryLogic for ClickhouseRetryLogic { - type Error = HttpError; - type Response = http::Response; - - fn is_retriable_error(&self, error: &Self::Error) -> bool { - self.inner.is_retriable_error(error) - } - - fn should_retry_response(&self, response: &Self::Response) -> RetryAction { - match response.status() { - StatusCode::INTERNAL_SERVER_ERROR => { - let body = response.body(); - - // Currently, ClickHouse returns 500's incorrect data and type mismatch errors. - // This attempts to check if the body starts with `Code: {code_num}` and to not - // retry those errors. 
- // - // Reference: https://github.com/vectordotdev/vector/pull/693#issuecomment-517332654 - // Error code definitions: https://github.com/ClickHouse/ClickHouse/blob/master/dbms/src/Common/ErrorCodes.cpp - // - // Fix already merged: https://github.com/ClickHouse/ClickHouse/pull/6271 - if body.starts_with(b"Code: 117") { - RetryAction::DontRetry("incorrect data".into()) - } else if body.starts_with(b"Code: 53") { - RetryAction::DontRetry("type mismatch".into()) - } else { - RetryAction::Retry(String::from_utf8_lossy(body).to_string().into()) - } - } - _ => self.inner.should_retry_response(response), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn generate_config() { - crate::test_util::test_generate_config::(); - } - - #[test] - fn encode_valid() { - let uri = set_uri_query( - &"http://localhost:80".parse().unwrap(), - "my_database", - "my_table", - false, - true, - ) - .unwrap(); - assert_eq!(uri.to_string(), "http://localhost:80/?input_format_import_nested_json=1&date_time_input_format=best_effort&query=INSERT+INTO+%22my_database%22.%22my_table%22+FORMAT+JSONEachRow"); - - let uri = set_uri_query( - &"http://localhost:80".parse().unwrap(), - "my_database", - "my_\"table\"", - false, - false, - ) - .unwrap(); - assert_eq!(uri.to_string(), "http://localhost:80/?input_format_import_nested_json=1&query=INSERT+INTO+%22my_database%22.%22my_%5C%22table%5C%22%22+FORMAT+JSONEachRow"); - } - - #[test] - fn encode_invalid() { - set_uri_query( - &"localhost:80".parse().unwrap(), - "my_database", - "my_table", - false, - false, - ) - .unwrap_err(); - } -} diff --git a/src/sinks/clickhouse/integration_tests.rs b/src/sinks/clickhouse/integration_tests.rs index 1061b8ddf8e11..f16b19ffcc7ed 100644 --- a/src/sinks/clickhouse/integration_tests.rs +++ b/src/sinks/clickhouse/integration_tests.rs @@ -24,7 +24,7 @@ use crate::{ config::{log_schema, SinkConfig, SinkContext}, sinks::util::{BatchConfig, Compression, TowerRequestConfig}, test_util::{ - components::{run_and_assert_sink_compliance, HTTP_SINK_TAGS}, + components::{run_and_assert_sink_compliance, SINK_TAGS}, random_string, trace_init, }, }; @@ -63,19 +63,15 @@ async fn insert_events() { ) .await; - let (sink, _hc) = config.build(SinkContext::new_test()).await.unwrap(); + let (sink, _hc) = config.build(SinkContext::default()).await.unwrap(); let (mut input_event, mut receiver) = make_event(); input_event .as_mut_log() .insert("items", vec!["item1", "item2"]); - run_and_assert_sink_compliance( - sink, - stream::once(ready(input_event.clone())), - &HTTP_SINK_TAGS, - ) - .await; + run_and_assert_sink_compliance(sink, stream::once(ready(input_event.clone())), &SINK_TAGS) + .await; let output = client.select_all(&table).await; assert_eq!(1, output.rows); @@ -114,17 +110,13 @@ async fn skip_unknown_fields() { .create_table(&table, "host String, timestamp String, message String") .await; - let (sink, _hc) = config.build(SinkContext::new_test()).await.unwrap(); + let (sink, _hc) = config.build(SinkContext::default()).await.unwrap(); let (mut input_event, mut receiver) = make_event(); input_event.as_mut_log().insert("unknown", "mysteries"); - run_and_assert_sink_compliance( - sink, - stream::once(ready(input_event.clone())), - &HTTP_SINK_TAGS, - ) - .await; + run_and_assert_sink_compliance(sink, stream::once(ready(input_event.clone())), &SINK_TAGS) + .await; let output = client.select_all(&table).await; assert_eq!(1, output.rows); @@ -167,16 +159,12 @@ async fn insert_events_unix_timestamps() { ) .await; - let (sink, _hc) = 
config.build(SinkContext::new_test()).await.unwrap(); + let (sink, _hc) = config.build(SinkContext::default()).await.unwrap(); let (mut input_event, _receiver) = make_event(); - run_and_assert_sink_compliance( - sink, - stream::once(ready(input_event.clone())), - &HTTP_SINK_TAGS, - ) - .await; + run_and_assert_sink_compliance(sink, stream::once(ready(input_event.clone())), &SINK_TAGS) + .await; let output = client.select_all(&table).await; assert_eq!(1, output.rows); @@ -235,16 +223,12 @@ timestamp_format = "unix""#, ) .await; - let (sink, _hc) = config.build(SinkContext::new_test()).await.unwrap(); + let (sink, _hc) = config.build(SinkContext::default()).await.unwrap(); let (mut input_event, _receiver) = make_event(); - run_and_assert_sink_compliance( - sink, - stream::once(ready(input_event.clone())), - &HTTP_SINK_TAGS, - ) - .await; + run_and_assert_sink_compliance(sink, stream::once(ready(input_event.clone())), &SINK_TAGS) + .await; let output = client.select_all(&table).await; assert_eq!(1, output.rows); @@ -292,13 +276,13 @@ async fn no_retry_on_incorrect_data() { }; let client = ClickhouseClient::new(host); - // the event contains a message field, but its being omitted to - // fail the request. + // The event contains a message field, but it's of type String, which will cause + // the request to fail. client - .create_table(&table, "host String, timestamp String") + .create_table(&table, "host String, timestamp String, message Int32") .await; - let (sink, _hc) = config.build(SinkContext::new_test()).await.unwrap(); + let (sink, _hc) = config.build(SinkContext::default()).await.unwrap(); let (input_event, mut receiver) = make_event(); @@ -340,7 +324,7 @@ async fn no_retry_on_incorrect_data_warp() { batch, ..Default::default() }; - let (sink, _hc) = config.build(SinkContext::new_test()).await.unwrap(); + let (sink, _hc) = config.build(SinkContext::default()).await.unwrap(); let (input_event, mut receiver) = make_event(); @@ -351,7 +335,7 @@ async fn no_retry_on_incorrect_data_warp() { .unwrap() .unwrap(); - assert_eq!(receiver.try_recv(), Ok(BatchStatus::Errored)); + assert_eq!(receiver.try_recv(), Ok(BatchStatus::Rejected)); } fn make_event() -> (Event, BatchStatusReceiver) { @@ -378,7 +362,6 @@ impl ClickhouseClient { let response = self .client .post(&self.host) - // .body(format!( "CREATE TABLE {} ({}) diff --git a/src/sinks/clickhouse/mod.rs b/src/sinks/clickhouse/mod.rs index 2f4e5af1870fc..488df9181a8bf 100644 --- a/src/sinks/clickhouse/mod.rs +++ b/src/sinks/clickhouse/mod.rs @@ -1,5 +1,17 @@ +//! The Clickhouse [`vector_core::sink::VectorSink`] +//! +//! This module contains the [`vector_core::sink::VectorSink`] instance that is responsible for +//! taking a stream of [`vector_core::event::Event`] instances and forwarding them to Clickhouse. +//! +//! Events are sent to Clickhouse using the HTTP interface with a query of the following structure: +//! `INSERT INTO my_db.my_table FORMAT JSONEachRow`. The event payload is encoded as new-line +//! delimited JSON. +//! +//! This sink only supports logs for now but could support metrics and traces as well in the future. 
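For a concrete picture of the insert path described in the module docs above, here is a trimmed, standalone sketch of how such a URL can be put together with `url::form_urlencoded`. The endpoint, database, and table names are placeholders; the sink's real construction is the `set_uri_query` function in the new `service.rs` below.

fn main() {
    // ClickHouse format settings go first, then the url-encoded INSERT query.
    let query = url::form_urlencoded::Serializer::new(String::new())
        .append_pair(
            "query",
            "INSERT INTO \"my_db\".\"my_table\" FORMAT JSONEachRow",
        )
        .finish();

    let uri = format!(
        "http://localhost:8123/?input_format_import_nested_json=1&{}",
        query
    );
    println!("{}", uri);

    // The request body is then one JSON object per line, e.g.:
    //   {"host":"web-01","message":"hello"}
    //   {"host":"web-02","message":"world"}
}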
+ mod config; -mod http_sink; #[cfg(all(test, feature = "clickhouse-integration-tests"))] mod integration_tests; +mod service; +mod sink; pub use self::config::ClickhouseConfig; diff --git a/src/sinks/clickhouse/service.rs b/src/sinks/clickhouse/service.rs new file mode 100644 index 0000000000000..ecce62e537290 --- /dev/null +++ b/src/sinks/clickhouse/service.rs @@ -0,0 +1,257 @@ +use bytes::Bytes; +use http::{ + header::{CONTENT_ENCODING, CONTENT_LENGTH, CONTENT_TYPE}, + Request, Response, StatusCode, Uri, +}; +use hyper::{body, Body}; +use snafu::ResultExt; +use std::task::{Context, Poll}; +use tracing::Instrument; + +use crate::{ + http::{Auth, HttpClient, HttpError}, + sinks::{ + prelude::*, + util::{http::HttpRetryLogic, retries::RetryAction}, + UriParseSnafu, + }, +}; + +#[derive(Debug, Clone)] +pub struct ClickhouseRequest { + pub body: Bytes, + pub compression: Compression, + pub finalizers: EventFinalizers, + pub metadata: RequestMetadata, +} + +impl MetaDescriptive for ClickhouseRequest { + fn get_metadata(&self) -> &RequestMetadata { + &self.metadata + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.metadata + } +} + +impl Finalizable for ClickhouseRequest { + fn take_finalizers(&mut self) -> EventFinalizers { + self.finalizers.take_finalizers() + } +} + +pub struct ClickhouseResponse { + http_response: Response, + events_byte_size: GroupedCountByteSize, + raw_byte_size: usize, +} + +impl DriverResponse for ClickhouseResponse { + fn event_status(&self) -> EventStatus { + match self.http_response.status().is_success() { + true => EventStatus::Delivered, + false => EventStatus::Rejected, + } + } + + fn events_sent(&self) -> &GroupedCountByteSize { + &self.events_byte_size + } + + fn bytes_sent(&self) -> Option { + Some(self.raw_byte_size) + } +} + +#[derive(Debug, Default, Clone)] +pub struct ClickhouseRetryLogic { + inner: HttpRetryLogic, +} + +impl RetryLogic for ClickhouseRetryLogic { + type Error = HttpError; + type Response = ClickhouseResponse; + + fn is_retriable_error(&self, error: &Self::Error) -> bool { + self.inner.is_retriable_error(error) + } + + fn should_retry_response(&self, response: &Self::Response) -> RetryAction { + match response.http_response.status() { + StatusCode::INTERNAL_SERVER_ERROR => { + let body = response.http_response.body(); + + // Currently, ClickHouse returns 500's incorrect data and type mismatch errors. + // This attempts to check if the body starts with `Code: {code_num}` and to not + // retry those errors. + // + // Reference: https://github.com/vectordotdev/vector/pull/693#issuecomment-517332654 + // Error code definitions: https://github.com/ClickHouse/ClickHouse/blob/master/dbms/src/Common/ErrorCodes.cpp + // + // Fix already merged: https://github.com/ClickHouse/ClickHouse/pull/6271 + if body.starts_with(b"Code: 117") { + RetryAction::DontRetry("incorrect data".into()) + } else if body.starts_with(b"Code: 53") { + RetryAction::DontRetry("type mismatch".into()) + } else { + RetryAction::Retry(String::from_utf8_lossy(body).to_string().into()) + } + } + _ => self.inner.should_retry_response(&response.http_response), + } + } +} + +/// `ClickhouseService` is a `Tower` service used to send logs to Clickhouse. +#[derive(Debug, Clone)] +pub struct ClickhouseService { + client: HttpClient, + uri: Uri, + auth: Option, +} + +impl ClickhouseService { + /// Creates a new `ClickhouseService`. 
+ pub fn new( + client: HttpClient, + auth: Option, + endpoint: &Uri, + database: Option<&str>, + table: &str, + skip_unknown_fields: bool, + date_time_best_effort: bool, + ) -> crate::Result { + // Set the URI query once during initialization, as it won't change throughout the lifecycle + // of the service. + let uri = set_uri_query( + endpoint, + database.unwrap_or("default"), + table, + skip_unknown_fields, + date_time_best_effort, + )?; + Ok(Self { client, auth, uri }) + } +} + +impl Service for ClickhouseService { + type Response = ClickhouseResponse; + type Error = crate::Error; + type Future = BoxFuture<'static, Result>; + + // Emission of Error internal event is handled upstream by the caller. + fn poll_ready(&mut self, _cx: &mut Context) -> Poll> { + Poll::Ready(Ok(())) + } + + // Emission of Error internal event is handled upstream by the caller. + fn call(&mut self, request: ClickhouseRequest) -> Self::Future { + let mut client = self.client.clone(); + + let mut builder = Request::post(&self.uri) + .header(CONTENT_TYPE, "application/x-ndjson") + .header(CONTENT_LENGTH, request.body.len()); + if let Some(ce) = request.compression.content_encoding() { + builder = builder.header(CONTENT_ENCODING, ce); + } + if let Some(auth) = &self.auth { + builder = auth.apply_builder(builder); + } + + let http_request = builder + .body(Body::from(request.body)) + .expect("building HTTP request failed unexpectedly"); + + Box::pin(async move { + let response = client.call(http_request).in_current_span().await?; + let (parts, body) = response.into_parts(); + let body = body::to_bytes(body).await?; + Ok(ClickhouseResponse { + http_response: hyper::Response::from_parts(parts, body), + raw_byte_size: request.metadata.request_encoded_size(), + events_byte_size: request + .metadata + .into_events_estimated_json_encoded_byte_size(), + }) + }) + } +} + +fn set_uri_query( + uri: &Uri, + database: &str, + table: &str, + skip_unknown: bool, + date_time_best_effort: bool, +) -> crate::Result { + let query = url::form_urlencoded::Serializer::new(String::new()) + .append_pair( + "query", + format!( + "INSERT INTO \"{}\".\"{}\" FORMAT JSONEachRow", + database, + table.replace('\"', "\\\"") + ) + .as_str(), + ) + .finish(); + + let mut uri = uri.to_string(); + if !uri.ends_with('/') { + uri.push('/'); + } + + uri.push_str("?input_format_import_nested_json=1&"); + if skip_unknown { + uri.push_str("input_format_skip_unknown_fields=1&"); + } + if date_time_best_effort { + uri.push_str("date_time_input_format=best_effort&") + } + uri.push_str(query.as_str()); + + uri.parse::() + .context(UriParseSnafu) + .map_err(Into::into) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn encode_valid() { + let uri = set_uri_query( + &"http://localhost:80".parse().unwrap(), + "my_database", + "my_table", + false, + true, + ) + .unwrap(); + assert_eq!(uri.to_string(), "http://localhost:80/?input_format_import_nested_json=1&date_time_input_format=best_effort&query=INSERT+INTO+%22my_database%22.%22my_table%22+FORMAT+JSONEachRow"); + + let uri = set_uri_query( + &"http://localhost:80".parse().unwrap(), + "my_database", + "my_\"table\"", + false, + false, + ) + .unwrap(); + assert_eq!(uri.to_string(), "http://localhost:80/?input_format_import_nested_json=1&query=INSERT+INTO+%22my_database%22.%22my_%5C%22table%5C%22%22+FORMAT+JSONEachRow"); + } + + #[test] + fn encode_invalid() { + set_uri_query( + &"localhost:80".parse().unwrap(), + "my_database", + "my_table", + false, + false, + ) + .unwrap_err(); + } +} diff --git 
a/src/sinks/clickhouse/sink.rs b/src/sinks/clickhouse/sink.rs new file mode 100644 index 0000000000000..805cc50bcf4bc --- /dev/null +++ b/src/sinks/clickhouse/sink.rs @@ -0,0 +1,117 @@ +use bytes::Bytes; +use codecs::{encoding::Framer, JsonSerializerConfig, NewlineDelimitedEncoderConfig}; + +use super::service::{ClickhouseRequest, ClickhouseRetryLogic, ClickhouseService}; +use crate::{internal_events::SinkRequestBuildError, sinks::prelude::*}; + +pub struct ClickhouseSink { + batch_settings: BatcherSettings, + compression: Compression, + encoding: (Transformer, Encoder), + service: Svc, + protocol: &'static str, +} + +impl ClickhouseSink { + pub fn new( + batch_settings: BatcherSettings, + compression: Compression, + transformer: Transformer, + service: Svc, + protocol: &'static str, + ) -> Self { + Self { + batch_settings, + compression, + encoding: ( + transformer, + Encoder::::new( + NewlineDelimitedEncoderConfig::default().build().into(), + JsonSerializerConfig::default().build().into(), + ), + ), + service, + protocol, + } + } + + async fn run_inner(self: Box, input: BoxStream<'_, Event>) -> Result<(), ()> { + input + .batched(self.batch_settings.into_byte_size_config()) + .request_builder( + None, + ClickhouseRequestBuilder { + compression: self.compression, + encoding: self.encoding, + }, + ) + .filter_map(|request| async { + match request { + Err(error) => { + emit!(SinkRequestBuildError { error }); + None + } + Ok(req) => Some(req), + } + }) + .into_driver(self.service) + .protocol(self.protocol) + .run() + .await + } +} + +#[async_trait::async_trait] +impl StreamSink for ClickhouseSink { + async fn run( + self: Box, + input: futures_util::stream::BoxStream<'_, Event>, + ) -> Result<(), ()> { + self.run_inner(input).await + } +} + +struct ClickhouseRequestBuilder { + compression: Compression, + encoding: (Transformer, Encoder), +} + +impl RequestBuilder> for ClickhouseRequestBuilder { + type Metadata = EventFinalizers; + type Events = Vec; + type Encoder = (Transformer, Encoder); + type Payload = Bytes; + type Request = ClickhouseRequest; + type Error = std::io::Error; + + fn compression(&self) -> Compression { + self.compression + } + + fn encoder(&self) -> &Self::Encoder { + &self.encoding + } + + fn split_input( + &self, + mut events: Vec, + ) -> (Self::Metadata, RequestMetadataBuilder, Self::Events) { + let finalizers = events.take_finalizers(); + let builder = RequestMetadataBuilder::from_events(&events); + (finalizers, builder, events) + } + + fn build_request( + &self, + metadata: Self::Metadata, + request_metadata: RequestMetadata, + payload: EncodeResult, + ) -> Self::Request { + ClickhouseRequest { + body: payload.into_payload(), + compression: self.compression, + finalizers: metadata, + metadata: request_metadata, + } + } +} diff --git a/src/sinks/console/config.rs b/src/sinks/console/config.rs index dcdefb12fb949..3aa1c46bb62af 100644 --- a/src/sinks/console/config.rs +++ b/src/sinks/console/config.rs @@ -33,7 +33,10 @@ pub enum Target { } /// Configuration for the `console` sink. -#[configurable_component(sink("console"))] +#[configurable_component(sink( + "console", + "Display observability events in the console, which can be useful for debugging purposes." 
+))] #[derive(Clone, Debug)] #[serde(deny_unknown_fields)] pub struct ConsoleSinkConfig { @@ -69,6 +72,7 @@ impl GenerateConfig for ConsoleSinkConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "console")] impl SinkConfig for ConsoleSinkConfig { async fn build(&self, _cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let transformer = self.encoding.transformer(); diff --git a/src/sinks/databend/api.rs b/src/sinks/databend/api.rs index 73d769ff6a67a..a3b87e9a72c1a 100644 --- a/src/sinks/databend/api.rs +++ b/src/sinks/databend/api.rs @@ -111,19 +111,8 @@ impl DatabendAPIClient { async fn do_request( &self, - url: String, - req: Option, + mut request: Request, ) -> Result { - let body = match req { - Some(r) => { - let body = serde_json::to_vec(&r)?; - Body::from(body) - } - None => Body::empty(), - }; - let mut request = Request::post(url) - .header("Content-Type", "application/json") - .body(body)?; if let Some(a) = &self.auth { a.apply(&mut request); } @@ -163,7 +152,10 @@ impl DatabendAPIClient { next_uri: String, ) -> Result { let endpoint = self.get_page_endpoint(&next_uri)?; - self.do_request(endpoint, None).await + let request = Request::get(endpoint) + .header("Content-Type", "application/json") + .body(Body::empty())?; + self.do_request(request).await } pub(super) async fn query( @@ -171,7 +163,10 @@ impl DatabendAPIClient { req: DatabendHttpRequest, ) -> Result { let endpoint = self.get_query_endpoint()?; - let resp = self.do_request(endpoint, Some(req)).await?; + let request = Request::post(endpoint) + .header("Content-Type", "application/json") + .body(Body::from(serde_json::to_vec(&req)?))?; + let resp = self.do_request(request).await?; match resp.next_uri { None => Ok(resp), Some(_) => { diff --git a/src/sinks/databend/config.rs b/src/sinks/databend/config.rs index 4a47adb01de8b..69a464090c445 100644 --- a/src/sinks/databend/config.rs +++ b/src/sinks/databend/config.rs @@ -30,7 +30,7 @@ use super::{ }; /// Configuration for the `databend` sink. 
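The Databend `do_request` refactor earlier in this diff moves request construction to the call sites: page fetches now issue a GET with an empty body, while query submissions POST a JSON payload. A small sketch of those two construction paths using the `http` and `hyper` types; the endpoint URLs here are placeholders, not the client's real routes.

use http::Request;
use hyper::Body;

fn build_page_request(next_uri: &str) -> http::Result<Request<Body>> {
    // Pagination: GET with an empty body.
    Request::get(next_uri)
        .header("Content-Type", "application/json")
        .body(Body::empty())
}

fn build_query_request(endpoint: &str, json: Vec<u8>) -> http::Result<Request<Body>> {
    // Query submission: POST carrying the serialized request.
    Request::post(endpoint)
        .header("Content-Type", "application/json")
        .body(Body::from(json))
}

fn main() {
    let get = build_page_request("http://localhost:8000/v1/query/page/1").unwrap();
    let post = build_query_request(
        "http://localhost:8000/v1/query",
        br#"{"sql":"SELECT 1"}"#.to_vec(),
    )
    .unwrap();
    assert_eq!(*get.method(), http::Method::GET);
    assert_eq!(*post.method(), http::Method::POST);
}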
-#[configurable_component(sink("databend"))] +#[configurable_component(sink("databend", "Deliver log data to a Databend database."))] #[derive(Clone, Debug)] #[serde(deny_unknown_fields)] pub struct DatabendConfig { @@ -103,6 +103,7 @@ impl DatabendConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "databend")] impl SinkConfig for DatabendConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let auth = self.auth.choose_one(&self.endpoint.auth)?; diff --git a/src/sinks/databend/integration_tests.rs b/src/sinks/databend/integration_tests.rs index b8c647c6f7d2d..5438775a4d385 100644 --- a/src/sinks/databend/integration_tests.rs +++ b/src/sinks/databend/integration_tests.rs @@ -124,7 +124,7 @@ async fn insert_event_with_cfg(cfg: String, table: String, client: DatabendAPICl .unwrap(); let (config, _) = load_sink::(&cfg).unwrap(); - let (sink, _hc) = config.build(SinkContext::new_test()).await.unwrap(); + let (sink, _hc) = config.build(SinkContext::default()).await.unwrap(); let (input_event, mut receiver) = make_event(); run_and_assert_sink_compliance( diff --git a/src/sinks/databend/service.rs b/src/sinks/databend/service.rs index 23da26f560d41..05a8b4629a767 100644 --- a/src/sinks/databend/service.rs +++ b/src/sinks/databend/service.rs @@ -9,8 +9,7 @@ use rand_distr::Alphanumeric; use snafu::Snafu; use tower::Service; use vector_common::finalization::{EventFinalizers, EventStatus, Finalizable}; -use vector_common::internal_event::CountByteSize; -use vector_common::request_metadata::{MetaDescriptive, RequestMetadata}; +use vector_common::request_metadata::{GroupedCountByteSize, MetaDescriptive, RequestMetadata}; use vector_core::stream::DriverResponse; use crate::{internal_events::EndpointBytesSent, sinks::util::retries::RetryLogic}; @@ -67,8 +66,12 @@ impl Finalizable for DatabendRequest { } impl MetaDescriptive for DatabendRequest { - fn get_metadata(&self) -> RequestMetadata { - self.metadata + fn get_metadata(&self) -> &RequestMetadata { + &self.metadata + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.metadata } } @@ -82,11 +85,8 @@ impl DriverResponse for DatabendResponse { EventStatus::Delivered } - fn events_sent(&self) -> CountByteSize { - CountByteSize( - self.metadata.event_count(), - self.metadata.events_byte_size(), - ) + fn events_sent(&self) -> &GroupedCountByteSize { + self.metadata.events_estimated_json_encoded_byte_size() } fn bytes_sent(&self) -> Option { @@ -205,7 +205,7 @@ impl Service for DatabendService { let service = self.clone(); let future = async move { - let metadata = request.get_metadata(); + let metadata = request.get_metadata().clone(); let stage_location = service.new_stage_location(); let protocol = service.client.get_protocol(); let endpoint = service.client.get_host(); diff --git a/src/sinks/datadog/events/config.rs b/src/sinks/datadog/events/config.rs index 8ca941d1cf0cb..606d364b32ec8 100644 --- a/src/sinks/datadog/events/config.rs +++ b/src/sinks/datadog/events/config.rs @@ -24,7 +24,10 @@ use crate::{ }; /// Configuration for the `datadog_events` sink. -#[configurable_component(sink("datadog_events"))] +#[configurable_component(sink( + "datadog_events", + "Publish observability events to the Datadog Events API." 
+))] #[derive(Clone, Debug, Default)] #[serde(deny_unknown_fields)] pub struct DatadogEventsConfig { @@ -89,6 +92,7 @@ impl DatadogEventsConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "datadog_events")] impl SinkConfig for DatadogEventsConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let client = self.build_client(cx.proxy())?; diff --git a/src/sinks/datadog/events/request_builder.rs b/src/sinks/datadog/events/request_builder.rs index 664f99beca436..93e4eeeb17c31 100644 --- a/src/sinks/datadog/events/request_builder.rs +++ b/src/sinks/datadog/events/request_builder.rs @@ -42,8 +42,12 @@ impl ElementCount for DatadogEventsRequest { } impl MetaDescriptive for DatadogEventsRequest { - fn get_metadata(&self) -> RequestMetadata { - self.request_metadata + fn get_metadata(&self) -> &RequestMetadata { + &self.request_metadata + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.request_metadata } } @@ -86,7 +90,7 @@ impl RequestBuilder for DatadogEventsRequestBuilder { } fn split_input(&self, event: Event) -> (Self::Metadata, RequestMetadataBuilder, Self::Events) { - let builder = RequestMetadataBuilder::from_events(&event); + let builder = RequestMetadataBuilder::from_event(&event); let mut log = event.into_log(); let metadata = Metadata { diff --git a/src/sinks/datadog/events/service.rs b/src/sinks/datadog/events/service.rs index 25669c45a9ff0..374bd3268b802 100644 --- a/src/sinks/datadog/events/service.rs +++ b/src/sinks/datadog/events/service.rs @@ -8,8 +8,8 @@ use futures::{ use http::Request; use hyper::Body; use tower::{Service, ServiceExt}; -use vector_common::request_metadata::MetaDescriptive; -use vector_core::{internal_event::CountByteSize, stream::DriverResponse}; +use vector_common::request_metadata::{GroupedCountByteSize, MetaDescriptive}; +use vector_core::stream::DriverResponse; use crate::{ event::EventStatus, @@ -23,7 +23,7 @@ use crate::{ pub struct DatadogEventsResponse { pub(self) event_status: EventStatus, pub http_status: http::StatusCode, - pub event_byte_size: usize, + pub event_byte_size: GroupedCountByteSize, } impl DriverResponse for DatadogEventsResponse { @@ -31,8 +31,8 @@ impl DriverResponse for DatadogEventsResponse { self.event_status } - fn events_sent(&self) -> CountByteSize { - CountByteSize(1, self.event_byte_size) + fn events_sent(&self) -> &GroupedCountByteSize { + &self.event_byte_size } fn bytes_sent(&self) -> Option { @@ -85,12 +85,13 @@ impl Service for DatadogEventsService { } // Emission of Error internal event is handled upstream by the caller - fn call(&mut self, req: DatadogEventsRequest) -> Self::Future { + fn call(&mut self, mut req: DatadogEventsRequest) -> Self::Future { let mut http_service = self.batch_http_service.clone(); Box::pin(async move { + let metadata = std::mem::take(req.metadata_mut()); http_service.ready().await?; - let event_byte_size = req.get_metadata().events_byte_size(); + let event_byte_size = metadata.into_events_estimated_json_encoded_byte_size(); let http_response = http_service.call(req).await?; let event_status = if http_response.is_successful() { EventStatus::Delivered diff --git a/src/sinks/datadog/logs/config.rs b/src/sinks/datadog/logs/config.rs index dadf9d884b752..91071379133d3 100644 --- a/src/sinks/datadog/logs/config.rs +++ b/src/sinks/datadog/logs/config.rs @@ -47,7 +47,7 @@ impl SinkBatchSettings for DatadogLogsDefaultBatchSettings { } /// Configuration for the `datadog_logs` sink. 
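Several of the reworked services (the Datadog events service above and the logs service just below) now take ownership of the request metadata with `std::mem::take` before the request itself is moved into the underlying HTTP call, and only then convert it into the grouped byte-size telemetry. A tiny sketch of that ownership pattern with a hypothetical stand-in type rather than the real `RequestMetadata`:

#[derive(Debug, Default, PartialEq)]
struct FakeMetadata {
    event_count: usize, // stand-in field; the real RequestMetadata carries more
}

struct FakeRequest {
    metadata: FakeMetadata,
}

impl FakeRequest {
    fn metadata_mut(&mut self) -> &mut FakeMetadata {
        &mut self.metadata
    }
}

fn main() {
    let mut req = FakeRequest {
        metadata: FakeMetadata { event_count: 3 },
    };
    // Take the metadata out, leaving a Default value behind, so the request can
    // still be moved into the service call afterwards.
    let metadata = std::mem::take(req.metadata_mut());
    assert_eq!(metadata.event_count, 3);
    assert_eq!(req.metadata, FakeMetadata::default());
}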
-#[configurable_component(sink("datadog_logs"))] +#[configurable_component(sink("datadog_logs", "Publish log events to Datadog."))] #[derive(Clone, Debug, Default)] #[serde(deny_unknown_fields)] pub struct DatadogLogsConfig { @@ -163,6 +163,7 @@ impl DatadogLogsConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "datadog_logs")] impl SinkConfig for DatadogLogsConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let client = self.create_client(&cx.proxy)?; @@ -196,7 +197,7 @@ impl SinkConfig for DatadogLogsConfig { #[cfg(test)] mod test { - use crate::sinks::datadog::logs::DatadogLogsConfig; + use super::super::config::DatadogLogsConfig; #[test] fn generate_config() { diff --git a/src/sinks/datadog/logs/integration_tests.rs b/src/sinks/datadog/logs/integration_tests.rs index 78156c1cf5c77..0d34486ec0897 100644 --- a/src/sinks/datadog/logs/integration_tests.rs +++ b/src/sinks/datadog/logs/integration_tests.rs @@ -1,9 +1,9 @@ use indoc::indoc; use vector_core::event::{BatchNotifier, BatchStatus}; +use super::config::DatadogLogsConfig; use crate::{ config::SinkConfig, - sinks::datadog::logs::DatadogLogsConfig, sinks::util::test::load_sink, test_util::{ components::{run_and_assert_sink_compliance, SINK_TAGS}, diff --git a/src/sinks/datadog/logs/mod.rs b/src/sinks/datadog/logs/mod.rs index 02b857a160181..0f99ee8dc8947 100644 --- a/src/sinks/datadog/logs/mod.rs +++ b/src/sinks/datadog/logs/mod.rs @@ -28,4 +28,4 @@ mod config; mod service; mod sink; -pub(crate) use config::DatadogLogsConfig; +pub use self::config::DatadogLogsConfig; diff --git a/src/sinks/datadog/logs/service.rs b/src/sinks/datadog/logs/service.rs index 9dcfb9ed0e2ce..07de315c886ed 100644 --- a/src/sinks/datadog/logs/service.rs +++ b/src/sinks/datadog/logs/service.rs @@ -14,10 +14,9 @@ use hyper::Body; use indexmap::IndexMap; use tower::Service; use tracing::Instrument; -use vector_common::request_metadata::{MetaDescriptive, RequestMetadata}; +use vector_common::request_metadata::{GroupedCountByteSize, MetaDescriptive, RequestMetadata}; use vector_core::{ event::{EventFinalizers, EventStatus, Finalizable}, - internal_event::CountByteSize, stream::DriverResponse, }; @@ -57,16 +56,19 @@ impl Finalizable for LogApiRequest { } impl MetaDescriptive for LogApiRequest { - fn get_metadata(&self) -> RequestMetadata { - self.metadata + fn get_metadata(&self) -> &RequestMetadata { + &self.metadata + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.metadata } } #[derive(Debug)] pub struct LogApiResponse { event_status: EventStatus, - count: usize, - events_byte_size: usize, + events_byte_size: GroupedCountByteSize, raw_byte_size: usize, } @@ -75,8 +77,8 @@ impl DriverResponse for LogApiResponse { self.event_status } - fn events_sent(&self) -> CountByteSize { - CountByteSize(self.count, self.events_byte_size) + fn events_sent(&self) -> &GroupedCountByteSize { + &self.events_byte_size } fn bytes_sent(&self) -> Option { @@ -125,7 +127,7 @@ impl Service for LogApiService { } // Emission of Error internal event is handled upstream by the caller - fn call(&mut self, request: LogApiRequest) -> Self::Future { + fn call(&mut self, mut request: LogApiRequest) -> Self::Future { let mut client = self.client.clone(); let http_request = Request::post(&self.uri) .header(CONTENT_TYPE, "application/json") @@ -139,8 +141,8 @@ impl Service for LogApiService { http_request }; - let count = request.get_metadata().event_count(); - let events_byte_size = 
request.get_metadata().events_byte_size(); + let metadata = std::mem::take(request.metadata_mut()); + let events_byte_size = metadata.into_events_estimated_json_encoded_byte_size(); let raw_byte_size = request.uncompressed_size; let mut http_request = http_request.header(CONTENT_LENGTH, request.body.len()); @@ -160,7 +162,6 @@ impl Service for LogApiService { DatadogApiError::from_result(client.call(http_request).in_current_span().await).map( |_| LogApiResponse { event_status: EventStatus::Delivered, - count, events_byte_size, raw_byte_size, }, diff --git a/src/sinks/datadog/logs/tests.rs b/src/sinks/datadog/logs/tests.rs index 86430fd5890bf..c8ef154280e4f 100644 --- a/src/sinks/datadog/logs/tests.rs +++ b/src/sinks/datadog/logs/tests.rs @@ -11,28 +11,29 @@ use futures::{ use http::request::Parts; use hyper::StatusCode; use indoc::indoc; -use vector_core::event::{BatchNotifier, BatchStatus, Event, LogEvent}; +use vector_core::{ + config::{init_telemetry, Tags, Telemetry}, + event::{BatchNotifier, BatchStatus, Event, LogEvent}, +}; use crate::{ config::SinkConfig, http::HttpError, sinks::{ - datadog::logs::DatadogLogsConfig, - datadog::DatadogApiError, util::retries::RetryLogic, util::test::{build_test_server_status, load_sink}, }, test_util::{ components::{ - run_and_assert_sink_compliance, run_and_assert_sink_error, COMPONENT_ERROR_TAGS, - SINK_TAGS, + run_and_assert_data_volume_sink_compliance, run_and_assert_sink_compliance, + run_and_assert_sink_error, COMPONENT_ERROR_TAGS, DATA_VOLUME_SINK_TAGS, SINK_TAGS, }, next_addr, random_lines_with_stream, }, tls::TlsError, }; -use super::service::LogApiRetry; +use super::{super::DatadogApiError, config::DatadogLogsConfig, service::LogApiRetry}; // The sink must support v1 and v2 API endpoints which have different codes for // signaling status. This enum allows us to signal which API endpoint and what @@ -73,6 +74,13 @@ fn event_with_api_key(msg: &str, key: &str) -> Event { e } +#[derive(PartialEq)] +enum TestType { + Happy, + Telemetry, + Error, +} + /// Starts a test sink with random lines running into it /// /// This function starts a Datadog Logs sink with a simplistic configuration and @@ -85,8 +93,20 @@ fn event_with_api_key(msg: &str, key: &str) -> Event { async fn start_test_detail( api_status: ApiStatus, batch_status: BatchStatus, - is_error: bool, + test_type: TestType, ) -> (Vec, Receiver<(http::request::Parts, Bytes)>) { + if test_type == TestType::Telemetry { + init_telemetry( + Telemetry { + tags: Tags { + emit_service: true, + emit_source: true, + }, + }, + true, + ); + } + let config = indoc! 
{r#" default_api_key = "atoken" compression = "none" @@ -107,10 +127,12 @@ async fn start_test_detail( let (batch, receiver) = BatchNotifier::new_with_receiver(); let (expected, events) = random_lines_with_stream(100, 10, Some(batch)); - if is_error { - run_and_assert_sink_error(sink, events, &COMPONENT_ERROR_TAGS).await; - } else { - run_and_assert_sink_compliance(sink, events, &SINK_TAGS).await; + match test_type { + TestType::Happy => run_and_assert_sink_compliance(sink, events, &SINK_TAGS).await, + TestType::Error => run_and_assert_sink_error(sink, events, &COMPONENT_ERROR_TAGS).await, + TestType::Telemetry => { + run_and_assert_data_volume_sink_compliance(sink, events, &DATA_VOLUME_SINK_TAGS).await + } } assert_eq!(receiver.await, batch_status); @@ -122,14 +144,21 @@ async fn start_test_success( api_status: ApiStatus, batch_status: BatchStatus, ) -> (Vec, Receiver<(http::request::Parts, Bytes)>) { - start_test_detail(api_status, batch_status, false).await + start_test_detail(api_status, batch_status, TestType::Happy).await +} + +async fn start_test_telemetry( + api_status: ApiStatus, + batch_status: BatchStatus, +) -> (Vec, Receiver<(http::request::Parts, Bytes)>) { + start_test_detail(api_status, batch_status, TestType::Telemetry).await } async fn start_test_error( api_status: ApiStatus, batch_status: BatchStatus, ) -> (Vec, Receiver<(http::request::Parts, Bytes)>) { - start_test_detail(api_status, batch_status, true).await + start_test_detail(api_status, batch_status, TestType::Error).await } /// Assert the basic functionality of the sink in good conditions @@ -176,6 +205,13 @@ async fn smoke() { } } +/// Assert the sink emits source and service tags when run with telemetry configured. +#[tokio::test] +async fn telemetry() { + let (expected, rx) = start_test_telemetry(ApiStatus::OKv1, BatchStatus::Delivered).await; + let _ = rx.take(expected.len()).collect::>().await; +} + #[tokio::test] /// Assert delivery error behavior for v1 API /// diff --git a/src/sinks/datadog/metrics/config.rs b/src/sinks/datadog/metrics/config.rs index d7ef707e118f4..245192147eaad 100644 --- a/src/sinks/datadog/metrics/config.rs +++ b/src/sinks/datadog/metrics/config.rs @@ -60,6 +60,11 @@ impl DatadogMetricsEndpoint { DatadogMetricsEndpoint::Sketches => "application/x-protobuf", } } + + // Gets whether or not this is a series endpoint. + pub const fn is_series(self) -> bool { + matches!(self, Self::Series) + } } /// Maps Datadog metric endpoints to their actual URI. @@ -87,7 +92,7 @@ impl DatadogMetricsEndpointConfiguration { } /// Configuration for the `datadog_metrics` sink. 
-#[configurable_component(sink("datadog_metrics"))] +#[configurable_component(sink("datadog_metrics", "Publish metric events to Datadog."))] #[derive(Clone, Debug, Default)] #[serde(deny_unknown_fields)] pub struct DatadogMetricsConfig { @@ -119,6 +124,7 @@ pub struct DatadogMetricsConfig { impl_generate_config_from_default!(DatadogMetricsConfig); #[async_trait::async_trait] +#[typetag::serde(name = "datadog_metrics")] impl SinkConfig for DatadogMetricsConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let client = self.build_client(&cx.proxy)?; diff --git a/src/sinks/datadog/metrics/encoder.rs b/src/sinks/datadog/metrics/encoder.rs index a2bd8330c5f35..0dd6c393e31b5 100644 --- a/src/sinks/datadog/metrics/encoder.rs +++ b/src/sinks/datadog/metrics/encoder.rs @@ -7,11 +7,13 @@ use std::{ use bytes::{BufMut, Bytes}; use chrono::{DateTime, Utc}; +use once_cell::sync::OnceCell; use prost::Message; use snafu::{ResultExt, Snafu}; use vector_core::{ config::{log_schema, LogSchema}, event::{metric::MetricSketch, Metric, MetricTags, MetricValue}, + metrics::AgentDDSketch, }; use super::config::{ @@ -19,7 +21,8 @@ use super::config::{ }; use crate::{ common::datadog::{DatadogMetricType, DatadogPoint, DatadogSeriesMetric}, - sinks::util::{encode_namespace, Compression, Compressor}, + proto::fds::protobuf_descriptors, + sinks::util::{encode_namespace, request_builder::EncodeResult, Compression, Compressor}, }; const SERIES_PAYLOAD_HEADER: &[u8] = b"{\"series\":["; @@ -37,6 +40,17 @@ pub enum CreateError { InvalidLimits, } +impl CreateError { + /// Gets the telemetry-friendly string version of this error. + /// + /// The value will be a short string with only lowercase letters and underscores. + pub const fn as_error_type(&self) -> &'static str { + match self { + Self::InvalidLimits => "invalid_payload_limits", + } + } +} + #[derive(Debug, Snafu)] pub enum EncoderError { #[snafu(display( @@ -49,11 +63,31 @@ pub enum EncoderError { metric_value: &'static str, }, - #[snafu(display("Failed to encode series metrics to JSON: {}", source))] + #[snafu( + context(false), + display("Failed to encode series metric to JSON: {source}") + )] JsonEncodingFailed { source: serde_json::Error }, - #[snafu(display("Failed to encode sketch metrics to Protocol Buffers: {}", source))] - ProtoEncodingFailed { source: prost::EncodeError }, + // Currently, the only time `prost` ever emits `EncodeError` is when there is insufficient + // buffer capacity, so we don't need to hold on to the error, and we can just hardcode this. + #[snafu(display( + "Failed to encode sketch metric to Protocol Buffers: insufficient buffer capacity." + ))] + ProtoEncodingFailed, +} + +impl EncoderError { + /// Gets the telemetry-friendly string version of this error. + /// + /// The value will be a short string with only lowercase letters and underscores. + pub const fn as_error_type(&self) -> &'static str { + match self { + Self::InvalidMetric { .. } => "invalid_metric", + Self::JsonEncodingFailed { .. 
} => "failed_to_encode_series", + Self::ProtoEncodingFailed => "failed_to_encode_sketch", + } + } } #[derive(Debug, Snafu)] @@ -64,9 +98,6 @@ pub enum FinishError { ))] CompressionFailed { source: io::Error }, - #[snafu(display("Failed to encode pending metrics: {}", source))] - PendingEncodeFailed { source: EncoderError }, - #[snafu(display("Finished payload exceeded the (un)compressed size limits"))] TooLarge { metrics: Vec, @@ -81,7 +112,6 @@ impl FinishError { pub const fn as_error_type(&self) -> &'static str { match self { Self::CompressionFailed { .. } => "compression_failed", - Self::PendingEncodeFailed { .. } => "pending_encode_failed", Self::TooLarge { .. } => "too_large", } } @@ -91,21 +121,15 @@ struct EncoderState { writer: Compressor, written: usize, buf: Vec, - - pending: Vec, processed: Vec, } impl Default for EncoderState { fn default() -> Self { - EncoderState { - // We use the "zlib default" compressor because it's all Datadog supports, and adding it - // generically to `Compression` would make things a little weird because of the - // conversion trait implementations that are also only none vs gzip. + Self { writer: get_compressor(), written: 0, buf: Vec::with_capacity(1024), - pending: Vec::new(), processed: Vec::new(), } } @@ -145,7 +169,7 @@ impl DatadogMetricsEncoder { compressed_limit: usize, ) -> Result { let (uncompressed_limit, compressed_limit) = - validate_payload_size_limits(uncompressed_limit, compressed_limit) + validate_payload_size_limits(endpoint, uncompressed_limit, compressed_limit) .ok_or(CreateError::InvalidLimits)?; Ok(Self { @@ -195,15 +219,23 @@ impl DatadogMetricsEncoder { { return Ok(Some(metric)); } - serde_json::to_writer(&mut self.state.buf, series) - .context(JsonEncodingFailedSnafu)?; + serde_json::to_writer(&mut self.state.buf, series)?; } } - // We can't encode sketches incrementally (yet), so we don't do any encoding here. We - // simply store it for later, and in `try_encode_pending`, any such pending metrics will be - // encoded in a single operation. + // Sketches are encoded via ProtoBuf, also in an incremental fashion. DatadogMetricsEndpoint::Sketches => match metric.value() { - MetricValue::Sketch { .. } => {} + MetricValue::Sketch { sketch } => match sketch { + MetricSketch::AgentDDSketch(ddsketch) => { + encode_sketch_incremental( + &metric, + ddsketch, + &self.default_namespace, + self.log_schema, + &mut self.state.buf, + ) + .map_err(|_| EncoderError::ProtoEncodingFailed)?; + } + }, value => { return Err(EncoderError::InvalidMetric { expected: "sketches", @@ -213,21 +245,14 @@ impl DatadogMetricsEncoder { }, } - // If we actually encoded a metric, we try to see if our temporary buffer can be compressed - // and added to the overall payload. Otherwise, it means we're deferring the metric for - // later encoding, so we store it off to the side. - if !self.state.buf.is_empty() { - match self.try_compress_buffer() { - Err(_) | Ok(false) => return Ok(Some(metric)), - Ok(true) => {} + // Try and see if our temporary buffer can be written to the compressor. + match self.try_compress_buffer() { + Err(_) | Ok(false) => Ok(Some(metric)), + Ok(true) => { + self.state.processed.push(metric); + Ok(None) } - - self.state.processed.push(metric); - } else { - self.state.pending.push(metric); } - - Ok(None) } fn try_compress_buffer(&mut self) -> io::Result { @@ -254,7 +279,8 @@ impl DatadogMetricsEncoder { // assume the worst case while our limits assume the worst case _overhead_. 
Maybe our // numbers are technically off in the end, but `finish` catches that for us, too. let compressed_len = self.state.writer.get_ref().len(); - if compressed_len + n > self.compressed_limit { + let max_compressed_metric_len = n + max_compressed_overhead_len(n); + if compressed_len + max_compressed_metric_len > self.compressed_limit { return Ok(false); } @@ -292,56 +318,7 @@ impl DatadogMetricsEncoder { self.encode_single_metric(metric) } - fn try_encode_pending(&mut self) -> Result<(), FinishError> { - // The Datadog Agent uses a particular Protocol Buffers library to incrementally encode the - // DDSketch structures into a payload, similar to how we incrementally encode the series - // metrics. Unfortunately, there's no existing Rust crate that allows writing out Protocol - // Buffers payloads by hand, so we have to cheat a little and buffer up the metrics until - // the very end. - // - // `try_encode`, and thus `encode_single_metric`, specifically store sketch-oriented metrics - // off to the side for this very purpose, letting us gather them all here, encoding them - // into a single Protocol Buffers payload. - // - // Naturally, this means we might actually generate a payload that's too big. This is a - // problem for the caller to figure out. Presently, the only usage of this encoder will - // naively attempt to split the batch into two and try again. - - // Only go through this if we're targeting the sketch endpoint. - if !(matches!(self.endpoint, DatadogMetricsEndpoint::Sketches)) { - return Ok(()); - } - - // Consume of all of the "pending" metrics and try to write them out as sketches. - let pending = mem::take(&mut self.state.pending); - write_sketches( - &pending, - &self.default_namespace, - self.log_schema, - &mut self.state.buf, - ) - .context(PendingEncodeFailedSnafu)?; - - if self.try_compress_buffer().context(CompressionFailedSnafu)? { - // Since we encoded and compressed them successfully, add them to the "processed" list. - self.state.processed.extend(pending); - Ok(()) - } else { - // The payload was too big overall, which we can't do anything about. Up to the caller - // now to try to encode them again after splitting the batch. - Err(FinishError::TooLarge { - metrics: pending, - // TODO: Hard-coded split code for now because we need to hoist up the logic for - // calculating the recommended splits to an instance method or something. - recommended_splits: 2, - }) - } - } - - pub fn finish(&mut self) -> Result<(Bytes, Vec, usize), FinishError> { - // Try to encode any pending metrics we had stored up. - self.try_encode_pending()?; - + pub fn finish(&mut self) -> Result<(EncodeResult, Vec), FinishError> { // Write any payload footer necessary for the configured endpoint. let n = write_payload_footer(self.endpoint, &mut self.state.writer) .context(CompressionFailedSnafu)?; @@ -371,7 +348,10 @@ impl DatadogMetricsEncoder { if recommended_splits == 1 { // "One" split means no splits needed: our payload didn't exceed either of the limits. 
- Ok((payload, processed, raw_bytes_written)) + Ok(( + EncodeResult::compressed(payload, raw_bytes_written), + processed, + )) } else { Err(FinishError::TooLarge { metrics: processed, @@ -381,6 +361,104 @@ impl DatadogMetricsEncoder { } } +fn get_sketch_payload_sketches_field_number() -> u32 { + static SKETCH_PAYLOAD_SKETCHES_FIELD_NUM: OnceCell = OnceCell::new(); + *SKETCH_PAYLOAD_SKETCHES_FIELD_NUM.get_or_init(|| { + let descriptors = protobuf_descriptors(); + let descriptor = descriptors + .get_message_by_name("datadog.agentpayload.SketchPayload") + .expect("should not fail to find `SketchPayload` message in descriptor pool"); + + descriptor + .get_field_by_name("sketches") + .map(|field| field.number()) + .expect("`sketches` field must exist in `SketchPayload` message") + }) +} + +fn sketch_to_proto_message( + metric: &Metric, + ddsketch: &AgentDDSketch, + default_namespace: &Option>, + log_schema: &'static LogSchema, +) -> ddmetric_proto::sketch_payload::Sketch { + let name = get_namespaced_name(metric, default_namespace); + let ts = encode_timestamp(metric.timestamp()); + let mut tags = metric.tags().cloned().unwrap_or_default(); + let host = tags.remove(log_schema.host_key()).unwrap_or_default(); + let tags = encode_tags(&tags); + + let cnt = ddsketch.count() as i64; + let min = ddsketch + .min() + .expect("min should be present for non-empty sketch"); + let max = ddsketch + .max() + .expect("max should be present for non-empty sketch"); + let avg = ddsketch + .avg() + .expect("avg should be present for non-empty sketch"); + let sum = ddsketch + .sum() + .expect("sum should be present for non-empty sketch"); + + let (bins, counts) = ddsketch.bin_map().into_parts(); + let k = bins.into_iter().map(Into::into).collect(); + let n = counts.into_iter().map(Into::into).collect(); + + ddmetric_proto::sketch_payload::Sketch { + metric: name, + tags, + host, + distributions: Vec::new(), + dogsketches: vec![ddmetric_proto::sketch_payload::sketch::Dogsketch { + ts, + cnt, + min, + max, + avg, + sum, + k, + n, + }], + } +} + +fn encode_sketch_incremental( + metric: &Metric, + ddsketch: &AgentDDSketch, + default_namespace: &Option>, + log_schema: &'static LogSchema, + buf: &mut B, +) -> Result<(), prost::EncodeError> +where + B: BufMut, +{ + // This encodes a single sketch metric incrementally, which means that we specifically write it + // as if we were writing a single field entry in the overall `SketchPayload` message + // type. + // + // By doing so, we can encode multiple sketches and concatenate all the buffers, and have the + // resulting buffer appear as if it's a normal `SketchPayload` message with a bunch of repeats + // of the `sketches` field. + // + // Crucially, this code works because `SketchPayload` has two fields -- metadata and sketches -- + // and we never actually set the metadata field... so the resulting message generated overall + // for `SketchPayload` with a single sketch looks just like as if we literally wrote out a + // single value for the given field. + + let sketch_proto = sketch_to_proto_message(metric, ddsketch, default_namespace, log_schema); + + // Manually write the field tag for `sketches` and then encode the sketch payload directly as a + // length-delimited message. 
+ prost::encoding::encode_key( + get_sketch_payload_sketches_field_number(), + prost::encoding::WireType::LengthDelimited, + buf, + ); + sketch_proto.encode_length_delimited(buf) +} + fn get_namespaced_name(metric: &Metric, default_namespace: &Option>) -> String { encode_namespace( metric @@ -481,89 +559,10 @@ fn generate_series_metrics( Ok(results) } -fn write_sketches( - metrics: &[Metric], - default_namespace: &Option>, - log_schema: &'static LogSchema, - buf: &mut B, -) -> Result<(), EncoderError> -where - B: BufMut, -{ - let mut sketches = Vec::new(); - for metric in metrics { - match metric.value() { - MetricValue::Sketch { sketch } => match sketch { - MetricSketch::AgentDDSketch(ddsketch) => { - // Don't encode any empty sketches. - if ddsketch.is_empty() { - continue; - } - - let name = get_namespaced_name(metric, default_namespace); - let ts = encode_timestamp(metric.timestamp()); - let mut tags = metric.tags().cloned().unwrap_or_default(); - let host = tags.remove(log_schema.host_key()).unwrap_or_default(); - let tags = encode_tags(&tags); - - let cnt = ddsketch.count() as i64; - let min = ddsketch - .min() - .expect("min should be present for non-empty sketch"); - let max = ddsketch - .max() - .expect("max should be present for non-empty sketch"); - let avg = ddsketch - .avg() - .expect("avg should be present for non-empty sketch"); - let sum = ddsketch - .sum() - .expect("sum should be present for non-empty sketch"); - - let (bins, counts) = ddsketch.bin_map().into_parts(); - let k = bins.into_iter().map(Into::into).collect(); - let n = counts.into_iter().map(Into::into).collect(); - - let sketch = ddmetric_proto::sketch_payload::Sketch { - metric: name, - tags, - host, - distributions: Vec::new(), - dogsketches: vec![ddmetric_proto::sketch_payload::sketch::Dogsketch { - ts, - cnt, - min, - max, - avg, - sum, - k, - n, - }], - }; - - sketches.push(sketch); - } - }, - // We filter out non-sketch metrics during `encode_single_metric` if we're targeting - // the sketches endpoint. - _ => unreachable!(), - } - } - - let sketch_payload = ddmetric_proto::SketchPayload { - // TODO: The "common metadata" fields are things that only very loosely apply to Vector, or - // are hard to characterize -- for example, what's the API key for a sketch that didn't originate - // from the Datadog Agent? -- so we're just omitting it here in the hopes it doesn't - // actually matter. - metadata: None, - sketches, - }; - - // Now try encoding this sketch payload, and then try to compress it. - sketch_payload.encode(buf).context(ProtoEncodingFailedSnafu) -} - fn get_compressor() -> Compressor { + // We use the "zlib default" compressor because it's all Datadog supports, and adding it + // generically to `Compression` would make things a little weird because of the conversion trait + // implementations that are also only none vs gzip. Compression::zlib_default().into() } @@ -571,39 +570,52 @@ const fn max_uncompressed_header_len() -> usize { SERIES_PAYLOAD_HEADER.len() + SERIES_PAYLOAD_FOOTER.len() } +// Datadog ingest APIs accept zlib, which is what we're accounting for here. By default, zlib +// has a 2 byte header and 4 byte CRC trailer. [1] +// +// [1] https://www.zlib.net/zlib_tech.html +const ZLIB_HEADER_TRAILER: usize = 6; + const fn max_compression_overhead_len(compressed_limit: usize) -> usize { - // Datadog ingest APIs accept zlib, which is what we're accounting for here. By default, zlib - // has a 2 byte header and 4 byte CRC trailer. 
Additionally, Deflate, the underlying - // compression algorithm, has a technique to ensure that input data can't be encoded in such a - // way where it's expanded by a meaningful amount. + // We calculate the overhead as the zlib header/trailer plus the worst case overhead of + // compressing `compressed_limit` bytes, such that we assume all of the data we write may not be + // compressed at all. + ZLIB_HEADER_TRAILER + max_compressed_overhead_len(compressed_limit) +} + +const fn max_compressed_overhead_len(len: usize) -> usize { + // Datadog ingest APIs accept zlib, which is what we're accounting for here. // - // This technique allows storing blocks of uncompressed data with only 5 bytes of overhead per - // block. Technically, the blocks can be up to 65KB in Deflate, but modern zlib implementations - // use block sizes of 16KB. [1][2] + // Deflate, the underlying compression algorithm, has a technique to ensure that input data + // can't be encoded in such a way where it's expanded by a meaningful amount. This technique + // allows storing blocks of uncompressed data with only 5 bytes of overhead per block. + // Technically, the blocks can be up to 65KB in Deflate, but modern zlib implementations use + // block sizes of 16KB. [1][2] // - // With all of that said, we calculate the overhead as the header plus trailer plus the given - // compressed size limit, minus the known overhead, multiplied such that it accounts for the - // worse case of entirely uncompressed data. + // We calculate the overhead of compressing a given `len` bytes as the worst case of that many + // bytes being written to the compressor and being unable to be compressed at all // // [1] https://www.zlib.net/zlib_tech.html // [2] https://www.bolet.org/~pornin/deflate-flush-fr.html - const HEADER_TRAILER: usize = 6; const STORED_BLOCK_SIZE: usize = 16384; - HEADER_TRAILER + (1 + compressed_limit.saturating_sub(HEADER_TRAILER) / STORED_BLOCK_SIZE) * 5 + (1 + len.saturating_sub(ZLIB_HEADER_TRAILER) / STORED_BLOCK_SIZE) * 5 } const fn validate_payload_size_limits( + endpoint: DatadogMetricsEndpoint, uncompressed_limit: usize, compressed_limit: usize, ) -> Option<(usize, usize)> { - // Get the maximum possible length of the header/footer combined. - // - // This only matters for series metrics at the moment, since sketches are encoded in a single - // shot to their Protocol Buffers representation. We're "wasting" `header_len` bytes in the - // case of sketches, but we're also talking about like 10 bytes: not enough to care about. - let header_len = max_uncompressed_header_len(); - if uncompressed_limit <= header_len { - return None; + if endpoint.is_series() { + // For series, we need to make sure the uncompressed limit can account for the header/footer + // we would add that wraps the encoded metrics up in the expected JSON object. This does + // imply that adding 1 to this limit would be allowed, and obviously we can't encode a + // series metric in a single byte, but this is just a simple sanity check, not an exhaustive + // search of the absolute bare minimum size. 
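To make the stored-block accounting above concrete, here is a worked example (not part of the patch) against the two helpers as defined in this file, assuming both are in scope as in the existing test module imports. With 64 KiB of incompressible input, the data spans four 16 KiB stored blocks, so the per-block framing is 20 bytes, and the zlib container adds another 6.

#[test]
fn compression_overhead_worked_example() {
    // (1 + (65_536 - 6) / 16_384) * 5 = (1 + 3) * 5 = 20 bytes of stored-block framing.
    assert_eq!(max_compressed_overhead_len(65_536), 20);
    // Adding the 2-byte zlib header and 4-byte CRC trailer gives the total container overhead.
    assert_eq!(max_compression_overhead_len(65_536), 26);
}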
+ let header_len = max_uncompressed_header_len(); + if uncompressed_limit <= header_len { + return None; + } } // Get the maximum possible overhead of the compression container, based on the incoming @@ -659,6 +671,7 @@ mod tests { use std::{ io::{self, copy}, num::NonZeroU32, + sync::Arc, }; use bytes::{BufMut, Bytes, BytesMut}; @@ -668,16 +681,21 @@ mod tests { arbitrary::any, collection::btree_map, num::f64::POSITIVE as ARB_POSITIVE_F64, prop_assert, proptest, strategy::Strategy, string::string_regex, }; + use prost::Message; use vector_core::{ - config::log_schema, - event::{metric::TagValue, Metric, MetricKind, MetricTags, MetricValue}, + config::{log_schema, LogSchema}, + event::{ + metric::{MetricSketch, TagValue}, + Metric, MetricKind, MetricTags, MetricValue, + }, metric_tags, metrics::AgentDDSketch, }; use super::{ - encode_tags, encode_timestamp, generate_series_metrics, get_compressor, - max_compression_overhead_len, max_uncompressed_header_len, validate_payload_size_limits, + ddmetric_proto, encode_sketch_incremental, encode_tags, encode_timestamp, + generate_series_metrics, get_compressor, max_compression_overhead_len, + max_uncompressed_header_len, sketch_to_proto_message, validate_payload_size_limits, write_payload_footer, write_payload_header, DatadogMetricsEncoder, EncoderError, }; use crate::{ @@ -714,6 +732,10 @@ mod tests { compressor.finish().expect("should not fail").freeze() } + fn get_compressed_empty_sketches_payload() -> Bytes { + get_compressor().finish().expect("should not fail").freeze() + } + fn decompress_payload(payload: Bytes) -> io::Result { let mut decompressor = ZlibDecoder::new(&payload[..]); let mut decompressed = BytesMut::new().writer(); @@ -738,6 +760,41 @@ mod tests { } } + fn encode_sketches_normal( + metrics: &[Metric], + default_namespace: &Option>, + log_schema: &'static LogSchema, + buf: &mut B, + ) where + B: BufMut, + { + let mut sketches = Vec::new(); + for metric in metrics { + let MetricValue::Sketch { sketch } = metric.value() else { panic!("must be sketch") }; + match sketch { + MetricSketch::AgentDDSketch(ddsketch) => { + // Don't encode any empty sketches. + if ddsketch.is_empty() { + continue; + } + + let sketch = + sketch_to_proto_message(metric, ddsketch, default_namespace, log_schema); + + sketches.push(sketch); + } + } + } + + let sketch_payload = ddmetric_proto::SketchPayload { + metadata: None, + sketches, + }; + + // Now try encoding this sketch payload, and then try to compress it. + sketch_payload.encode(buf).unwrap() + } + #[test] fn test_encode_tags() { assert_eq!( @@ -825,16 +882,9 @@ mod tests { let result = encoder.finish(); assert!(result.is_ok()); - let (payload, mut processed, raw_bytes) = result.unwrap(); + let (_payload, mut processed) = result.unwrap(); assert_eq!(processed.len(), 1); assert_eq!(expected, processed.pop().unwrap()); - assert_eq!(100, payload.len()); - - // The payload is: - // {"series":[{"metric":"basic_counter","type":"count","interval":null,"points":[[1651664333,3.14]],"tags":[]}]} - // which comes to a total of 98 bytes. - // There are extra bytes that make up the header and footer. These should not be included in the raw bytes. 
- assert_eq!(109, raw_bytes); } #[test] @@ -855,25 +905,60 @@ mod tests { let result = encoder.finish(); assert!(result.is_ok()); - let (payload, mut processed, raw_bytes) = result.unwrap(); + let (_payload, mut processed) = result.unwrap(); assert_eq!(processed.len(), 1); assert_eq!(expected, processed.pop().unwrap()); + } - assert_eq!(81, payload.len()); - assert_eq!(70, raw_bytes); + #[test] + fn encode_multiple_sketch_metrics_normal_vs_incremental() { + // This tests our incremental sketch encoding against the more straightforward approach of + // just building/encoding a full `SketchPayload` message. + let metrics = vec![ + get_simple_sketch(), + get_simple_sketch(), + get_simple_sketch(), + ]; + + let mut normal_buf = Vec::new(); + encode_sketches_normal(&metrics, &None, log_schema(), &mut normal_buf); + + let mut incremental_buf = Vec::new(); + for metric in &metrics { + match metric.value() { + MetricValue::Sketch { sketch } => match sketch { + MetricSketch::AgentDDSketch(ddsketch) => encode_sketch_incremental( + metric, + ddsketch, + &None, + log_schema(), + &mut incremental_buf, + ) + .unwrap(), + }, + _ => panic!("should be a sketch"), + } + } + + assert_eq!(normal_buf, incremental_buf); } #[test] - fn payload_size_limits() { + fn payload_size_limits_series() { // Get the maximum length of the header/trailer data. let header_len = max_uncompressed_header_len(); // This is too small. - let result = validate_payload_size_limits(header_len, usize::MAX); + let result = + validate_payload_size_limits(DatadogMetricsEndpoint::Series, header_len, usize::MAX); assert_eq!(result, None); // This is just right. - let result = validate_payload_size_limits(header_len + 1, usize::MAX); + let result = validate_payload_size_limits( + DatadogMetricsEndpoint::Series, + header_len + 1, + usize::MAX, + ); assert_eq!(result, Some((header_len + 1, usize::MAX))); // Get the maximum compressed overhead length, based on our input uncompressed size. This @@ -882,16 +967,52 @@ mod tests { let compression_overhead_len = max_compression_overhead_len(usize::MAX); // This is too small. - let result = validate_payload_size_limits(usize::MAX, compression_overhead_len); + let result = validate_payload_size_limits( + DatadogMetricsEndpoint::Series, + usize::MAX, + compression_overhead_len, + ); + assert_eq!(result, None); + + // This is just right. + let result = validate_payload_size_limits( + DatadogMetricsEndpoint::Series, + usize::MAX, + compression_overhead_len + 1, + ); + assert_eq!(result, Some((usize::MAX, compression_overhead_len + 1))); + } + + #[test] + fn payload_size_limits_sketches() { + // There's no lower bound on uncompressed size for the sketches payload. + let result = validate_payload_size_limits(DatadogMetricsEndpoint::Sketches, 0, usize::MAX); + assert_eq!(result, Some((0, usize::MAX))); + + // Get the maximum compressed overhead length, based on our input uncompressed size. This + // represents the worst case overhead based on the input data (of length usize::MAX, in this + // case) being entirely incompressible. + let compression_overhead_len = max_compression_overhead_len(usize::MAX); + + // This is too small. + let result = validate_payload_size_limits( + DatadogMetricsEndpoint::Sketches, + usize::MAX, + compression_overhead_len, + ); assert_eq!(result, None); // This is just right. 
- let result = validate_payload_size_limits(usize::MAX, compression_overhead_len + 1); + let result = validate_payload_size_limits( + DatadogMetricsEndpoint::Sketches, + usize::MAX, + compression_overhead_len + 1, + ); assert_eq!(result, Some((usize::MAX, compression_overhead_len + 1))); } #[test] - fn encode_breaks_out_when_limit_reached_uncompressed() { + fn encode_series_breaks_out_when_limit_reached_uncompressed() { // We manually create the encoder with an arbitrarily low "uncompressed" limit but high // "compressed" limit to exercise the codepath that should avoid encoding a metric when the // uncompressed payload would exceed the limit. @@ -905,7 +1026,8 @@ mod tests { .expect("payload size limits should be valid"); // Trying to encode a metric that would cause us to exceed our uncompressed limits will - // _not_ return an error from `try_encode`. + // _not_ return an error from `try_encode`, but instead will simply return back the metric + // as it could not be added. let counter = get_simple_counter(); let result = encoder.try_encode(counter.clone()); assert!(result.is_ok()); @@ -917,17 +1039,55 @@ mod tests { let result = encoder.finish(); assert!(result.is_ok()); - let (payload, processed, raw_bytes) = result.unwrap(); - let empty_payload = get_compressed_empty_series_payload(); - assert_eq!(payload, empty_payload); + let (payload, processed) = result.unwrap(); + assert_eq!( + payload.uncompressed_byte_size, + max_uncompressed_header_len() + ); + assert_eq!( + payload.into_payload(), + get_compressed_empty_series_payload() + ); assert_eq!(processed.len(), 0); + } + + #[test] + fn encode_sketches_breaks_out_when_limit_reached_uncompressed() { + // We manually create the encoder with an arbitrarily low "uncompressed" limit but high + // "compressed" limit to exercise the codepath that should avoid encoding a metric when the + // uncompressed payload would exceed the limit. + let mut encoder = DatadogMetricsEncoder::with_payload_limits( + DatadogMetricsEndpoint::Sketches, + None, + 1, + usize::MAX, + ) + .expect("payload size limits should be valid"); + + // Trying to encode a metric that would cause us to exceed our uncompressed limits will + // _not_ return an error from `try_encode`, but instead will simply return back the metric + // as it could not be added. + let sketch = get_simple_sketch(); + let result = encoder.try_encode(sketch.clone()); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), Some(sketch)); - // Just the header and footer. - assert_eq!(13, raw_bytes); + // And similarly, since we didn't actually encode a metric, we _should_ be able to finish + // this payload, but it will be empty and no processed metrics should be returned. + let result = encoder.finish(); + assert!(result.is_ok()); + + let (payload, processed) = result.unwrap(); + assert_eq!(payload.uncompressed_byte_size, 0); + assert_eq!( + payload.into_payload(), + get_compressed_empty_sketches_payload() + ); + assert_eq!(processed.len(), 0); } #[test] - fn encode_breaks_out_when_limit_reached_compressed() { + fn encode_series_breaks_out_when_limit_reached_compressed() { // We manually create the encoder with an arbitrarily low "compressed" limit but high // "uncompressed" limit to exercise the codepath that should avoid encoding a metric when the // compressed payload would exceed the limit. 
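The renamed and newly added `*_breaks_out_when_limit_reached_*` tests all exercise the same contract: hitting a size limit is not an error. The snippet below is an illustrative condensation of the assertions those tests make (the test name is hypothetical; the calls and expected values mirror the sketch-endpoint test in this patch).

#[test]
fn limit_contract_condensed() {
    // An encoder whose uncompressed limit cannot fit even one metric.
    let mut encoder = DatadogMetricsEncoder::with_payload_limits(
        DatadogMetricsEndpoint::Sketches,
        None,
        1,          // deliberately too-small uncompressed limit
        usize::MAX, // effectively unlimited compressed size
    )
    .expect("payload size limits should be valid");

    // `try_encode` hands the metric back instead of failing...
    let sketch = get_simple_sketch();
    assert_eq!(encoder.try_encode(sketch.clone()).unwrap(), Some(sketch));

    // ...and `finish` still succeeds with an empty payload and no processed metrics,
    // leaving the caller to retry the metric in a fresh payload.
    let (payload, processed) = encoder.finish().unwrap();
    assert_eq!(payload.uncompressed_byte_size, 0);
    assert!(processed.is_empty());
}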
@@ -942,7 +1102,8 @@ mod tests { .expect("payload size limits should be valid"); // Trying to encode a metric that would cause us to exceed our compressed limits will - // _not_ return an error from `try_encode`. + // _not_ return an error from `try_encode`, but instead will simply return back the metric + // as it could not be added. let counter = get_simple_counter(); let result = encoder.try_encode(counter.clone()); assert!(result.is_ok()); @@ -954,13 +1115,54 @@ mod tests { let result = encoder.finish(); assert!(result.is_ok()); - let (payload, processed, raw_bytes) = result.unwrap(); - let empty_payload = get_compressed_empty_series_payload(); - assert_eq!(payload, empty_payload); + let (payload, processed) = result.unwrap(); + assert_eq!( + payload.uncompressed_byte_size, + max_uncompressed_header_len() + ); + assert_eq!( + payload.into_payload(), + get_compressed_empty_series_payload() + ); assert_eq!(processed.len(), 0); + } - // Just the header and footer. - assert_eq!(13, raw_bytes); + #[test] + fn encode_sketches_breaks_out_when_limit_reached_compressed() { + // We manually create the encoder with an arbitrarily low "compressed" limit but high + // "uncompressed" limit to exercise the codepath that should avoid encoding a metric when the + // compressed payload would exceed the limit. + let uncompressed_limit = 128; + let compressed_limit = 16; + let mut encoder = DatadogMetricsEncoder::with_payload_limits( + DatadogMetricsEndpoint::Sketches, + None, + uncompressed_limit, + compressed_limit, + ) + .expect("payload size limits should be valid"); + + // Trying to encode a metric that would cause us to exceed our compressed limits will + // _not_ return an error from `try_encode`, but instead will simply return back the metric + // as it could not be added. + let sketch = get_simple_sketch(); + let result = encoder.try_encode(sketch.clone()); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), Some(sketch)); + + // And similarly, since we didn't actually encode a metric, we _should_ be able to finish + // this payload, but it will be empty (effectively, the header/footer will exist) and no + // processed metrics should be returned. 
+ let result = encoder.finish(); + assert!(result.is_ok()); + + let (payload, processed) = result.unwrap(); + assert_eq!(payload.uncompressed_byte_size, 0); + assert_eq!( + payload.into_payload(), + get_compressed_empty_sketches_payload() + ); + assert_eq!(processed.len(), 0); } fn arb_counter_metric() -> impl Strategy { @@ -1003,7 +1205,8 @@ mod tests { if let Ok(mut encoder) = result { _ = encoder.try_encode(metric); - if let Ok((payload, _processed, _raw_bytes)) = encoder.finish() { + if let Ok((payload, _processed)) = encoder.finish() { + let payload = payload.into_payload(); prop_assert!(payload.len() <= compressed_limit); let result = decompress_payload(payload); diff --git a/src/sinks/datadog/metrics/request_builder.rs b/src/sinks/datadog/metrics/request_builder.rs index c6b287b39ccc1..d217986d6f520 100644 --- a/src/sinks/datadog/metrics/request_builder.rs +++ b/src/sinks/datadog/metrics/request_builder.rs @@ -1,7 +1,6 @@ use bytes::Bytes; -use serde_json::error::Category; use snafu::Snafu; -use std::{num::NonZeroUsize, sync::Arc}; +use std::sync::Arc; use vector_common::request_metadata::RequestMetadata; use vector_core::event::{EventFinalizers, Finalizable, Metric}; @@ -14,19 +13,19 @@ use crate::sinks::util::{metadata::RequestMetadataBuilder, IncrementalRequestBui #[derive(Debug, Snafu)] pub enum RequestBuilderError { - #[snafu(display("Failed to build the request builder: {}", error_type))] - FailedToBuild { error_type: &'static str }, + #[snafu( + context(false), + display("Failed to build the request builder: {source}") + )] + FailedToBuild { source: CreateError }, - #[snafu(display("Encoding of a metric failed ({})", reason))] - FailedToEncode { - reason: &'static str, - dropped_events: u64, - }, + #[snafu(context(false), display("Failed to encode metric: {source}"))] + FailedToEncode { source: EncoderError }, - #[snafu(display("A split payload was still too big to encode/compress within size limits"))] + #[snafu(display("A split payload was still too big to encode/compress within size limits."))] FailedToSplit { dropped_events: u64 }, - #[snafu(display("An unexpected error occurred"))] + #[snafu(display("An unexpected error occurred: {error_type}"))] Unexpected { error_type: &'static str, dropped_events: u64, @@ -34,78 +33,28 @@ pub enum RequestBuilderError { } impl RequestBuilderError { - /// Converts this error into its constituent parts: the error reason, and how many events were - /// dropped as a result. - pub const fn into_parts(self) -> (&'static str, &'static str, u64) { + /// Converts this error into its constituent parts: the error reason, the error type, and how + /// many events were dropped as a result. + pub fn into_parts(self) -> (String, &'static str, u64) { match self { - Self::FailedToBuild { error_type } => { - ("Failed to build the request builder.", error_type, 0) - } - Self::FailedToEncode { - reason, - dropped_events, - } => ("Encoding of a metric failed.", reason, dropped_events), + Self::FailedToBuild { source } => (source.to_string(), source.as_error_type(), 0), + // Encoding errors always happen at the per-metric level, so we could only ever drop a + // single metric/event at a time. + Self::FailedToEncode { source } => (source.to_string(), source.as_error_type(), 1), Self::FailedToSplit { dropped_events } => ( - "A split payload was still too big to encode/compress withing size limits.", + "A split payload was still too big to encode/compress withing size limits." 
+ .to_string(), "split_failed", dropped_events, ), Self::Unexpected { error_type, dropped_events, - } => ("An unexpected error occurred.", error_type, dropped_events), - } - } -} - -impl From for RequestBuilderError { - fn from(e: CreateError) -> Self { - match e { - CreateError::InvalidLimits => Self::FailedToBuild { - error_type: "invalid_payload_limits", - }, - } - } -} - -impl From for RequestBuilderError { - fn from(e: EncoderError) -> Self { - match e { - // Series metrics (JSON) are encoded incrementally, so we can only ever lose a single - // metric for a JSON encoding failure. - EncoderError::JsonEncodingFailed { source } => Self::FailedToEncode { - reason: match source.classify() { - Category::Io => "json_io", - Category::Syntax => "json_syntax", - Category::Data => "json_data", - Category::Eof => "json_eof", - }, - dropped_events: 1, - }, - // Sketch metrics (Protocol Buffers) are encoded in a single shot, so naturally we would - // expect `dropped_events` to be 1-N, instead of always 1. We should never emit this - // metric when calling `try_encode`, which is where we'd see the JSON variant of it. - // This is because sketch encoding happens at the end. - // - // Thus, we default `dropped_events` to 1, and if we actually hit this error when - // finishing up a payload, we'll fix up the true number of dropped events at that point. - EncoderError::ProtoEncodingFailed { .. } => Self::FailedToEncode { - // `prost` states that for an encoding error specifically, it can only ever fail due - // to insufficient capacity in the encoding buffer. - reason: "protobuf_insufficient_buf_capacity", - dropped_events: 1, - }, - // Not all metric types for valid depending on the configured endpoint of the encoder. - EncoderError::InvalidMetric { metric_value, .. } => Self::FailedToEncode { - // TODO: At some point, it would be nice to use `const_format` to build the reason - // as " _via_" to better understand in what context - // metric X is being considered as invalid. Practically it's not a huge issue, - // because the number of metric types are fixed and we should be able to inspect the - // code for issues, or if it became a big problem, we could just go ahead and do the - // `const_format` work... but it'd be nice to be ahead of curve when trivially possible. - reason: metric_value, - dropped_events: 1, - }, + } => ( + "An unexpected error occurred.".to_string(), + error_type, + dropped_events, + ), } } } @@ -115,7 +64,6 @@ pub struct DDMetricsMetadata { api_key: Option>, endpoint: DatadogMetricsEndpoint, finalizers: EventFinalizers, - raw_bytes: usize, } /// Incremental request builder specific to Datadog metrics. @@ -208,23 +156,21 @@ impl IncrementalRequestBuilder<((Option>, DatadogMetricsEndpoint), Vec< // If we encoded one or more metrics this pass, finalize the payload. 
if n > 0 { match encoder.finish() { - Ok((payload, mut metrics, raw_bytes_written)) => { + Ok((encode_result, mut metrics)) => { let finalizers = metrics.take_finalizers(); let metadata = DDMetricsMetadata { api_key: api_key.as_ref().map(Arc::clone), endpoint, finalizers, - raw_bytes: raw_bytes_written, }; - let builder = RequestMetadataBuilder::new( - metrics.len(), - raw_bytes_written, - raw_bytes_written, - ); - let bytes_len = NonZeroUsize::new(payload.len()) - .expect("payload should never be zero length"); - let request_metadata = builder.with_request_size(bytes_len); - results.push(Ok(((metadata, request_metadata), payload))); + + let request_metadata = + RequestMetadataBuilder::from_events(&metrics).build(&encode_result); + + results.push(Ok(( + (metadata, request_metadata), + encode_result.into_payload(), + ))); } Err(err) => match err { // The encoder informed us that the resulting payload was too big, so we're @@ -295,7 +241,6 @@ impl IncrementalRequestBuilder<((Option>, DatadogMetricsEndpoint), Vec< uri, content_type: ddmetrics_metadata.endpoint.content_type(), finalizers: ddmetrics_metadata.finalizers, - raw_bytes: ddmetrics_metadata.raw_bytes, metadata: request_metadata, } } @@ -328,21 +273,21 @@ fn encode_now_or_never( encoder .finish() - .map(|(payload, mut processed, raw_bytes_written)| { + .map(|(encode_result, mut processed)| { let finalizers = processed.take_finalizers(); let ddmetrics_metadata = DDMetricsMetadata { api_key, endpoint, finalizers, - raw_bytes: raw_bytes_written, }; - let builder = - RequestMetadataBuilder::new(metrics_len, raw_bytes_written, raw_bytes_written); - let bytes_len = - NonZeroUsize::new(payload.len()).expect("payload should never be zero length"); - let request_metadata = builder.with_request_size(bytes_len); - ((ddmetrics_metadata, request_metadata), payload) + let request_metadata = + RequestMetadataBuilder::from_events(&processed).build(&encode_result); + + ( + (ddmetrics_metadata, request_metadata), + encode_result.into_payload(), + ) }) .map_err(|_| RequestBuilderError::FailedToSplit { dropped_events: metrics_len as u64, diff --git a/src/sinks/datadog/metrics/service.rs b/src/sinks/datadog/metrics/service.rs index 267423260e35e..3e33c3c0fbb97 100644 --- a/src/sinks/datadog/metrics/service.rs +++ b/src/sinks/datadog/metrics/service.rs @@ -10,10 +10,9 @@ use http::{ use hyper::Body; use snafu::ResultExt; use tower::Service; -use vector_common::request_metadata::{MetaDescriptive, RequestMetadata}; +use vector_common::request_metadata::{GroupedCountByteSize, MetaDescriptive, RequestMetadata}; use vector_core::{ event::{EventFinalizers, EventStatus, Finalizable}, - internal_event::CountByteSize, stream::DriverResponse, }; use vrl::value::Value; @@ -63,7 +62,6 @@ pub struct DatadogMetricsRequest { pub uri: Uri, pub content_type: &'static str, pub finalizers: EventFinalizers, - pub raw_bytes: usize, pub metadata: RequestMetadata, } @@ -114,8 +112,12 @@ impl Finalizable for DatadogMetricsRequest { } impl MetaDescriptive for DatadogMetricsRequest { - fn get_metadata(&self) -> RequestMetadata { - self.metadata + fn get_metadata(&self) -> &RequestMetadata { + &self.metadata + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.metadata } } @@ -124,9 +126,7 @@ impl MetaDescriptive for DatadogMetricsRequest { pub struct DatadogMetricsResponse { status_code: StatusCode, body: Bytes, - batch_size: usize, - byte_size: usize, - raw_byte_size: usize, + request_metadata: RequestMetadata, } impl DriverResponse for DatadogMetricsResponse { 
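The request-builder change above also switches metadata construction to a single flow: event counts and estimated JSON sizes are derived from the events themselves via `RequestMetadataBuilder::from_events`, and the encoder's `EncodeResult` supplies the uncompressed and on-the-wire payload sizes. A trimmed sketch of that call, where `processed_metrics` and `encode_result` stand in for the values produced by `encoder.finish()`:

// Replaces the previous manual `RequestMetadataBuilder::new(n, raw, raw)` plus
// `with_request_size(..)` construction.
let request_metadata = RequestMetadataBuilder::from_events(&processed_metrics)
    .build(&encode_result);

Carrying the resulting `RequestMetadata` on the response (as `DatadogMetricsResponse` now does) means `events_sent()` and `bytes_sent()` are derived from one source of truth instead of the hand-copied `batch_size`/`byte_size`/`raw_byte_size` fields they replace.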
@@ -140,12 +140,13 @@ impl DriverResponse for DatadogMetricsResponse { } } - fn events_sent(&self) -> CountByteSize { - CountByteSize(self.batch_size, self.byte_size) + fn events_sent(&self) -> &GroupedCountByteSize { + self.request_metadata + .events_estimated_json_encoded_byte_size() } fn bytes_sent(&self) -> Option { - Some(self.raw_byte_size) + Some(self.request_metadata.request_wire_size()) } } @@ -195,14 +196,12 @@ impl Service for DatadogMetricsService { } // Emission of Error internal event is handled upstream by the caller - fn call(&mut self, request: DatadogMetricsRequest) -> Self::Future { + fn call(&mut self, mut request: DatadogMetricsRequest) -> Self::Future { let client = self.client.clone(); let api_key = self.api_key.clone(); Box::pin(async move { - let byte_size = request.get_metadata().events_byte_size(); - let batch_size = request.get_metadata().event_count(); - let raw_byte_size = request.raw_bytes; + let request_metadata = std::mem::take(request.metadata_mut()); let request = request .into_http_request(api_key) @@ -221,9 +220,7 @@ impl Service for DatadogMetricsService { Ok(DatadogMetricsResponse { status_code: parts.status, body, - batch_size, - byte_size, - raw_byte_size, + request_metadata, }) }) } diff --git a/src/sinks/datadog/metrics/sink.rs b/src/sinks/datadog/metrics/sink.rs index a85eaaf7a3a11..5ceefc3c487d2 100644 --- a/src/sinks/datadog/metrics/sink.rs +++ b/src/sinks/datadog/metrics/sink.rs @@ -123,9 +123,9 @@ where .filter_map(|request| async move { match request { Err(e) => { - let (error_message, error_code, dropped_events) = e.into_parts(); + let (reason, error_code, dropped_events) = e.into_parts(); emit!(DatadogMetricsEncodingError { - error_message, + reason: reason.as_str(), error_code, dropped_events: dropped_events as usize, }); diff --git a/src/sinks/datadog/traces/config.rs b/src/sinks/datadog/traces/config.rs index 533b874a539da..bb8f4d183d63d 100644 --- a/src/sinks/datadog/traces/config.rs +++ b/src/sinks/datadog/traces/config.rs @@ -54,7 +54,7 @@ impl SinkBatchSettings for DatadogTracesDefaultBatchSettings { } /// Configuration for the `datadog_traces` sink. 
-#[configurable_component(sink("datadog_traces"))] +#[configurable_component(sink("datadog_traces", "Publish trace events to Datadog."))] #[derive(Clone, Debug, Default)] #[serde(deny_unknown_fields)] pub struct DatadogTracesConfig { @@ -211,6 +211,7 @@ impl DatadogTracesConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "datadog_traces")] impl SinkConfig for DatadogTracesConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let client = self.build_client(&cx.proxy)?; diff --git a/src/sinks/datadog/traces/request_builder.rs b/src/sinks/datadog/traces/request_builder.rs index 80613314caef7..dbe57714995fc 100644 --- a/src/sinks/datadog/traces/request_builder.rs +++ b/src/sinks/datadog/traces/request_builder.rs @@ -9,7 +9,10 @@ use bytes::Bytes; use prost::Message; use snafu::Snafu; use vector_common::request_metadata::RequestMetadata; -use vector_core::event::{EventFinalizers, Finalizable}; +use vector_core::{ + event::{EventFinalizers, Finalizable}, + EstimatedJsonEncodedSizeOf, +}; use super::{ apm_stats::{compute_apm_stats, Aggregator}, @@ -122,6 +125,7 @@ impl IncrementalRequestBuilder<(PartitionKey, Vec)> for DatadogTracesRequ .for_each(|r| match r { Ok((payload, mut processed)) => { let uncompressed_size = payload.len(); + let json_size = processed.estimated_json_encoded_size_of(); let metadata = DDTracesMetadata { api_key: key .api_key @@ -139,11 +143,8 @@ impl IncrementalRequestBuilder<(PartitionKey, Vec)> for DatadogTracesRequ let bytes = compressor.into_inner().freeze(); // build RequestMetadata - let builder = RequestMetadataBuilder::new( - n, - uncompressed_size, - uncompressed_size, - ); + let builder = + RequestMetadataBuilder::new(n, uncompressed_size, json_size); let bytes_len = NonZeroUsize::new(bytes.len()) .expect("payload should never be zero length"); let request_metadata = builder.with_request_size(bytes_len); diff --git a/src/sinks/datadog/traces/service.rs b/src/sinks/datadog/traces/service.rs index 509c909a87d4d..5128d855edb0f 100644 --- a/src/sinks/datadog/traces/service.rs +++ b/src/sinks/datadog/traces/service.rs @@ -9,10 +9,9 @@ use http::{Request, StatusCode, Uri}; use hyper::Body; use snafu::ResultExt; use tower::Service; -use vector_common::request_metadata::{MetaDescriptive, RequestMetadata}; +use vector_common::request_metadata::{GroupedCountByteSize, MetaDescriptive, RequestMetadata}; use vector_core::{ event::{EventFinalizers, EventStatus, Finalizable}, - internal_event::CountByteSize, stream::DriverResponse, }; @@ -81,8 +80,12 @@ impl Finalizable for TraceApiRequest { } impl MetaDescriptive for TraceApiRequest { - fn get_metadata(&self) -> RequestMetadata { - self.metadata + fn get_metadata(&self) -> &RequestMetadata { + &self.metadata + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.metadata } } @@ -90,8 +93,7 @@ impl MetaDescriptive for TraceApiRequest { pub struct TraceApiResponse { status_code: StatusCode, body: Bytes, - batch_size: usize, - byte_size: usize, + byte_size: GroupedCountByteSize, uncompressed_size: usize, } @@ -106,8 +108,8 @@ impl DriverResponse for TraceApiResponse { } } - fn events_sent(&self) -> CountByteSize { - CountByteSize(self.batch_size, self.byte_size) + fn events_sent(&self) -> &GroupedCountByteSize { + &self.byte_size } fn bytes_sent(&self) -> Option { @@ -142,12 +144,12 @@ impl Service for TraceApiService { } // Emission of Error internal event is handled upstream by the caller - fn call(&mut self, request: TraceApiRequest) -> Self::Future { + fn 
call(&mut self, mut request: TraceApiRequest) -> Self::Future { let client = self.client.clone(); Box::pin(async move { - let byte_size = request.get_metadata().events_byte_size(); - let batch_size = request.get_metadata().event_count(); + let metadata = std::mem::take(request.metadata_mut()); + let byte_size = metadata.into_events_estimated_json_encoded_byte_size(); let uncompressed_size = request.uncompressed_size; let http_request = request.into_http_request().context(BuildRequestSnafu)?; @@ -161,7 +163,6 @@ impl Service for TraceApiService { Ok(TraceApiResponse { status_code: parts.status, body, - batch_size, byte_size, uncompressed_size, }) diff --git a/src/sinks/datadog_archives.rs b/src/sinks/datadog_archives.rs deleted file mode 100644 index d794283bd9f77..0000000000000 --- a/src/sinks/datadog_archives.rs +++ /dev/null @@ -1,1188 +0,0 @@ -// NOTE: We intentionally do not assert/verify that `datadog_archives` meets the component specification because it -// derives all of its capabilities from existing sink implementations which themselves are tested. We probably _should_ -// also verify it here, but for now, this is a punt to avoid having to add a bunch of specific integration tests that -// exercise all possible configurations of the sink. - -use std::{ - collections::{BTreeMap, HashMap, HashSet}, - convert::TryFrom, - io::{self, Write}, - sync::{ - atomic::{AtomicU32, Ordering}, - Arc, - }, -}; - -use azure_storage_blobs::prelude::ContainerClient; -use base64::prelude::{Engine as _, BASE64_STANDARD}; -use bytes::{BufMut, Bytes, BytesMut}; -use chrono::{SecondsFormat, Utc}; -use codecs::{encoding::Framer, JsonSerializerConfig, NewlineDelimitedEncoder}; -use goauth::scopes::Scope; -use http::header::{HeaderName, HeaderValue}; -use http::Uri; -use lookup::event_path; -use rand::{thread_rng, Rng}; -use snafu::Snafu; -use tower::ServiceBuilder; -use uuid::Uuid; -use vector_common::request_metadata::RequestMetadata; -use vector_config::{configurable_component, NamedComponent}; -use vector_core::{ - config::AcknowledgementsConfig, - event::{Event, EventFinalizers, Finalizable}, - schema, ByteSizeOf, -}; -use vrl::value::Kind; - -use crate::{ - aws::{AwsAuthentication, RegionOrEndpoint}, - codecs::{Encoder, Transformer}, - config::{GenerateConfig, Input, SinkConfig, SinkContext}, - gcp::{GcpAuthConfig, GcpAuthenticator}, - http::{get_http_scheme_from_uri, HttpClient}, - serde::json::to_string, - sinks::{ - azure_common::{ - self, - config::{AzureBlobMetadata, AzureBlobRequest, AzureBlobRetryLogic}, - service::AzureBlobService, - sink::AzureBlobSink, - }, - gcs_common::{ - self, - config::{GcsPredefinedAcl, GcsRetryLogic, GcsStorageClass, BASE_URL}, - service::{GcsRequest, GcsRequestSettings, GcsService}, - sink::GcsSink, - }, - s3_common::{ - self, - config::{ - create_service, S3CannedAcl, S3RetryLogic, S3ServerSideEncryption, S3StorageClass, - }, - partitioner::{S3KeyPartitioner, S3PartitionKey}, - service::{S3Metadata, S3Request, S3Service}, - sink::S3Sink, - }, - util::{ - metadata::RequestMetadataBuilder, partitioner::KeyPartitioner, - request_builder::EncodeResult, BatchConfig, Compression, RequestBuilder, - ServiceBuilderExt, SinkBatchSettings, TowerRequestConfig, - }, - VectorSink, - }, - template::Template, - tls::{TlsConfig, TlsSettings}, -}; - -const DEFAULT_COMPRESSION: Compression = Compression::gzip_default(); - -#[derive(Clone, Copy, Debug, Default)] -pub struct DatadogArchivesDefaultBatchSettings; - -/// We should avoid producing many small batches - this might slow down 
Log Rehydration, -/// these values are similar with how DataDog's Log Archives work internally: -/// batch size - 100mb -/// batch timeout - 15min -impl SinkBatchSettings for DatadogArchivesDefaultBatchSettings { - const MAX_EVENTS: Option = None; - const MAX_BYTES: Option = Some(100_000_000); - const TIMEOUT_SECS: f64 = 900.0; -} -/// Configuration for the `datadog_archives` sink. -#[configurable_component] -#[derive(Clone, Debug)] -#[serde(deny_unknown_fields)] -pub struct DatadogArchivesSinkConfig { - /// The name of the object storage service to use. - // TODO: This should really be an enum. - pub service: String, - - /// The name of the bucket to store the archives in. - pub bucket: String, - - /// A prefix to apply to all object keys. - /// - /// Prefixes are useful for partitioning objects, such as by creating an object key that - /// stores objects under a particular directory. If using a prefix for this purpose, it must end - /// in `/` to act as a directory path. A trailing `/` is **not** automatically added. - pub key_prefix: Option, - - #[configurable(derived)] - #[serde(default)] - pub request: TowerRequestConfig, - - #[configurable(derived)] - #[serde(default)] - pub aws_s3: Option, - - #[configurable(derived)] - #[serde(default)] - pub azure_blob: Option, - - #[configurable(derived)] - #[serde(default)] - pub gcp_cloud_storage: Option, - - #[configurable(derived)] - tls: Option, - - #[configurable(derived)] - #[serde( - default, - skip_serializing_if = "crate::serde::skip_serializing_if_default" - )] - pub encoding: Transformer, - - #[configurable(derived)] - #[serde( - default, - deserialize_with = "crate::serde::bool_or_struct", - skip_serializing_if = "crate::serde::skip_serializing_if_default" - )] - acknowledgements: AcknowledgementsConfig, -} - -/// S3-specific configuration options. -#[configurable_component] -#[derive(Clone, Debug, Default)] -#[serde(deny_unknown_fields)] -pub struct S3Config { - #[serde(flatten)] - pub options: S3Options, - - #[serde(flatten)] - pub region: RegionOrEndpoint, - - #[configurable(derived)] - #[serde(default)] - pub auth: AwsAuthentication, -} - -/// S3-specific bucket/object options. -#[configurable_component] -#[derive(Clone, Debug, Default)] -#[serde(deny_unknown_fields)] -pub struct S3Options { - /// Canned ACL to apply to the created objects. - /// - /// For more information, see [Canned ACL][canned_acl]. - /// - /// [canned_acl]: https://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl - pub acl: Option, - - /// Grants `READ`, `READ_ACP`, and `WRITE_ACP` permissions on the created objects to the named [grantee]. - /// - /// This allows the grantee to read the created objects and their metadata, as well as read and - /// modify the ACL on the created objects. - /// - /// [grantee]: https://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#specifying-grantee - pub grant_full_control: Option, - - /// Grants `READ` permissions on the created objects to the named [grantee]. - /// - /// This allows the grantee to read the created objects and their metadata. - /// - /// [grantee]: https://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#specifying-grantee - pub grant_read: Option, - - /// Grants `READ_ACP` permissions on the created objects to the named [grantee]. - /// - /// This allows the grantee to read the ACL on the created objects. 
- /// - /// [grantee]: https://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#specifying-grantee - pub grant_read_acp: Option, - - /// Grants `WRITE_ACP` permissions on the created objects to the named [grantee]. - /// - /// This allows the grantee to modify the ACL on the created objects. - /// - /// [grantee]: https://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#specifying-grantee - pub grant_write_acp: Option, - - /// The Server-side Encryption algorithm used when storing these objects. - pub server_side_encryption: Option, - - /// Specifies the ID of the AWS Key Management Service (AWS KMS) symmetrical customer managed - /// customer master key (CMK) that is used for the created objects. - /// - /// Only applies when `server_side_encryption` is configured to use KMS. - /// - /// If not specified, Amazon S3 uses the AWS managed CMK in AWS to protect the data. - pub ssekms_key_id: Option, - - /// The storage class for the created objects. - /// - /// For more information, see [Using Amazon S3 storage classes][storage_classes]. - /// - /// [storage_classes]: https://docs.aws.amazon.com/AmazonS3/latest/dev/storage-class-intro.html - pub storage_class: S3StorageClass, - - /// The tag-set for the object. - #[configurable(metadata(docs::additional_props_description = "A single tag."))] - pub tags: Option>, -} - -/// ABS-specific configuration options. -#[configurable_component] -#[derive(Clone, Debug, Default)] -#[serde(deny_unknown_fields)] -pub struct AzureBlobConfig { - /// The Azure Blob Storage Account connection string. - /// - /// Authentication with access key is the only supported authentication method. - pub connection_string: String, -} - -/// GCS-specific configuration options. -#[configurable_component] -#[derive(Clone, Debug, Default)] -#[serde(deny_unknown_fields)] -pub struct GcsConfig { - #[configurable(derived)] - acl: Option, - - #[configurable(derived)] - storage_class: Option, - - /// The set of metadata `key:value` pairs for the created objects. - /// - /// For more information, see [Custom metadata][custom_metadata]. - /// - /// [custom_metadata]: https://cloud.google.com/storage/docs/metadata#custom-metadata - #[configurable(metadata(docs::additional_props_description = "A key/value pair."))] - metadata: Option>, - - #[serde(flatten)] - auth: GcpAuthConfig, -} - -impl GenerateConfig for DatadogArchivesSinkConfig { - fn generate_config() -> toml::Value { - toml::Value::try_from(Self { - service: "".to_owned(), - bucket: "".to_owned(), - key_prefix: None, - request: TowerRequestConfig::default(), - aws_s3: None, - gcp_cloud_storage: None, - tls: None, - azure_blob: None, - encoding: Default::default(), - acknowledgements: Default::default(), - }) - .unwrap() - } -} - -#[derive(Debug, Snafu, PartialEq)] -enum ConfigError { - #[snafu(display("Unsupported service: {}", service))] - UnsupportedService { service: String }, - #[snafu(display("Unsupported storage class: {}", storage_class))] - UnsupportedStorageClass { storage_class: String }, -} - -const KEY_TEMPLATE: &str = "/dt=%Y%m%d/hour=%H/"; - -impl DatadogArchivesSinkConfig { - async fn build_sink(&self, cx: SinkContext) -> crate::Result<(VectorSink, super::Healthcheck)> { - match &self.service[..] 
{ - "aws_s3" => { - let s3_config = self.aws_s3.as_ref().expect("s3 config wasn't provided"); - let service = - create_service(&s3_config.region, &s3_config.auth, &cx.proxy, &self.tls) - .await?; - let client = service.client(); - let svc = self - .build_s3_sink(&s3_config.options, service) - .map_err(|error| error.to_string())?; - Ok(( - svc, - s3_common::config::build_healthcheck(self.bucket.clone(), client, cx)?, - )) - } - "azure_blob" => { - let azure_config = self - .azure_blob - .as_ref() - .expect("azure blob config wasn't provided"); - let client = azure_common::config::build_client( - Some(azure_config.connection_string.clone()), - None, - self.bucket.clone(), - None, - )?; - let svc = self - .build_azure_sink(Arc::::clone(&client)) - .map_err(|error| error.to_string())?; - let healthcheck = - azure_common::config::build_healthcheck(self.bucket.clone(), Some(client), cx)?; - Ok((svc, healthcheck)) - } - "gcp_cloud_storage" => { - let gcs_config = self - .gcp_cloud_storage - .as_ref() - .expect("gcs config wasn't provided"); - let auth = gcs_config.auth.build(Scope::DevStorageReadWrite).await; - if let Err(err) = &auth { - warn!("Invalid authentication: {}", err) - } - - let auth = auth.ok(); - let base_url = format!("{}{}/", BASE_URL, self.bucket); - let tls = TlsSettings::from_options(&self.tls)?; - let client = HttpClient::new(tls, cx.proxy())?; - let healthcheck = gcs_common::config::build_healthcheck( - self.bucket.clone(), - client.clone(), - base_url.clone(), - auth.clone(), - cx.mezmo_ctx.clone(), - )?; - let sink = self - .build_gcs_sink(client, base_url, auth) - .map_err(|error| error.to_string())?; - Ok((sink, healthcheck)) - } - - service => Err(Box::new(ConfigError::UnsupportedService { - service: service.to_owned(), - })), - } - } - - fn build_s3_sink( - &self, - s3_options: &S3Options, - service: S3Service, - ) -> Result { - // we use lower default limits, because we send 100mb batches, - // thus no need of the higher number of outgoing requests - let request_limits = self.request.unwrap_with(&Default::default()); - let service = ServiceBuilder::new() - .settings(request_limits, S3RetryLogic) - .service(service); - - match s3_options.storage_class { - class @ S3StorageClass::DeepArchive | class @ S3StorageClass::Glacier => { - return Err(ConfigError::UnsupportedStorageClass { - storage_class: format!("{:?}", class), - }); - } - _ => (), - } - - let batcher_settings = BatchConfig::::default() - .into_batcher_settings() - .expect("invalid batch settings"); - - let partitioner = S3KeyPartitioner::new( - Template::try_from(KEY_TEMPLATE).expect("invalid object key format"), - None, - ); - - let s3_config = self - .aws_s3 - .as_ref() - .expect("s3 config wasn't provided") - .clone(); - let request_builder = DatadogS3RequestBuilder::new( - self.bucket.clone(), - self.key_prefix.clone(), - s3_config, - self.encoding.clone(), - ); - - let sink = S3Sink::new(service, request_builder, partitioner, batcher_settings); - - Ok(VectorSink::from_event_streamsink(sink)) - } - - pub fn build_gcs_sink( - &self, - client: HttpClient, - base_url: String, - auth: Option, - ) -> crate::Result { - let request = self.request.unwrap_with(&Default::default()); - let protocol = get_http_scheme_from_uri(&base_url.parse::()?); - - let batcher_settings = BatchConfig::::default() - .into_batcher_settings() - .expect("invalid batch settings"); - - let svc = ServiceBuilder::new() - .settings(request, GcsRetryLogic) - .service(GcsService::new(client, base_url, auth)); - - let gcs_config = self - 
.gcp_cloud_storage - .as_ref() - .expect("gcs config wasn't provided") - .clone(); - - let acl = gcs_config - .acl - .map(|acl| HeaderValue::from_str(&to_string(acl)).unwrap()); - let storage_class = gcs_config.storage_class.unwrap_or_default(); - let storage_class = HeaderValue::from_str(&to_string(storage_class)).unwrap(); - let metadata = gcs_config - .metadata - .as_ref() - .map(|metadata| { - metadata - .iter() - .map(make_header) - .collect::, _>>() - }) - .unwrap_or_else(|| Ok(vec![]))?; - let request_builder = DatadogGcsRequestBuilder { - bucket: self.bucket.clone(), - key_prefix: self.key_prefix.clone(), - acl, - storage_class, - metadata, - encoding: DatadogArchivesEncoding::new(self.encoding.clone()), - compression: DEFAULT_COMPRESSION, - }; - - let partitioner = DatadogArchivesSinkConfig::build_partitioner(); - - let sink = GcsSink::new( - svc, - request_builder, - partitioner, - batcher_settings, - protocol, - ); - - Ok(VectorSink::from_event_streamsink(sink)) - } - - fn build_azure_sink(&self, client: Arc) -> crate::Result { - let request_limits = self.request.unwrap_with(&Default::default()); - let service = ServiceBuilder::new() - .settings(request_limits, AzureBlobRetryLogic) - .service(AzureBlobService::new(Some(client))); - - let batcher_settings = BatchConfig::::default() - .into_batcher_settings() - .expect("invalid batch settings"); - - let partitioner = DatadogArchivesSinkConfig::build_partitioner(); - let request_builder = DatadogAzureRequestBuilder { - container_name: self.bucket.clone(), - blob_prefix: self.key_prefix.clone(), - encoding: DatadogArchivesEncoding::new(self.encoding.clone()), - }; - - let sink = AzureBlobSink::new(service, request_builder, partitioner, batcher_settings); - - Ok(VectorSink::from_event_streamsink(sink)) - } - - pub fn build_partitioner() -> KeyPartitioner { - KeyPartitioner::new(Template::try_from(KEY_TEMPLATE).expect("invalid object key format")) - } -} - -const RESERVED_ATTRIBUTES: [&str; 10] = [ - "_id", "date", "message", "host", "source", "service", "status", "tags", "trace_id", "span_id", -]; - -#[derive(Debug)] -struct DatadogArchivesEncoding { - encoder: (Transformer, Encoder), - reserved_attributes: HashSet<&'static str>, - id_rnd_bytes: [u8; 8], - id_seq_number: AtomicU32, -} - -impl DatadogArchivesEncoding { - /// Generates a unique event ID compatible with DD: - /// - 18 bytes; - /// - first 6 bytes represent a "now" timestamp in millis; - /// - the rest 12 bytes can be just any sequence unique for a given timestamp. - /// - /// To generate unique-ish trailing 12 bytes we use random 8 bytes, generated at startup, - /// and a rolling-over 4-bytes sequence number. 
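The doc comment above pins down the archive `_id` layout: 18 bytes, base64-encoded, beginning with a 6-byte big-endian millisecond timestamp, followed by 8 random bytes fixed at process start and a 4-byte rolling counter. As an illustrative cross-check only (the helper name `decode_log_id_timestamp` is mine and is not part of this file), here is a minimal sketch that recovers the timestamp from such an ID, mirroring what the `validate_event_id` test further down in this file does:

    // Sketch only: recover the millisecond timestamp from an 18-byte archive `_id`.
    // Assumes the layout described in the doc comment above.
    fn decode_log_id_timestamp(id: &str) -> Option<i64> {
        use base64::prelude::{Engine as _, BASE64_STANDARD};

        let bytes = BASE64_STANDARD.decode(id).ok()?;
        if bytes.len() != 18 {
            return None;
        }
        // The first 6 bytes are a big-endian millisecond timestamp; left-pad to 8 bytes.
        let mut ts = [0u8; 8];
        ts[2..].copy_from_slice(&bytes[..6]);
        Some(i64::from_be_bytes(ts))
    }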
- fn generate_log_id(&self) -> String { - let mut id = BytesMut::with_capacity(18); - // timestamp in millis - 6 bytes - let now = Utc::now(); - id.put_int(now.timestamp_millis(), 6); - - // 8 random bytes - id.put_slice(&self.id_rnd_bytes); - - // 4 bytes for the counter should be more than enough - it should be unique for 1 millisecond only - let id_seq_number = self.id_seq_number.fetch_add(1, Ordering::Relaxed); - id.put_u32(id_seq_number); - - BASE64_STANDARD.encode(id.freeze()) - } -} - -impl DatadogArchivesEncoding { - pub fn new(transformer: Transformer) -> Self { - Self { - encoder: ( - transformer, - Encoder::::new( - NewlineDelimitedEncoder::new().into(), - JsonSerializerConfig::default().build().into(), - ), - ), - reserved_attributes: RESERVED_ATTRIBUTES.iter().copied().collect(), - id_rnd_bytes: thread_rng().gen::<[u8; 8]>(), - id_seq_number: AtomicU32::new(0), - } - } -} - -impl crate::sinks::util::encoding::Encoder> for DatadogArchivesEncoding { - /// Applies the following transformations to align event's schema with DD: - /// - (required) `_id` is generated in the sink(format described below); - /// - (required) `date` is set from the `timestamp` meaning or Global Log Schema mapping, or to the current time if missing; - /// - `message`,`host` are set from the corresponding meanings or Global Log Schema mappings; - /// - `source`, `service`, `status`, `tags` and other reserved attributes are left as is; - /// - the rest of the fields is moved to `attributes`. - // TODO: All reserved attributes could have specific meanings, rather than specific paths - fn encode_input(&self, mut input: Vec, writer: &mut dyn Write) -> io::Result { - for event in input.iter_mut() { - let log_event = event.as_mut_log(); - - log_event.insert("_id", self.generate_log_id()); - - let timestamp = log_event - .remove_timestamp() - .unwrap_or_else(|| Utc::now().timestamp_millis().into()); - log_event.insert( - "date", - timestamp - .as_timestamp() - .cloned() - .unwrap_or_else(Utc::now) - .to_rfc3339_opts(SecondsFormat::Millis, true), - ); - - if let Some(message_path) = log_event.message_path() { - log_event.rename_key(message_path.as_str(), event_path!("message")); - } - - if let Some(host_path) = log_event.host_path() { - log_event.rename_key(host_path.as_str(), event_path!("host")); - } - - let mut attributes = BTreeMap::new(); - - let custom_attributes = if let Some(map) = log_event.as_map() { - map.keys() - .filter(|&path| !self.reserved_attributes.contains(path.as_str())) - .map(|v| v.to_owned()) - .collect() - } else { - vec![] - }; - - for path in custom_attributes { - if let Some(value) = log_event.remove(path.as_str()) { - attributes.insert(path, value); - } - } - log_event.insert("attributes", attributes); - } - - self.encoder.encode_input(input, writer) - } -} -#[derive(Debug)] -struct DatadogS3RequestBuilder { - bucket: String, - key_prefix: Option, - config: S3Config, - encoding: DatadogArchivesEncoding, -} - -impl DatadogS3RequestBuilder { - pub fn new( - bucket: String, - key_prefix: Option, - config: S3Config, - transformer: Transformer, - ) -> Self { - Self { - bucket, - key_prefix, - config, - encoding: DatadogArchivesEncoding::new(transformer), - } - } -} - -impl RequestBuilder<(S3PartitionKey, Vec)> for DatadogS3RequestBuilder { - type Metadata = S3Metadata; - type Events = Vec; - type Encoder = DatadogArchivesEncoding; - type Payload = Bytes; - type Request = S3Request; - type Error = io::Error; - - fn compression(&self) -> Compression { - DEFAULT_COMPRESSION - } - - fn 
encoder(&self) -> &Self::Encoder { - &self.encoding - } - - fn split_input( - &self, - input: (S3PartitionKey, Vec), - ) -> (Self::Metadata, RequestMetadataBuilder, Self::Events) { - let (partition_key, mut events) = input; - let finalizers = events.take_finalizers(); - let s3_key_prefix = partition_key.key_prefix.clone(); - - let builder = RequestMetadataBuilder::from_events(&events); - - let s3metadata = S3Metadata { - partition_key, - s3_key: s3_key_prefix, - finalizers, - }; - - (s3metadata, builder, events) - } - - fn build_request( - &self, - mut metadata: Self::Metadata, - request_metadata: RequestMetadata, - payload: EncodeResult, - ) -> Self::Request { - metadata.s3_key = generate_object_key(self.key_prefix.clone(), metadata.s3_key); - - let body = payload.into_payload(); - trace!( - message = "Sending events.", - bytes = ?body.len(), - events_len = ?request_metadata.events_byte_size(), - bucket = ?self.bucket, - key = ?metadata.partition_key - ); - - let s3_options = self.config.options.clone(); - S3Request { - body, - bucket: self.bucket.clone(), - metadata, - request_metadata, - content_encoding: DEFAULT_COMPRESSION.content_encoding(), - options: s3_common::config::S3Options { - acl: s3_options.acl, - grant_full_control: s3_options.grant_full_control, - grant_read: s3_options.grant_read, - grant_read_acp: s3_options.grant_read_acp, - grant_write_acp: s3_options.grant_write_acp, - server_side_encryption: s3_options.server_side_encryption, - ssekms_key_id: s3_options.ssekms_key_id, - storage_class: s3_options.storage_class, - tags: s3_options.tags.map(|tags| tags.into_iter().collect()), - content_encoding: None, - content_type: None, - }, - } - } -} - -#[derive(Debug)] -struct DatadogGcsRequestBuilder { - bucket: String, - key_prefix: Option, - acl: Option, - storage_class: HeaderValue, - metadata: Vec<(HeaderName, HeaderValue)>, - encoding: DatadogArchivesEncoding, - compression: Compression, -} - -impl RequestBuilder<(String, Vec)> for DatadogGcsRequestBuilder { - type Metadata = (String, EventFinalizers); - type Events = Vec; - type Payload = Bytes; - type Request = GcsRequest; - type Encoder = DatadogArchivesEncoding; - type Error = io::Error; - - fn split_input( - &self, - input: (String, Vec), - ) -> (Self::Metadata, RequestMetadataBuilder, Self::Events) { - let (partition_key, mut events) = input; - let metadata_builder = RequestMetadataBuilder::from_events(&events); - let finalizers = events.take_finalizers(); - - ((partition_key, finalizers), metadata_builder, events) - } - - fn build_request( - &self, - dd_metadata: Self::Metadata, - metadata: RequestMetadata, - payload: EncodeResult, - ) -> Self::Request { - let (key, finalizers) = dd_metadata; - - let key = generate_object_key(self.key_prefix.clone(), key); - - let body = payload.into_payload(); - - trace!( - message = "Sending events.", - bytes = body.len(), - events_len = metadata.event_count(), - bucket = %self.bucket, - ?key - ); - - let content_type = HeaderValue::from_str(self.encoding.encoder.1.content_type()).unwrap(); - let content_encoding = DEFAULT_COMPRESSION - .content_encoding() - .map(|ce| HeaderValue::from_str(&to_string(ce)).unwrap()); - - GcsRequest { - key, - body, - finalizers, - settings: GcsRequestSettings { - acl: self.acl.clone(), - content_type, - content_encoding, - storage_class: self.storage_class.clone(), - headers: self.metadata.clone(), - }, - metadata, - } - } - - fn compression(&self) -> Compression { - self.compression - } - - fn encoder(&self) -> &Self::Encoder { - &self.encoding - } 
-} - -fn generate_object_key(key_prefix: Option, partition_key: String) -> String { - let filename = Uuid::new_v4().to_string(); - - format!( - "{}/{}/archive_{}.{}", - key_prefix.unwrap_or_default(), - partition_key, - filename, - "json.gz" - ) - .replace("//", "/") -} - -#[derive(Debug)] -struct DatadogAzureRequestBuilder { - container_name: String, - blob_prefix: Option, - encoding: DatadogArchivesEncoding, -} - -impl RequestBuilder<(String, Vec)> for DatadogAzureRequestBuilder { - type Metadata = AzureBlobMetadata; - type Events = Vec; - type Encoder = DatadogArchivesEncoding; - type Payload = Bytes; - type Request = AzureBlobRequest; - type Error = io::Error; - - fn compression(&self) -> Compression { - DEFAULT_COMPRESSION - } - - fn encoder(&self) -> &Self::Encoder { - &self.encoding - } - - fn split_input( - &self, - input: (String, Vec), - ) -> (Self::Metadata, RequestMetadataBuilder, Self::Events) { - let (partition_key, mut events) = input; - let finalizers = events.take_finalizers(); - let metadata = AzureBlobMetadata { - partition_key, - count: events.len(), - byte_size: events.size_of(), - finalizers, - }; - let builder = RequestMetadataBuilder::from_events(&events); - - (metadata, builder, events) - } - - fn build_request( - &self, - mut metadata: Self::Metadata, - request_metadata: RequestMetadata, - payload: EncodeResult, - ) -> Self::Request { - metadata.partition_key = - generate_object_key(self.blob_prefix.clone(), metadata.partition_key); - - let blob_data = payload.into_payload(); - - trace!( - message = "Sending events.", - bytes = ?blob_data.len(), - events_len = ?metadata.count, - container = ?self.container_name, - blob = ?metadata.partition_key - ); - - AzureBlobRequest { - blob_data, - content_encoding: DEFAULT_COMPRESSION.content_encoding(), - content_type: "application/gzip", - metadata, - request_metadata, - } - } -} - -// This is implemented manually to satisfy `SinkConfig`, because if we derive it automatically via -// `#[configurable_component(sink("..."))]`, it would register the sink in a way that allowed it to -// be used in `vector generate`, etc... and we don't want that. -// -// TODO: When the sink is fully supported and we expose it for use/within the docs, remove this. -impl NamedComponent for DatadogArchivesSinkConfig { - fn get_component_name(&self) -> &'static str { - "datadog_archives" - } -} - -#[async_trait::async_trait] -impl SinkConfig for DatadogArchivesSinkConfig { - async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, super::Healthcheck)> { - let sink_and_healthcheck = self.build_sink(cx).await?; - Ok(sink_and_healthcheck) - } - - fn input(&self) -> Input { - let requirements = schema::Requirement::empty() - .optional_meaning("host", Kind::bytes()) - .optional_meaning("message", Kind::bytes()) - .optional_meaning("source", Kind::bytes()) - .optional_meaning("service", Kind::bytes()) - .optional_meaning("severity", Kind::bytes()) - // TODO: A `timestamp` is required for rehydration, however today we generate a `Utc::now()` - // timestamp if it's not found in the event. We could require this meaning instead. 
- .optional_meaning("timestamp", Kind::timestamp()) - .optional_meaning("trace_id", Kind::bytes()); - - Input::log().with_schema_requirement(requirements) - } - - fn acknowledgements(&self) -> &AcknowledgementsConfig { - &self.acknowledgements - } -} - -// Make a header pair from a key-value string pair -fn make_header((name, value): (&String, &String)) -> crate::Result<(HeaderName, HeaderValue)> { - Ok(( - HeaderName::from_bytes(name.as_bytes())?, - HeaderValue::from_str(value)?, - )) -} - -#[cfg(test)] -mod tests { - #![allow(clippy::print_stdout)] // tests - - use std::{collections::BTreeMap, io::Cursor}; - - use chrono::DateTime; - use vector_core::partition::Partitioner; - - use super::*; - use crate::{event::LogEvent, sinks::util::encoding::Encoder as _}; - - #[test] - fn generate_config() { - crate::test_util::test_generate_config::(); - } - - #[test] - fn encodes_event() { - let mut event = Event::Log(LogEvent::from("test message")); - let log_mut = event.as_mut_log(); - log_mut.insert("service", "test-service"); - log_mut.insert("not_a_reserved_attribute", "value"); - log_mut.insert("tags", vec!["tag1:value1", "tag2:value2"]); - let timestamp = DateTime::parse_from_rfc3339("2021-08-23T18:00:27.879+02:00") - .expect("invalid test case") - .with_timezone(&Utc); - log_mut.insert("timestamp", timestamp); - - let mut writer = Cursor::new(Vec::new()); - let encoding = DatadogArchivesEncoding::new(Default::default()); - _ = encoding.encode_input(vec![event], &mut writer); - - let encoded = writer.into_inner(); - let json: BTreeMap = - serde_json::from_slice(encoded.as_slice()).unwrap(); - - validate_event_id( - json.get("_id") - .expect("_id not found") - .as_str() - .expect("_id is not a string"), - ); - - assert_eq!(json.len(), 6); // _id, message, date, service, attributes - assert_eq!( - json.get("message") - .expect("message not found") - .as_str() - .expect("message is not a string"), - "test message" - ); - assert_eq!( - json.get("date") - .expect("date not found") - .as_str() - .expect("date is not a string"), - "2021-08-23T16:00:27.879Z" - ); - assert_eq!( - json.get("service") - .expect("service not found") - .as_str() - .expect("service is not a string"), - "test-service" - ); - - assert_eq!( - json.get("tags") - .expect("tags not found") - .as_array() - .expect("service is not an array") - .to_owned(), - vec!["tag1:value1", "tag2:value2"] - ); - - let attributes = json - .get("attributes") - .expect("attributes not found") - .as_object() - .expect("attributes is not an object"); - assert_eq!(attributes.len(), 1); - assert_eq!( - String::from_utf8_lossy( - attributes - .get("not_a_reserved_attribute") - .expect("not_a_reserved_attribute wasn't moved to attributes") - .as_str() - .expect("not_a_reserved_attribute is not a string") - .as_ref() - ), - "value" - ); - } - - #[test] - fn generates_valid_key_for_an_event() { - let mut log = LogEvent::from("test message"); - - let timestamp = DateTime::parse_from_rfc3339("2021-08-23T18:00:27.879+02:00") - .expect("invalid test case") - .with_timezone(&Utc); - log.insert("timestamp", timestamp); - - let partitioner = DatadogArchivesSinkConfig::build_partitioner(); - let key = partitioner - .partition(&log.into()) - .expect("key wasn't provided"); - - assert_eq!(key, "/dt=20210823/hour=16/"); - } - - #[test] - fn generates_valid_id() { - let log1 = Event::Log(LogEvent::from("test event 1")); - let mut writer = Cursor::new(Vec::new()); - let encoding = DatadogArchivesEncoding::new(Default::default()); - _ = 
encoding.encode_input(vec![log1], &mut writer); - let encoded = writer.into_inner(); - let json: BTreeMap = - serde_json::from_slice(encoded.as_slice()).unwrap(); - let id1 = json - .get("_id") - .expect("_id not found") - .as_str() - .expect("_id is not a string"); - validate_event_id(id1); - - // check that id is different for the next event - let log2 = Event::Log(LogEvent::from("test event 2")); - let mut writer = Cursor::new(Vec::new()); - _ = encoding.encode_input(vec![log2], &mut writer); - let encoded = writer.into_inner(); - let json: BTreeMap = - serde_json::from_slice(encoded.as_slice()).unwrap(); - let id2 = json - .get("_id") - .expect("_id not found") - .as_str() - .expect("_id is not a string"); - validate_event_id(id2); - assert_ne!(id1, id2) - } - - #[test] - fn generates_date_if_missing() { - let log = Event::Log(LogEvent::from("test message")); - let mut writer = Cursor::new(Vec::new()); - let encoding = DatadogArchivesEncoding::new(Default::default()); - _ = encoding.encode_input(vec![log], &mut writer); - let encoded = writer.into_inner(); - let json: BTreeMap = - serde_json::from_slice(encoded.as_slice()).unwrap(); - - let date = DateTime::parse_from_rfc3339( - json.get("date") - .expect("date not found") - .as_str() - .expect("date is not a string"), - ) - .expect("date is not in an rfc3339 format"); - - // check that it is a recent timestamp - assert!(Utc::now().timestamp() - date.timestamp() < 1000); - } - - /// check that _id is: - /// - 18 bytes, - /// - base64-encoded, - /// - first 6 bytes - a "now" timestamp in millis - fn validate_event_id(id: &str) { - let bytes = BASE64_STANDARD - .decode(id) - .expect("_id is not base64-encoded"); - assert_eq!(bytes.len(), 18); - let mut timestamp: [u8; 8] = [0; 8]; - for (i, b) in bytes[..6].iter().enumerate() { - timestamp[i + 2] = *b; - } - let timestamp = i64::from_be_bytes(timestamp); - // check that it is a recent timestamp in millis - assert!(Utc::now().timestamp_millis() - timestamp < 1000); - } - - #[test] - fn s3_build_request() { - let fake_buf = Bytes::new(); - let mut log = Event::Log(LogEvent::from("test message")); - let timestamp = DateTime::parse_from_rfc3339("2021-08-23T18:00:27.879+02:00") - .expect("invalid test case") - .with_timezone(&Utc); - log.as_mut_log().insert("timestamp", timestamp); - let partitioner = S3KeyPartitioner::new( - Template::try_from(KEY_TEMPLATE).expect("invalid object key format"), - None, - ); - let key = partitioner.partition(&log).expect("key wasn't provided"); - - let request_builder = DatadogS3RequestBuilder::new( - "dd-logs".into(), - Some("audit".into()), - S3Config::default(), - Default::default(), - ); - - let (metadata, metadata_request_builder, _events) = - request_builder.split_input((key, vec![log])); - - let payload = EncodeResult::uncompressed(fake_buf.clone()); - let request_metadata = metadata_request_builder.build(&payload); - let req = request_builder.build_request(metadata, request_metadata, payload); - - let expected_key_prefix = "audit/dt=20210823/hour=16/archive_"; - let expected_key_ext = ".json.gz"; - println!("{}", req.metadata.s3_key); - assert!(req.metadata.s3_key.starts_with(expected_key_prefix)); - assert!(req.metadata.s3_key.ends_with(expected_key_ext)); - let uuid1 = &req.metadata.s3_key - [expected_key_prefix.len()..req.metadata.s3_key.len() - expected_key_ext.len()]; - assert_eq!(uuid1.len(), 36); - - // check that the second batch has a different UUID - let log2 = LogEvent::default().into(); - - let key = partitioner.partition(&log2).expect("key 
wasn't provided"); - let (metadata, metadata_request_builder, _events) = - request_builder.split_input((key, vec![log2])); - let payload = EncodeResult::uncompressed(fake_buf); - let request_metadata = metadata_request_builder.build(&payload); - let req = request_builder.build_request(metadata, request_metadata, payload); - - let uuid2 = &req.metadata.s3_key - [expected_key_prefix.len()..req.metadata.s3_key.len() - expected_key_ext.len()]; - - assert_ne!(uuid1, uuid2); - } - - #[tokio::test] - async fn error_if_unsupported_s3_storage_class() { - for (class, supported) in [ - (S3StorageClass::Standard, true), - (S3StorageClass::StandardIa, true), - (S3StorageClass::IntelligentTiering, true), - (S3StorageClass::OnezoneIa, true), - (S3StorageClass::ReducedRedundancy, true), - (S3StorageClass::DeepArchive, false), - (S3StorageClass::Glacier, false), - ] { - let config = DatadogArchivesSinkConfig { - service: "aws_s3".to_owned(), - bucket: "vector-datadog-archives".to_owned(), - key_prefix: Some("logs/".to_owned()), - request: TowerRequestConfig::default(), - aws_s3: Some(S3Config { - options: S3Options { - storage_class: class, - ..Default::default() - }, - region: RegionOrEndpoint::with_region("us-east-1".to_owned()), - auth: Default::default(), - }), - azure_blob: None, - gcp_cloud_storage: None, - tls: None, - encoding: Default::default(), - acknowledgements: Default::default(), - }; - - let res = config.build_sink(SinkContext::new_test()).await; - - if supported { - assert!(res.is_ok()); - } else { - assert_eq!( - res.err().unwrap().to_string(), - format!(r#"Unsupported storage class: {:?}"#, class) - ); - } - } - } -} diff --git a/src/sinks/elasticsearch/common.rs b/src/sinks/elasticsearch/common.rs index c4da722838543..d5c5b214b5449 100644 --- a/src/sinks/elasticsearch/common.rs +++ b/src/sinks/elasticsearch/common.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; -use aws_types::credentials::SharedCredentialsProvider; +use aws_credential_types::provider::SharedCredentialsProvider; use aws_types::region::Region; use bytes::{Buf, Bytes}; use http::{Response, StatusCode, Uri}; @@ -238,7 +238,7 @@ impl ElasticsearchCommon { #[cfg(test)] pub async fn parse_single(config: &ElasticsearchConfig) -> crate::Result { let mut commons = - Self::parse_many(config, crate::config::SinkContext::new_test().proxy()).await?; + Self::parse_many(config, crate::config::SinkContext::default().proxy()).await?; assert_eq!(commons.len(), 1); Ok(commons.remove(0)) } diff --git a/src/sinks/elasticsearch/config.rs b/src/sinks/elasticsearch/config.rs index 87d8c9a122eb9..20746e9cd8762 100644 --- a/src/sinks/elasticsearch/config.rs +++ b/src/sinks/elasticsearch/config.rs @@ -40,7 +40,7 @@ use vrl::value::Kind; pub const DATA_STREAM_TIMESTAMP_KEY: &str = "@timestamp"; /// Configuration for the `elasticsearch` sink. 
-#[configurable_component(sink("elasticsearch"))] +#[configurable_component(sink("elasticsearch", "Index observability events in Elasticsearch."))] #[derive(Clone, Debug)] #[serde(deny_unknown_fields)] pub struct ElasticsearchConfig { @@ -466,6 +466,7 @@ impl DataStreamConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "elasticsearch")] impl SinkConfig for ElasticsearchConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let commons = ElasticsearchCommon::parse_many(self, cx.proxy()).await?; diff --git a/src/sinks/elasticsearch/encoder.rs b/src/sinks/elasticsearch/encoder.rs index 0558e8c44684c..f5b39a52b23a0 100644 --- a/src/sinks/elasticsearch/encoder.rs +++ b/src/sinks/elasticsearch/encoder.rs @@ -2,6 +2,10 @@ use std::{io, io::Write}; use serde::Serialize; use vector_buffers::EventCount; +use vector_common::{ + json_size::JsonSize, + request_metadata::{EventCountTags, GetEventCountTags}, +}; use vector_core::{event::Event, ByteSizeOf, EstimatedJsonEncodedSizeOf}; use crate::{ @@ -34,7 +38,7 @@ impl ByteSizeOf for ProcessedEvent { } impl EstimatedJsonEncodedSizeOf for ProcessedEvent { - fn estimated_json_encoded_size_of(&self) -> usize { + fn estimated_json_encoded_size_of(&self) -> JsonSize { self.log.estimated_json_encoded_size_of() } } @@ -46,6 +50,12 @@ impl EventCount for ProcessedEvent { } } +impl GetEventCountTags for ProcessedEvent { + fn get_tags(&self) -> EventCountTags { + self.log.get_tags() + } +} + #[derive(PartialEq, Eq, Default, Clone, Debug)] pub struct ElasticsearchEncoder { pub transformer: Transformer, diff --git a/src/sinks/elasticsearch/integration_tests.rs b/src/sinks/elasticsearch/integration_tests.rs index 8766efb95af57..53ea850ed19e9 100644 --- a/src/sinks/elasticsearch/integration_tests.rs +++ b/src/sinks/elasticsearch/integration_tests.rs @@ -146,7 +146,7 @@ async fn structures_events_correctly() { .expect("Config error"); let base_url = common.base_url.clone(); - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let (sink, _hc) = config.build(cx.clone()).await.unwrap(); let (batch, mut receiver) = BatchNotifier::new_with_receiver(); @@ -555,7 +555,7 @@ async fn run_insert_tests_with_config( }; let base_url = common.base_url.clone(); - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let (sink, healthcheck) = config .build(cx.clone()) .await @@ -639,7 +639,7 @@ async fn run_insert_tests_with_config( } async fn run_insert_tests_with_multiple_endpoints(config: &ElasticsearchConfig) { - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let commons = ElasticsearchCommon::parse_many(config, cx.proxy()) .await .expect("Config error"); diff --git a/src/sinks/elasticsearch/request_builder.rs b/src/sinks/elasticsearch/request_builder.rs index f6327f34ca50b..c5918df6e384c 100644 --- a/src/sinks/elasticsearch/request_builder.rs +++ b/src/sinks/elasticsearch/request_builder.rs @@ -1,6 +1,6 @@ use bytes::Bytes; -use vector_common::request_metadata::RequestMetadata; -use vector_core::ByteSizeOf; +use vector_common::{json_size::JsonSize, request_metadata::RequestMetadata}; +use vector_core::EstimatedJsonEncodedSizeOf; use crate::{ event::{EventFinalizers, Finalizable}, @@ -25,7 +25,7 @@ pub struct ElasticsearchRequestBuilder { pub struct Metadata { finalizers: EventFinalizers, batch_size: usize, - events_byte_size: usize, + events_byte_size: JsonSize, } impl RequestBuilder> for ElasticsearchRequestBuilder { @@ -50,9 +50,9 @@ impl RequestBuilder> for 
ElasticsearchRequestBuilder { ) -> (Self::Metadata, RequestMetadataBuilder, Self::Events) { let events_byte_size = events .iter() - .map(|x| x.log.size_of()) + .map(|x| x.log.estimated_json_encoded_size_of()) .reduce(|a, b| a + b) - .unwrap_or(0); + .unwrap_or(JsonSize::zero()); let metadata_builder = RequestMetadataBuilder::from_events(&events); diff --git a/src/sinks/elasticsearch/retry.rs b/src/sinks/elasticsearch/retry.rs index bd6035087c91e..bf40d1751f051 100644 --- a/src/sinks/elasticsearch/retry.rs +++ b/src/sinks/elasticsearch/retry.rs @@ -160,6 +160,7 @@ mod tests { use bytes::Bytes; use http::Response; use similar_asserts::assert_eq; + use vector_common::{internal_event::CountByteSize, json_size::JsonSize}; use super::*; use crate::event::EventStatus; @@ -179,7 +180,7 @@ mod tests { http_response: response, event_status: EventStatus::Rejected, batch_size: 1, - events_byte_size: 1, + events_byte_size: CountByteSize(1, JsonSize::new(1)).into(), }), RetryAction::DontRetry(_) )); @@ -200,7 +201,7 @@ mod tests { http_response: response, event_status: EventStatus::Errored, batch_size: 1, - events_byte_size: 1, + events_byte_size: CountByteSize(1, JsonSize::new(1)).into(), }), RetryAction::Retry(_) )); diff --git a/src/sinks/elasticsearch/service.rs b/src/sinks/elasticsearch/service.rs index 99ef680d5c2c6..25095dd390c38 100644 --- a/src/sinks/elasticsearch/service.rs +++ b/src/sinks/elasticsearch/service.rs @@ -4,15 +4,18 @@ use std::{ task::{Context, Poll}, }; -use aws_types::credentials::SharedCredentialsProvider; +use aws_credential_types::provider::SharedCredentialsProvider; use aws_types::region::Region; use bytes::Bytes; use futures::future::BoxFuture; use http::{Response, Uri}; use hyper::{service::Service, Body, Request}; use tower::ServiceExt; -use vector_common::request_metadata::{MetaDescriptive, RequestMetadata}; -use vector_core::{internal_event::CountByteSize, stream::DriverResponse, ByteSizeOf}; +use vector_common::{ + json_size::JsonSize, + request_metadata::{GroupedCountByteSize, MetaDescriptive, RequestMetadata}, +}; +use vector_core::{stream::DriverResponse, ByteSizeOf}; use crate::sinks::elasticsearch::sign_request; use crate::{ @@ -33,7 +36,7 @@ pub struct ElasticsearchRequest { pub payload: Bytes, pub finalizers: EventFinalizers, pub batch_size: usize, - pub events_byte_size: usize, + pub events_byte_size: JsonSize, pub metadata: RequestMetadata, } @@ -56,8 +59,12 @@ impl Finalizable for ElasticsearchRequest { } impl MetaDescriptive for ElasticsearchRequest { - fn get_metadata(&self) -> RequestMetadata { - self.metadata + fn get_metadata(&self) -> &RequestMetadata { + &self.metadata + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.metadata } } @@ -153,7 +160,7 @@ pub struct ElasticsearchResponse { pub http_response: Response, pub event_status: EventStatus, pub batch_size: usize, - pub events_byte_size: usize, + pub events_byte_size: GroupedCountByteSize, } impl DriverResponse for ElasticsearchResponse { @@ -161,8 +168,8 @@ impl DriverResponse for ElasticsearchResponse { self.event_status } - fn events_sent(&self) -> CountByteSize { - CountByteSize(self.batch_size, self.events_byte_size) + fn events_sent(&self) -> &GroupedCountByteSize { + &self.events_byte_size } } @@ -177,12 +184,13 @@ impl Service for ElasticsearchService { } // Emission of internal events for errors and dropped events is handled upstream by the caller. 
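The Elasticsearch hunks above move byte-size accounting from plain `usize` to `JsonSize`, and responses now report a `GroupedCountByteSize` rather than a bare count/size pair. A condensed sketch of that bookkeeping, reusing only calls already shown in these hunks (`JsonSize::zero()`, summing sizes with `a + b`, and the `CountByteSize(..).into()` conversion used in the retry tests); the helper name `response_size` is mine:

    // Sketch of the size bookkeeping applied in the hunks above.
    use vector_common::{internal_event::CountByteSize, json_size::JsonSize};
    use vector_common::request_metadata::GroupedCountByteSize;

    fn response_size<I>(batch_size: usize, per_event: I) -> GroupedCountByteSize
    where
        I: IntoIterator<Item = JsonSize>,
    {
        // Sum the estimated JSON-encoded sizes, falling back to zero for an empty batch.
        let total = per_event
            .into_iter()
            .reduce(|a, b| a + b)
            .unwrap_or(JsonSize::zero());
        // A (count, byte size) pair converts into the grouped form, as in the retry tests.
        CountByteSize(batch_size, total).into()
    }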
- fn call(&mut self, req: ElasticsearchRequest) -> Self::Future { + fn call(&mut self, mut req: ElasticsearchRequest) -> Self::Future { let mut http_service = self.batch_service.clone(); Box::pin(async move { http_service.ready().await?; let batch_size = req.batch_size; - let events_byte_size = req.events_byte_size; + let events_byte_size = + std::mem::take(req.metadata_mut()).into_events_estimated_json_encoded_byte_size(); let http_response = http_service.call(req).await?; let event_status = get_event_status(&http_response); diff --git a/src/sinks/file/mod.rs b/src/sinks/file/mod.rs index d2063938d360d..04f166aa986fb 100644 --- a/src/sinks/file/mod.rs +++ b/src/sinks/file/mod.rs @@ -40,7 +40,7 @@ use bytes_path::BytesPath; /// Configuration for the `file` sink. #[serde_as] -#[configurable_component(sink("file"))] +#[configurable_component(sink("file", "Output observability events into files."))] #[derive(Clone, Debug)] #[serde(deny_unknown_fields)] pub struct FileSinkConfig { @@ -61,6 +61,7 @@ pub struct FileSinkConfig { #[serde_as(as = "serde_with::DurationSeconds")] #[serde(rename = "idle_timeout_secs")] #[configurable(metadata(docs::examples = 600))] + #[configurable(metadata(docs::human_name = "Idle Timeout"))] pub idle_timeout: Duration, #[serde(flatten)] @@ -169,6 +170,7 @@ impl OutFile { } #[async_trait::async_trait] +#[typetag::serde(name = "file")] impl SinkConfig for FileSinkConfig { async fn build( &self, diff --git a/src/sinks/gcp/chronicle_unstructured.rs b/src/sinks/gcp/chronicle_unstructured.rs index 17b5485dd3c1a..817f2249a1ba2 100644 --- a/src/sinks/gcp/chronicle_unstructured.rs +++ b/src/sinks/gcp/chronicle_unstructured.rs @@ -101,7 +101,10 @@ impl SinkBatchSettings for ChronicleUnstructuredDefaultBatchSettings { } /// Configuration for the `gcp_chronicle_unstructured` sink. -#[configurable_component(sink("gcp_chronicle_unstructured"))] +#[configurable_component(sink( + "gcp_chronicle_unstructured", + "Store unstructured log events in Google Chronicle." +))] #[derive(Clone, Debug)] pub struct ChronicleUnstructuredConfig { /// The endpoint to send data to. 
@@ -200,6 +203,7 @@ pub enum ChronicleError { } #[async_trait::async_trait] +#[typetag::serde(name = "gcp_chronicle_unstructured")] impl SinkConfig for ChronicleUnstructuredConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { // Unlike Vector's upstream behavior, if the initial `auth` result is an error @@ -301,8 +305,12 @@ impl Finalizable for ChronicleRequest { } impl MetaDescriptive for ChronicleRequest { - fn get_metadata(&self) -> RequestMetadata { - self.metadata + fn get_metadata(&self) -> &RequestMetadata { + &self.metadata + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.metadata } } @@ -489,7 +497,7 @@ impl Service for ChronicleService { HeaderValue::from_str(&request.body.len().to_string()).unwrap(), ); - let metadata = request.get_metadata(); + let metadata = request.get_metadata().clone(); let mut http_request = builder.body(Body::from(request.body)).unwrap(); @@ -565,7 +573,7 @@ mod integration_tests { log_type: &str, auth_path: &str, ) -> crate::Result<(VectorSink, crate::sinks::Healthcheck)> { - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); config(log_type, auth_path).build(cx).await } diff --git a/src/sinks/gcp/cloud_storage.rs b/src/sinks/gcp/cloud_storage.rs index a62239a32327b..ae64e14e9dd46 100644 --- a/src/sinks/gcp/cloud_storage.rs +++ b/src/sinks/gcp/cloud_storage.rs @@ -50,7 +50,10 @@ pub enum GcsHealthcheckError { } /// Configuration for the `gcp_cloud_storage` sink. -#[configurable_component(sink("gcp_cloud_storage"))] +#[configurable_component(sink( + "gcp_cloud_storage", + "Store observability events in GCP Cloud Storage." +))] #[derive(Clone, Debug)] #[serde(deny_unknown_fields)] pub struct GcsSinkConfig { @@ -202,6 +205,7 @@ impl GenerateConfig for GcsSinkConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "gcp_cloud_storage")] impl SinkConfig for GcsSinkConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { // Unlike Vector's upstream behavior, if the initial `auth` result is an error @@ -447,7 +451,7 @@ mod tests { async fn component_spec_compliance() { let mock_endpoint = spawn_blackhole_http_server(always_200_response).await; - let context = SinkContext::new_test(); + let context = SinkContext::default(); let tls = TlsSettings::default(); let client = diff --git a/src/sinks/gcp/pubsub.rs b/src/sinks/gcp/pubsub.rs index 64b07bf6eb7ab..6647ce2fb005a 100644 --- a/src/sinks/gcp/pubsub.rs +++ b/src/sinks/gcp/pubsub.rs @@ -48,7 +48,10 @@ impl SinkBatchSettings for PubsubDefaultBatchSettings { } /// Configuration for the `gcp_pubsub` sink. -#[configurable_component(sink("gcp_pubsub"))] +#[configurable_component(sink( + "gcp_pubsub", + "Publish observability events to GCP's Pub/Sub messaging system." +))] #[derive(Clone, Debug)] pub struct PubsubConfig { /// The project name to which to publish events. 
@@ -114,6 +117,7 @@ impl GenerateConfig for PubsubConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "gcp_pubsub")] impl SinkConfig for PubsubConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let sink = PubsubSink::from_config(self).await?; @@ -141,6 +145,7 @@ impl SinkConfig for PubsubConfig { ) .sink_map_err(|error| error!(message = "Fatal gcp_pubsub sink error.", %error)); + #[allow(deprecated)] Ok((VectorSink::from_event_sink(sink), healthcheck)) } @@ -332,7 +337,7 @@ mod integration_tests { } async fn config_build(topic: &str) -> (VectorSink, crate::sinks::Healthcheck) { - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); config(topic).build(cx).await.expect("Building sink failed") } diff --git a/src/sinks/gcp/stackdriver_logs.rs b/src/sinks/gcp/stackdriver_logs.rs index 736659c240d7a..04c21e9c57e76 100644 --- a/src/sinks/gcp/stackdriver_logs.rs +++ b/src/sinks/gcp/stackdriver_logs.rs @@ -39,7 +39,10 @@ enum HealthcheckError { } /// Configuration for the `gcp_stackdriver_logs` sink. -#[configurable_component(sink("gcp_stackdriver_logs"))] +#[configurable_component(sink( + "gcp_stackdriver_logs", + "Deliver logs to GCP's Cloud Operations suite." +))] #[derive(Clone, Debug, Default)] #[serde(deny_unknown_fields)] pub struct StackdriverConfig { @@ -205,6 +208,7 @@ fn label_examples() -> HashMap { impl_generate_config_from_default!(StackdriverConfig); #[async_trait::async_trait] +#[typetag::serde(name = "gcp_stackdriver_logs")] impl SinkConfig for StackdriverConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { // Unlike Vector's upstream behavior, if the initial `auth` result is an error @@ -250,6 +254,7 @@ impl SinkConfig for StackdriverConfig { ) .sink_map_err(|error| error!(message = "Fatal gcp_stackdriver_logs sink error.", %error)); + #[allow(deprecated)] Ok((VectorSink::from_event_sink(sink), healthcheck)) } @@ -462,7 +467,7 @@ mod tests { config.auth.api_key = Some("fake".to_string().into()); config.endpoint = mock_endpoint.to_string(); - let context = SinkContext::new_test(); + let context = SinkContext::default(); let (sink, _healthcheck) = config.build(context).await.unwrap(); let event = Event::Log(LogEvent::from("simple message")); diff --git a/src/sinks/gcp/stackdriver_metrics.rs b/src/sinks/gcp/stackdriver_metrics.rs index 293b1f38e2415..46846da347a5d 100644 --- a/src/sinks/gcp/stackdriver_metrics.rs +++ b/src/sinks/gcp/stackdriver_metrics.rs @@ -36,7 +36,10 @@ impl SinkBatchSettings for StackdriverMetricsDefaultBatchSettings { } /// Configuration for the `gcp_stackdriver_metrics` sink. -#[configurable_component(sink("gcp_stackdriver_metrics"))] +#[configurable_component(sink( + "gcp_stackdriver_metrics", + "Deliver metrics to GCP's Cloud Monitoring system." 
+))] #[derive(Clone, Debug, Default)] pub struct StackdriverConfig { #[serde(skip, default = "default_endpoint")] @@ -93,6 +96,7 @@ fn default_endpoint() -> String { impl_generate_config_from_default!(StackdriverConfig); #[async_trait::async_trait] +#[typetag::serde(name = "gcp_stackdriver_metrics")] impl SinkConfig for StackdriverConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { // Unlike Vector's upstream behavior, if the initial `auth` result is an error @@ -136,6 +140,7 @@ impl SinkConfig for StackdriverConfig { |error| error!(message = "Fatal gcp_stackdriver_metrics sink error.", %error), ); + #[allow(deprecated)] Ok((VectorSink::from_event_sink(sink), healthcheck)) } @@ -303,7 +308,7 @@ mod tests { config.auth.api_key = Some("fake".to_string().into()); config.endpoint = mock_endpoint.to_string(); - let context = SinkContext::new_test(); + let context = SinkContext::default(); let (sink, _healthcheck) = config.build(context).await.unwrap(); let event = Event::Metric(Metric::new( diff --git a/src/sinks/gcs_common/service.rs b/src/sinks/gcs_common/service.rs index f107a5d4157f1..9af9c72427d04 100644 --- a/src/sinks/gcs_common/service.rs +++ b/src/sinks/gcs_common/service.rs @@ -1,11 +1,6 @@ use std::task::Poll; -use crate::{ - event::{EventFinalizers, EventStatus, Finalizable}, - gcp::GcpAuthenticator, - http::{get_http_scheme_from_uri, HttpClient, HttpError}, - mezmo::user_trace::UserLoggingResponse, -}; +use crate::{http::get_http_scheme_from_uri, mezmo::user_trace::UserLoggingResponse}; use bytes::Bytes; use futures::future; use futures::future::BoxFuture; @@ -15,10 +10,16 @@ use http::{ }; use hyper::Body; use tower::Service; -use vector_common::request_metadata::{MetaDescriptive, RequestMetadata}; -use vector_core::{internal_event::CountByteSize, stream::DriverResponse}; +use vector_common::request_metadata::{GroupedCountByteSize, MetaDescriptive, RequestMetadata}; +use vector_core::stream::DriverResponse; use vrl::value::Value; +use crate::{ + event::{EventFinalizers, EventStatus, Finalizable}, + gcp::GcpAuthenticator, + http::{HttpClient, HttpError}, +}; + #[derive(Debug, Clone)] pub struct GcsService { client: HttpClient, @@ -56,8 +57,12 @@ impl Finalizable for GcsRequest { } impl MetaDescriptive for GcsRequest { - fn get_metadata(&self) -> RequestMetadata { - self.metadata + fn get_metadata(&self) -> &RequestMetadata { + &self.metadata + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.metadata } } @@ -96,11 +101,8 @@ impl DriverResponse for GcsResponse { } } - fn events_sent(&self) -> CountByteSize { - CountByteSize( - self.metadata.event_count(), - self.metadata.events_estimated_json_encoded_byte_size(), - ) + fn events_sent(&self) -> &GroupedCountByteSize { + self.metadata.events_estimated_json_encoded_byte_size() } fn bytes_sent(&self) -> Option { diff --git a/src/sinks/honeycomb.rs b/src/sinks/honeycomb.rs index cd3c68dd96030..0fd1ee8459c83 100644 --- a/src/sinks/honeycomb.rs +++ b/src/sinks/honeycomb.rs @@ -19,7 +19,7 @@ use crate::{ }; /// Configuration for the `honeycomb` sink. 
-#[configurable_component(sink("honeycomb"))] +#[configurable_component(sink("honeycomb", "Deliver log events to Honeycomb."))] #[derive(Clone, Debug)] pub struct HoneycombConfig { // This endpoint is not user-configurable and only exists for testing purposes @@ -85,6 +85,7 @@ impl GenerateConfig for HoneycombConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "honeycomb")] impl SinkConfig for HoneycombConfig { async fn build( &self, @@ -109,6 +110,7 @@ impl SinkConfig for HoneycombConfig { let healthcheck = healthcheck(self.clone(), client).boxed(); + #[allow(deprecated)] Ok((super::VectorSink::from_event_sink(sink), healthcheck)) } @@ -244,7 +246,7 @@ mod test { .expect("config should be valid"); config.endpoint = mock_endpoint.to_string(); - let context = SinkContext::new_test(); + let context = SinkContext::default(); let (sink, _healthcheck) = config.build(context).await.unwrap(); let event = Event::Log(LogEvent::from("simple message")); diff --git a/src/sinks/http.rs b/src/sinks/http.rs index 042f952e8f0c3..f938714d43f56 100644 --- a/src/sinks/http.rs +++ b/src/sinks/http.rs @@ -2,7 +2,6 @@ use std::io::Write; use bytes::{BufMut, Bytes, BytesMut}; use codecs::encoding::{CharacterDelimitedEncoder, Framer, Serializer}; -use flate2::write::{GzEncoder, ZlibEncoder}; use futures::{future, FutureExt, SinkExt}; use http::{ header::{HeaderName, HeaderValue, AUTHORIZATION}, @@ -23,14 +22,14 @@ use crate::{ sinks::util::{ self, http::{BatchedHttpSink, HttpEventEncoder, RequestConfig}, - BatchConfig, Buffer, Compression, RealtimeSizeBasedDefaultBatchSettings, + BatchConfig, Buffer, Compression, Compressor, RealtimeSizeBasedDefaultBatchSettings, TowerRequestConfig, UriSerde, }, tls::{TlsConfig, TlsSettings}, }; /// Configuration for the `http` sink. 
-#[configurable_component(sink("http"))] +#[configurable_component(sink("http", "Deliver observability event data to an HTTP server."))] #[derive(Clone, Debug)] #[serde(deny_unknown_fields)] pub struct HttpSinkConfig { @@ -202,6 +201,7 @@ fn default_sink(encoding: EncodingConfigWithFraming) -> HttpSink { } #[async_trait::async_trait] +#[typetag::serde(name = "http")] impl SinkConfig for HttpSinkConfig { async fn build( &self, @@ -256,6 +256,7 @@ impl SinkConfig for HttpSinkConfig { ) .sink_map_err(|error| error!(message = "Fatal HTTP sink error.", %error)); + #[allow(deprecated)] let sink = super::VectorSink::from_event_sink(sink); Ok((sink, healthcheck)) @@ -379,24 +380,21 @@ impl util::http::HttpSink for HttpSink { builder = builder.header("Content-Type", content_type); } - match self.compression { - Compression::Gzip(level) => { - builder = builder.header("Content-Encoding", "gzip"); - - let buffer = BytesMut::new(); - let mut w = GzEncoder::new(buffer.writer(), level.as_flate2()); - w.write_all(&body).expect("Writing to Vec can't fail"); - body = w.finish().expect("Writing to Vec can't fail").into_inner(); - } - Compression::Zlib(level) => { - builder = builder.header("Content-Encoding", "deflate"); - - let buffer = BytesMut::new(); - let mut w = ZlibEncoder::new(buffer.writer(), level.as_flate2()); - w.write_all(&body).expect("Writing to Vec can't fail"); - body = w.finish().expect("Writing to Vec can't fail").into_inner(); - } - Compression::None => {} + let compression = self.compression; + + if compression.is_compressed() { + builder = builder.header( + "Content-Encoding", + compression + .content_encoding() + .expect("Encoding should be specified."), + ); + + let mut compressor = Compressor::from(compression); + compressor + .write_all(&body) + .expect("Writing to Vec can't fail."); + body = compressor.finish().expect("Writing to Vec can't fail."); } let headers = builder @@ -482,12 +480,12 @@ mod tests { encoding::FramingConfig, JsonSerializerConfig, NewlineDelimitedEncoderConfig, TextSerializerConfig, }; - use flate2::read::MultiGzDecoder; + use flate2::{read::MultiGzDecoder, read::ZlibDecoder}; use futures::{channel::mpsc, stream, StreamExt}; use headers::{Authorization, HeaderMapExt}; use http::request::Parts; use hyper::{Method, Response, StatusCode}; - use serde::Deserialize; + use serde::{de, Deserialize}; use vector_core::event::{BatchNotifier, BatchStatus, LogEvent}; use super::*; @@ -635,7 +633,7 @@ mod tests { "#; let config: HttpSinkConfig = toml::from_str(config).unwrap(); - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); _ = config.build(cx).await.unwrap(); } @@ -818,7 +816,36 @@ mod tests { } #[tokio::test] - async fn json_compression() { + async fn json_gzip_compression() { + json_compression("gzip").await; + } + + #[tokio::test] + async fn json_zstd_compression() { + json_compression("zstd").await; + } + + #[tokio::test] + async fn json_zlib_compression() { + json_compression("zlib").await; + } + + #[tokio::test] + async fn json_gzip_compression_with_payload_wrapper() { + json_compression_with_payload_wrapper("gzip").await; + } + + #[tokio::test] + async fn json_zlib_compression_with_payload_wrapper() { + json_compression_with_payload_wrapper("zlib").await; + } + + #[tokio::test] + async fn json_zstd_compression_with_payload_wrapper() { + json_compression_with_payload_wrapper("zstd").await; + } + + async fn json_compression(compression: &str) { components::assert_sink_compliance(&HTTP_SINK_TAGS, async { let num_lines = 1000; @@ -826,7 +853,7 
@@ mod tests { let config = r#" uri = "http://$IN_ADDR/frames" - compression = "gzip" + compression = "$COMPRESSION" encoding.codec = "json" method = "post" @@ -835,10 +862,12 @@ mod tests { user = "waldo" password = "hunter2" "# - .replace("$IN_ADDR", &in_addr.to_string()); + .replace("$IN_ADDR", &in_addr.to_string()) + .replace("$COMPRESSION", compression); + let config: HttpSinkConfig = toml::from_str(&config).unwrap(); - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let (sink, _) = config.build(cx).await.unwrap(); let (rx, trigger, server) = build_test_server(in_addr); @@ -862,8 +891,7 @@ mod tests { Some(Authorization::basic("waldo", "hunter2")), parts.headers.typed_get() ); - let lines: Vec = - serde_json::from_reader(MultiGzDecoder::new(body.reader())).unwrap(); + let lines: Vec = parse_compressed_json(compression, body); stream::iter(lines) }) .map(|line| line.get("message").unwrap().as_str().unwrap().to_owned()) @@ -876,8 +904,7 @@ mod tests { .await; } - #[tokio::test] - async fn json_compression_with_payload_wrapper() { + async fn json_compression_with_payload_wrapper(compression: &str) { components::assert_sink_compliance(&HTTP_SINK_TAGS, async { let num_lines = 1000; @@ -885,7 +912,7 @@ mod tests { let config = r#" uri = "http://$IN_ADDR/frames" - compression = "gzip" + compression = "$COMPRESSION" encoding.codec = "json" payload_prefix = '{"data":' payload_suffix = "}" @@ -896,10 +923,12 @@ mod tests { user = "waldo" password = "hunter2" "# - .replace("$IN_ADDR", &in_addr.to_string()); + .replace("$IN_ADDR", &in_addr.to_string()) + .replace("$COMPRESSION", compression); + let config: HttpSinkConfig = toml::from_str(&config).unwrap(); - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let (sink, _) = config.build(cx).await.unwrap(); let (rx, trigger, server) = build_test_server(in_addr); @@ -924,8 +953,8 @@ mod tests { parts.headers.typed_get() ); - let message: serde_json::Value = - serde_json::from_reader(MultiGzDecoder::new(body.reader())).unwrap(); + let message: serde_json::Value = parse_compressed_json(compression, body); + let lines: Vec = message["data"].as_array().unwrap().to_vec(); stream::iter(lines) @@ -940,6 +969,18 @@ mod tests { .await; } + fn parse_compressed_json(compression: &str, buf: Bytes) -> T + where + T: de::DeserializeOwned, + { + match compression { + "gzip" => serde_json::from_reader(MultiGzDecoder::new(buf.reader())).unwrap(), + "zstd" => serde_json::from_reader(zstd::Decoder::new(buf.reader()).unwrap()).unwrap(), + "zlib" => serde_json::from_reader(ZlibDecoder::new(buf.reader())).unwrap(), + _ => panic!("undefined compression: {}", compression), + } + } + async fn get_received( rx: mpsc::Receiver<(Parts, Bytes)>, assert_parts: impl Fn(Parts), @@ -994,7 +1035,7 @@ mod tests { ); let config: HttpSinkConfig = toml::from_str(&config).unwrap(); - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let (sink, _) = config.build(cx).await.unwrap(); (in_addr, sink) diff --git a/src/sinks/humio/logs.rs b/src/sinks/humio/logs.rs index 6af56db905115..b87446ff015f7 100644 --- a/src/sinks/humio/logs.rs +++ b/src/sinks/humio/logs.rs @@ -26,7 +26,7 @@ use crate::{ pub(super) const HOST: &str = "https://cloud.humio.com"; /// Configuration for the `humio_logs` sink. 
-#[configurable_component(sink("humio_logs"))] +#[configurable_component(sink("humio_logs", "Deliver log event data to Humio."))] #[derive(Clone, Debug)] #[serde(deny_unknown_fields)] pub struct HumioLogsConfig { @@ -168,6 +168,7 @@ impl GenerateConfig for HumioLogsConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "humio_logs")] impl SinkConfig for HumioLogsConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { self.build_hec_config().build(cx).await @@ -250,7 +251,7 @@ mod integration_tests { async fn humio_insert_message() { wait_ready().await; - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let repo = create_repository().await; @@ -300,7 +301,7 @@ mod integration_tests { async fn humio_insert_source() { wait_ready().await; - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let repo = create_repository().await; @@ -336,7 +337,7 @@ mod integration_tests { let mut config = config(&repo.default_ingest_token); config.event_type = Template::try_from("json".to_string()).ok(); - let (sink, _) = config.build(SinkContext::new_test()).await.unwrap(); + let (sink, _) = config.build(SinkContext::default()).await.unwrap(); let message = random_string(100); let mut event = LogEvent::from(message.clone()); @@ -362,7 +363,7 @@ mod integration_tests { { let config = config(&repo.default_ingest_token); - let (sink, _) = config.build(SinkContext::new_test()).await.unwrap(); + let (sink, _) = config.build(SinkContext::default()).await.unwrap(); let message = random_string(100); let event = LogEvent::from(message.clone()); diff --git a/src/sinks/humio/metrics.rs b/src/sinks/humio/metrics.rs index c79aa6b852d1a..a336f60590aa9 100644 --- a/src/sinks/humio/metrics.rs +++ b/src/sinks/humio/metrics.rs @@ -38,7 +38,7 @@ use crate::{ // `humio_logs` config here. // // [1]: https://github.com/serde-rs/serde/issues/1504 -#[configurable_component(sink("humio_metrics"))] +#[configurable_component(sink("humio_metrics", "Deliver metric event data to Humio."))] #[derive(Clone, Debug)] #[serde(deny_unknown_fields)] pub struct HumioMetricsConfig { @@ -151,6 +151,7 @@ impl GenerateConfig for HumioMetricsConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "humio_metrics")] impl SinkConfig for HumioMetricsConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let transform = self diff --git a/src/sinks/influxdb/logs.rs b/src/sinks/influxdb/logs.rs index 9c5f8e3a8674d..35127a59a3fc7 100644 --- a/src/sinks/influxdb/logs.rs +++ b/src/sinks/influxdb/logs.rs @@ -41,7 +41,7 @@ impl SinkBatchSettings for InfluxDbLogsDefaultBatchSettings { } /// Configuration for the `influxdb_logs` sink. 
-#[configurable_component(sink("influxdb_logs"))] +#[configurable_component(sink("influxdb_logs", "Deliver log event data to InfluxDB."))] #[derive(Clone, Debug, Default)] #[serde(deny_unknown_fields)] pub struct InfluxDbLogsConfig { @@ -157,6 +157,7 @@ impl GenerateConfig for InfluxDbLogsConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "influxdb_logs")] impl SinkConfig for InfluxDbLogsConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let measurement = self.get_measurement()?; @@ -233,6 +234,7 @@ impl SinkConfig for InfluxDbLogsConfig { ) .sink_map_err(|error| error!(message = "Fatal influxdb_logs sink error.", %error)); + #[allow(deprecated)] Ok((VectorSink::from_event_sink(sink), healthcheck)) } @@ -884,7 +886,7 @@ mod integration_tests { use std::sync::Arc; use vector_core::config::{LegacyKey, LogNamespace}; use vector_core::event::{BatchNotifier, BatchStatus, Event, LogEvent}; - use vrl::value::value; + use vrl::value; use super::*; use crate::{ @@ -905,7 +907,7 @@ mod integration_tests { let now = Utc::now(); let measure = format!("vector-{}", now.timestamp_nanos()); - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let config = InfluxDbLogsConfig { namespace: None, diff --git a/src/sinks/influxdb/metrics.rs b/src/sinks/influxdb/metrics.rs index 41c8559963cc5..68dd5182cadf1 100644 --- a/src/sinks/influxdb/metrics.rs +++ b/src/sinks/influxdb/metrics.rs @@ -7,7 +7,7 @@ use tower::Service; use vector_config::configurable_component; use vector_core::{ event::metric::{MetricSketch, MetricTags, Quantile}, - ByteSizeOf, + ByteSizeOf, EstimatedJsonEncodedSizeOf, }; use crate::{ @@ -52,7 +52,7 @@ impl SinkBatchSettings for InfluxDbDefaultBatchSettings { } /// Configuration for the `influxdb_metrics` sink. 
-#[configurable_component(sink("influxdb_metrics"))] +#[configurable_component(sink("influxdb_metrics", "Deliver metric event data to InfluxDB."))] #[derive(Clone, Debug, Default)] #[serde(deny_unknown_fields)] pub struct InfluxDbConfig { @@ -122,6 +122,7 @@ struct InfluxDbRequest { impl_generate_config_from_default!(InfluxDbConfig); #[async_trait::async_trait] +#[typetag::serde(name = "influxdb_metrics")] impl SinkConfig for InfluxDbConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let tls_settings = TlsSettings::from_options(&self.tls)?; @@ -184,13 +185,16 @@ impl InfluxDbSvc { .with_flat_map(move |event: Event| { stream::iter({ let byte_size = event.size_of(); + let json_size = event.estimated_json_encoded_size_of(); + normalizer .normalize(event.into_metric()) - .map(|metric| Ok(EncodedEvent::new(metric, byte_size))) + .map(|metric| Ok(EncodedEvent::new(metric, byte_size, json_size))) }) }) .sink_map_err(|error| error!(message = "Fatal influxdb sink error.", %error)); + #[allow(deprecated)] Ok(VectorSink::from_event_sink(sink)) } } @@ -991,7 +995,7 @@ mod integration_tests { crate::test_util::trace_init(); let database = onboarding_v1(url).await; - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let config = InfluxDbConfig { endpoint: url.to_string(), @@ -1086,7 +1090,7 @@ mod integration_tests { let endpoint = address_v2(); onboarding_v2(&endpoint).await; - let cx = SinkContext::new_test(); + let cx = SinkContext::default(); let config = InfluxDbConfig { endpoint, diff --git a/src/sinks/kafka/config.rs b/src/sinks/kafka/config.rs index 2ca9f7d49dabc..6d03f9326ef2b 100644 --- a/src/sinks/kafka/config.rs +++ b/src/sinks/kafka/config.rs @@ -5,27 +5,25 @@ use futures::FutureExt; use rdkafka::ClientConfig; use serde_with::serde_as; use vector_config::configurable_component; -use vector_core::schema::Requirement; use vrl::value::Kind; use crate::{ - codecs::EncodingConfig, - config::{AcknowledgementsConfig, DataType, GenerateConfig, Input, SinkConfig, SinkContext}, kafka::{KafkaAuthConfig, KafkaCompression}, serde::json::to_string, sinks::{ kafka::sink::{healthcheck, KafkaSink}, - util::{BatchConfig, NoDefaultsBatchSettings}, - Healthcheck, VectorSink, + prelude::*, }, - template::Template, }; pub(crate) const QUEUED_MIN_MESSAGES: u64 = 100000; /// Configuration for the `kafka` sink. #[serde_as] -#[configurable_component(sink("kafka"))] +#[configurable_component(sink( + "kafka", + "Publish observability event data to Apache Kafka topics." +))] #[derive(Clone, Debug)] #[serde(deny_unknown_fields)] pub struct KafkaSinkConfig { @@ -80,12 +78,14 @@ pub struct KafkaSinkConfig { #[serde(default = "default_socket_timeout_ms")] #[configurable(metadata(docs::examples = 30000, docs::examples = 60000))] #[configurable(metadata(docs::advanced))] + #[configurable(metadata(docs::human_name = "Socket Timeout"))] pub socket_timeout_ms: Duration, /// Local message timeout, in milliseconds. 
#[serde_as(as = "serde_with::DurationMilliSeconds")] #[configurable(metadata(docs::examples = 150000, docs::examples = 450000))] #[serde(default = "default_message_timeout_ms")] + #[configurable(metadata(docs::human_name = "Message Timeout"))] #[configurable(metadata(docs::advanced))] pub message_timeout_ms: Duration, @@ -265,6 +265,7 @@ impl GenerateConfig for KafkaSinkConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "kafka")] impl SinkConfig for KafkaSinkConfig { async fn build(&self, cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { let sink = KafkaSink::new(self.clone(), cx)?; diff --git a/src/sinks/kafka/request_builder.rs b/src/sinks/kafka/request_builder.rs index 794de6174396e..9d1edd0d97c43 100644 --- a/src/sinks/kafka/request_builder.rs +++ b/src/sinks/kafka/request_builder.rs @@ -37,7 +37,7 @@ impl KafkaRequestBuilder { }) .ok()?; - let metadata_builder = RequestMetadataBuilder::from_events(&event); + let metadata_builder = RequestMetadataBuilder::from_event(&event); let metadata = KafkaRequestMetadata { finalizers: event.take_finalizers(), diff --git a/src/sinks/kafka/service.rs b/src/sinks/kafka/service.rs index 47b0d5460bb51..ab09edd75945a 100644 --- a/src/sinks/kafka/service.rs +++ b/src/sinks/kafka/service.rs @@ -1,28 +1,18 @@ use std::task::{Context, Poll}; use bytes::Bytes; -use futures::future::BoxFuture; use rdkafka::{ error::KafkaError, message::OwnedHeaders, producer::{FutureProducer, FutureRecord}, util::Timeout, }; -use tower::Service; -use vector_common::request_metadata::{MetaDescriptive, RequestMetadata}; -use vector_core::{ - internal_event::{ - ByteSize, BytesSent, CountByteSize, InternalEventHandle as _, Protocol, Registered, - }, - stream::DriverResponse, +use vector_core::internal_event::{ + ByteSize, BytesSent, InternalEventHandle as _, Protocol, Registered, }; use vrl::value::Value; -use crate::{ - event::{EventFinalizers, EventStatus, Finalizable}, - kafka::KafkaStatisticsContext, - mezmo::user_trace::{UserLoggingError, UserLoggingResponse}, -}; +use crate::{kafka::KafkaStatisticsContext, sinks::prelude::*}; pub struct KafkaRequest { pub body: Bytes, @@ -39,7 +29,7 @@ pub struct KafkaRequestMetadata { } pub struct KafkaResponse { - event_byte_size: usize, + event_byte_size: GroupedCountByteSize, } impl DriverResponse for KafkaResponse { @@ -47,8 +37,8 @@ impl DriverResponse for KafkaResponse { EventStatus::Delivered } - fn events_sent(&self) -> CountByteSize { - CountByteSize(1, self.event_byte_size) + fn events_sent(&self) -> &GroupedCountByteSize { + &self.event_byte_size } } @@ -59,8 +49,12 @@ impl Finalizable for KafkaRequest { } impl MetaDescriptive for KafkaRequest { - fn get_metadata(&self) -> RequestMetadata { - self.request_metadata + fn get_metadata(&self) -> &RequestMetadata { + &self.request_metadata + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.request_metadata } } @@ -112,7 +106,9 @@ impl Service for KafkaService { let this = self.clone(); Box::pin(async move { - let event_byte_size = request.get_metadata().events_byte_size(); + let event_byte_size = request + .request_metadata + .into_events_estimated_json_encoded_byte_size(); let mut record = FutureRecord::to(&request.metadata.topic).payload(request.body.as_ref()); diff --git a/src/sinks/kafka/sink.rs b/src/sinks/kafka/sink.rs index fbc0ee98f244f..fcd661495874d 100644 --- a/src/sinks/kafka/sink.rs +++ b/src/sinks/kafka/sink.rs @@ -1,5 +1,4 @@ -use async_trait::async_trait; -use futures::{future, stream::BoxStream, StreamExt}; +use 
futures::future; use rdkafka::{ consumer::{BaseConsumer, Consumer}, error::KafkaError, @@ -12,19 +11,11 @@ use tower::limit::ConcurrencyLimit; use super::config::{KafkaRole, KafkaSinkConfig}; use crate::{ - codecs::{Encoder, Transformer}, - config::SinkContext, - event::{Event, LogEvent}, kafka::KafkaStatisticsContext, - mezmo::user_trace::MezmoLoggingService, - sinks::{ - kafka::{ - config::QUEUED_MIN_MESSAGES, request_builder::KafkaRequestBuilder, - service::KafkaService, - }, - util::{builder::SinkBuilderExt, StreamSink}, + sinks::kafka::{ + config::QUEUED_MIN_MESSAGES, request_builder::KafkaRequestBuilder, service::KafkaService, }, - template::{Template, TemplateParseError}, + sinks::prelude::*, }; #[derive(Debug, Snafu)] diff --git a/src/sinks/kafka/tests.rs b/src/sinks/kafka/tests.rs index 9b6d747826f6d..ac128a9974ed4 100644 --- a/src/sinks/kafka/tests.rs +++ b/src/sinks/kafka/tests.rs @@ -18,7 +18,10 @@ mod integration_test { message::Headers, Message, Offset, TopicPartitionList, }; - use vector_core::event::{BatchNotifier, BatchStatus}; + use vector_core::{ + config::{init_telemetry, Tags, Telemetry}, + event::{BatchNotifier, BatchStatus}, + }; use crate::{ config::SinkContext, @@ -30,12 +33,13 @@ mod integration_test { sink::KafkaSink, *, }, - util::{BatchConfig, NoDefaultsBatchSettings}, - VectorSink, + prelude::*, }, - template::Template, test_util::{ - components::{assert_sink_compliance, SINK_TAGS}, + components::{ + assert_data_volume_sink_compliance, assert_sink_compliance, DATA_VOLUME_SINK_TAGS, + SINK_TAGS, + }, random_lines_with_stream, random_string, wait_for, }, tls::{TlsConfig, TlsEnableableConfig, TEST_PEM_INTERMEDIATE_CA_PATH}, @@ -75,25 +79,61 @@ mod integration_test { #[tokio::test] async fn kafka_happy_path_plaintext() { crate::test_util::trace_init(); - kafka_happy_path(kafka_address(9091), None, None, KafkaCompression::None).await; + kafka_happy_path( + kafka_address(9091), + None, + None, + KafkaCompression::None, + true, + ) + .await; + kafka_happy_path( + kafka_address(9091), + None, + None, + KafkaCompression::None, + false, + ) + .await; } #[tokio::test] async fn kafka_happy_path_gzip() { crate::test_util::trace_init(); - kafka_happy_path(kafka_address(9091), None, None, KafkaCompression::Gzip).await; + kafka_happy_path( + kafka_address(9091), + None, + None, + KafkaCompression::Gzip, + false, + ) + .await; } #[tokio::test] async fn kafka_happy_path_lz4() { crate::test_util::trace_init(); - kafka_happy_path(kafka_address(9091), None, None, KafkaCompression::Lz4).await; + kafka_happy_path( + kafka_address(9091), + None, + None, + KafkaCompression::Lz4, + false, + ) + .await; } #[tokio::test] async fn kafka_happy_path_snappy() { crate::test_util::trace_init(); - kafka_happy_path(kafka_address(9091), None, None, KafkaCompression::Snappy).await; + kafka_happy_path( + kafka_address(9091), + None, + None, + KafkaCompression::Snappy, + false, + ) + .await; } // rdkafka zstd feature is not enabled @@ -102,7 +142,14 @@ mod integration_test { #[tokio::test] async fn kafka_happy_path_zstd() { crate::test_util::trace_init(); - kafka_happy_path(kafka_address(9091), None, None, KafkaCompression::Zstd).await; + kafka_happy_path( + kafka_address(9091), + None, + None, + KafkaCompression::Zstd, + false, + ) + .await; } async fn kafka_batch_options_overrides( @@ -214,6 +261,7 @@ mod integration_test { options: TlsConfig::test_config(), }), KafkaCompression::None, + false, ) .await; } @@ -231,6 +279,7 @@ mod integration_test { }), None, KafkaCompression::None, + false, ) 
.await; } @@ -240,7 +289,22 @@ mod integration_test { sasl: Option, tls: Option, compression: KafkaCompression, + test_telemetry_tags: bool, ) { + if test_telemetry_tags { + // We need to configure Vector to emit the service and source tags. + // The default is to not emit these. + init_telemetry( + Telemetry { + tags: Tags { + emit_service: true, + emit_source: true, + }, + }, + true, + ); + } + let topic = format!("test-{}", random_string(10)); let headers_key = "headers_key".to_string(); let kafka_auth = KafkaAuthConfig { sasl, tls }; @@ -279,13 +343,24 @@ mod integration_test { }); events }); - assert_sink_compliance(&SINK_TAGS, async move { - let sink = KafkaSink::new(config, SinkContext::new_test()).unwrap(); - let sink = VectorSink::from_event_streamsink(sink); - sink.run(input_events).await - }) - .await - .expect("Running sink failed"); + + if test_telemetry_tags { + assert_data_volume_sink_compliance(&DATA_VOLUME_SINK_TAGS, async move { + let sink = KafkaSink::new(config, SinkContext::new_test()).unwrap(); + let sink = VectorSink::from_event_streamsink(sink); + sink.run(input_events).await + }) + .await + .expect("Running sink failed"); + } else { + assert_sink_compliance(&SINK_TAGS, async move { + let sink = KafkaSink::new(config, SinkContext::new_test()).unwrap(); + let sink = VectorSink::from_event_streamsink(sink); + sink.run(input_events).await + }) + .await + .expect("Running sink failed"); + } assert_eq!(receiver.try_recv(), Ok(BatchStatus::Delivered)); // read back everything from the beginning diff --git a/src/sinks/loki/config.rs b/src/sinks/loki/config.rs index 108cfa0aa3f99..f72c68c964007 100644 --- a/src/sinks/loki/config.rs +++ b/src/sinks/loki/config.rs @@ -1,21 +1,12 @@ use std::collections::HashMap; -use futures::future::FutureExt; -use vector_config::configurable_component; use vrl::value::Kind; use super::{healthcheck::healthcheck, sink::LokiSink}; use crate::{ - codecs::EncodingConfig, - config::{AcknowledgementsConfig, DataType, GenerateConfig, Input, SinkConfig, SinkContext}, http::{Auth, HttpClient, MaybeAuth}, schema, - sinks::{ - util::{BatchConfig, Compression, SinkBatchSettings, TowerRequestConfig, UriSerde}, - VectorSink, - }, - template::Template, - tls::{TlsConfig, TlsSettings}, + sinks::{prelude::*, util::UriSerde}, }; /// Loki-specific compression. @@ -61,7 +52,7 @@ fn default_loki_path() -> String { } /// Configuration for the `loki` sink. -#[configurable_component(sink("loki"))] +#[configurable_component(sink("loki", "Deliver log event data to the Loki aggregation system."))] #[derive(Clone, Debug)] #[serde(deny_unknown_fields)] pub struct LokiConfig { @@ -220,6 +211,7 @@ impl LokiConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "loki")] impl SinkConfig for LokiConfig { async fn build( &self, diff --git a/src/sinks/loki/event.rs b/src/sinks/loki/event.rs index 93d92b8498ab1..22d399f970710 100644 --- a/src/sinks/loki/event.rs +++ b/src/sinks/loki/event.rs @@ -1,14 +1,8 @@ use std::{collections::HashMap, io}; +use crate::sinks::{prelude::*, util::encoding::Encoder}; use bytes::Bytes; use serde::{ser::SerializeSeq, Serialize}; -use vector_buffers::EventCount; -use vector_core::{ - event::{EventFinalizers, Finalizable}, - ByteSizeOf, EstimatedJsonEncodedSizeOf, -}; - -use crate::sinks::util::encoding::{write_all, Encoder}; pub type Labels = Vec<(String, String)>; @@ -138,21 +132,6 @@ impl ByteSizeOf for LokiEvent { } } -/// This implementation approximates the `Serialize` implementation below, without any allocations. 
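The Kafka service hunk above and the Loki service hunk below both move `DriverResponse::events_sent` from returning an owned `CountByteSize` to borrowing a `GroupedCountByteSize`, typically taken from the request's `RequestMetadata` via `into_events_estimated_json_encoded_byte_size()`; requests likewise now expose their metadata by reference through `get_metadata`/`metadata_mut`. A minimal sketch of a response type under the new signature (placeholder name):

```rust
// `ExampleResponse` is a placeholder; assumes `use crate::sinks::prelude::*;`,
// which re-exports `DriverResponse`, `EventStatus`, and `GroupedCountByteSize`
// in the sinks touched by this diff.
struct ExampleResponse {
    events_byte_size: GroupedCountByteSize,
}

impl DriverResponse for ExampleResponse {
    fn event_status(&self) -> EventStatus {
        EventStatus::Delivered
    }

    // The driver now reads grouped (per-source / per-service) counts by reference.
    fn events_sent(&self) -> &GroupedCountByteSize {
        &self.events_byte_size
    }
}
```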
-impl EstimatedJsonEncodedSizeOf for LokiEvent { - fn estimated_json_encoded_size_of(&self) -> usize { - static BRACKETS_SIZE: usize = 2; - static COLON_SIZE: usize = 1; - static QUOTES_SIZE: usize = 2; - - BRACKETS_SIZE - + QUOTES_SIZE - + self.timestamp.estimated_json_encoded_size_of() - + COLON_SIZE - + self.event.estimated_json_encoded_size_of() - } -} - impl Serialize for LokiEvent { fn serialize(&self, serializer: S) -> Result where @@ -171,7 +150,9 @@ pub struct LokiRecord { pub partition: PartitionKey, pub labels: Labels, pub event: LokiEvent, + pub json_byte_size: JsonSize, pub finalizers: EventFinalizers, + pub event_count_tags: EventCountTags, } impl ByteSizeOf for LokiRecord { @@ -185,8 +166,8 @@ impl ByteSizeOf for LokiRecord { } impl EstimatedJsonEncodedSizeOf for LokiRecord { - fn estimated_json_encoded_size_of(&self) -> usize { - self.event.estimated_json_encoded_size_of() + fn estimated_json_encoded_size_of(&self) -> JsonSize { + self.json_byte_size } } @@ -203,6 +184,12 @@ impl Finalizable for LokiRecord { } } +impl GetEventCountTags for LokiRecord { + fn get_tags(&self) -> EventCountTags { + self.event_count_tags.clone() + } +} + #[derive(Hash, Eq, PartialEq, Clone, Debug)] pub struct PartitionKey { pub tenant_id: Option, diff --git a/src/sinks/loki/service.rs b/src/sinks/loki/service.rs index ec62cb690e432..edcc762042fba 100644 --- a/src/sinks/loki/service.rs +++ b/src/sinks/loki/service.rs @@ -1,22 +1,14 @@ use std::task::{Context, Poll}; use bytes::Bytes; -use futures::future::BoxFuture; use http::StatusCode; use snafu::Snafu; -use tower::Service; use tracing::Instrument; -use vector_common::request_metadata::{MetaDescriptive, RequestMetadata}; -use vector_core::{ - event::{EventFinalizers, EventStatus, Finalizable}, - internal_event::CountByteSize, - stream::DriverResponse, -}; use crate::sinks::loki::config::{CompressionConfigAdapter, ExtendedCompression}; use crate::{ http::{Auth, HttpClient}, - sinks::util::{retries::RetryLogic, UriSerde}, + sinks::{prelude::*, util::UriSerde}, }; #[derive(Clone)] @@ -57,11 +49,8 @@ impl DriverResponse for LokiResponse { EventStatus::Delivered } - fn events_sent(&self) -> CountByteSize { - CountByteSize( - self.metadata.event_count(), - self.metadata.events_estimated_json_encoded_byte_size(), - ) + fn events_sent(&self) -> &GroupedCountByteSize { + self.metadata.events_estimated_json_encoded_byte_size() } fn bytes_sent(&self) -> Option { @@ -85,8 +74,12 @@ impl Finalizable for LokiRequest { } impl MetaDescriptive for LokiRequest { - fn get_metadata(&self) -> RequestMetadata { - self.metadata + fn get_metadata(&self) -> &RequestMetadata { + &self.metadata + } + + fn metadata_mut(&mut self) -> &mut RequestMetadata { + &mut self.metadata } } @@ -127,7 +120,7 @@ impl Service for LokiService { }; let mut req = http::Request::post(&self.endpoint.uri).header("Content-Type", content_type); - let metadata = request.get_metadata(); + let metadata = request.get_metadata().clone(); if let Some(tenant_id) = request.tenant_id { req = req.header("X-Scope-OrgID", tenant_id); diff --git a/src/sinks/loki/sink.rs b/src/sinks/loki/sink.rs index 367a41ea8c6de..74e133887b6b4 100644 --- a/src/sinks/loki/sink.rs +++ b/src/sinks/loki/sink.rs @@ -1,44 +1,25 @@ use std::{collections::HashMap, num::NonZeroUsize}; use bytes::{Bytes, BytesMut}; -use futures::{stream::BoxStream, StreamExt}; use once_cell::sync::Lazy; use regex::Regex; use snafu::Snafu; use tokio_util::codec::Encoder as _; -use vector_common::request_metadata::RequestMetadata; -use 
vector_core::{ - event::{Event, EventFinalizers, Finalizable, Value}, - partition::Partitioner, - sink::StreamSink, - stream::BatcherSettings, - ByteSizeOf, -}; use super::{ config::{LokiConfig, OutOfOrderAction}, event::{LokiBatchEncoder, LokiEvent, LokiRecord, PartitionKey}, service::{LokiRequest, LokiRetryLogic, LokiService}, }; +use crate::sinks::loki::config::{CompressionConfigAdapter, ExtendedCompression}; use crate::sinks::loki::event::LokiBatchEncoding; -use crate::sinks::{ - loki::config::{CompressionConfigAdapter, ExtendedCompression}, - util::metadata::RequestMetadataBuilder, -}; use crate::{ - codecs::{Encoder, Transformer}, http::{get_http_scheme_from_uri, HttpClient}, internal_events::{ - LokiEventUnlabeled, LokiOutOfOrderEventDropped, LokiOutOfOrderEventRewritten, - SinkRequestBuildError, TemplateRenderingError, + LokiEventUnlabeledError, LokiOutOfOrderEventDroppedError, LokiOutOfOrderEventRewritten, + SinkRequestBuildError, }, - sinks::util::{ - builder::SinkBuilderExt, - request_builder::EncodeResult, - service::{ServiceBuilderExt, Svc}, - Compression, RequestBuilder, - }, - template::Template, + sinks::prelude::*, }; #[derive(Clone)] @@ -268,6 +249,7 @@ impl EventEncoder { pub(super) fn encode_event(&mut self, mut event: Event) -> Option { let tenant_id = self.key_partitioner.partition(&event); let finalizers = event.take_finalizers(); + let json_byte_size = event.estimated_json_encoded_size_of(); let mut labels = self.build_labels(&event); self.remove_label_fields(&mut event); @@ -280,6 +262,8 @@ impl EventEncoder { event.as_mut_log().remove_timestamp(); } + let event_count_tags = event.get_tags(); + self.transformer.transform(&mut event); let mut bytes = BytesMut::new(); self.encoder.encode(event, &mut bytes).ok(); @@ -288,7 +272,7 @@ impl EventEncoder { // `{agent="vector"}` label. This can happen if the only // label is a templatable one but the event doesn't match. if labels.is_empty() { - emit!(LokiEventUnlabeled); + emit!(LokiEventUnlabeledError); labels = vec![("agent".to_string(), "vector".to_string())] } @@ -302,6 +286,8 @@ impl EventEncoder { }, partition, finalizers, + json_byte_size, + event_count_tags, }) } } @@ -486,7 +472,7 @@ impl LokiSink { } Some((partition, result)) } else { - emit!(LokiOutOfOrderEventDropped { count: batch.len() }); + emit!(LokiOutOfOrderEventDroppedError { count: batch.len() }); None } }) diff --git a/src/sinks/loki/tests.rs b/src/sinks/loki/tests.rs index 5661b0c6ec8b8..cf34b729684c1 100644 --- a/src/sinks/loki/tests.rs +++ b/src/sinks/loki/tests.rs @@ -1,13 +1,11 @@ -use futures::StreamExt; +use vector_core::config::proxy::ProxyConfig; use super::{config::LokiConfig, healthcheck::healthcheck, sink::LokiSink}; use crate::{ - config::ProxyConfig, - event::{Event, LogEvent}, http::HttpClient, + sinks::prelude::*, sinks::util::test::{build_test_server, load_sink}, test_util, - tls::TlsSettings, }; #[test] diff --git a/src/sinks/mezmo.rs b/src/sinks/mezmo.rs index 636c6374b629c..6139952fe3a95 100644 --- a/src/sinks/mezmo.rs +++ b/src/sinks/mezmo.rs @@ -35,7 +35,7 @@ const ENV_KEY: &str = "env"; const DEFAULT_VALUE: Value = Value::Null; /// Configuration for the `logdna` sink. -#[configurable_component(sink("logdna"))] +#[configurable_component(sink("logdna", "Deliver log event data to LogDNA."))] #[configurable(metadata( deprecated = "The `logdna` sink has been renamed. Please use `mezmo` instead." 
))] @@ -49,6 +49,7 @@ impl GenerateConfig for LogdnaConfig { } #[async_trait::async_trait] +#[typetag::serde(name = "logdna")] impl SinkConfig for LogdnaConfig { async fn build( &self, @@ -68,7 +69,7 @@ impl SinkConfig for LogdnaConfig { } /// Configuration for the `mezmo` (formerly `logdna`) sink. -#[configurable_component(sink("mezmo"))] +#[configurable_component(sink("mezmo", "Deliver log event data to Mezmo."))] #[derive(Clone, Debug)] pub struct MezmoConfig { /// Connection config @@ -120,13 +121,21 @@ pub struct MezmoConfig { #[configurable(metadata(docs::examples = "my-local-machine"))] hostname: Template, + /// Template used for MAC addressing + mac_template: Option