From d1e558800a570556372949fd332097c3e138a2e8 Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Fri, 12 May 2023 09:14:02 -0500 Subject: [PATCH] chore(docs): Clarify `key_field` for `sample` and `throttle` transforms (#17372) * chore(docs): Clarify `key_field` for `sample` and `throttle` transforms The current wording is somewhat confusing. I'm hopeful that this new wording is clearer. Signed-off-by: Jesse Szwedko * Apply suggestions from code review Co-authored-by: neuronull --------- Signed-off-by: Jesse Szwedko Co-authored-by: neuronull --- src/transforms/sample.rs | 15 ++++++++++----- src/transforms/throttle.rs | 6 ++---- .../components/transforms/base/sample.cue | 15 ++++++++++----- .../components/transforms/base/throttle.cue | 6 ++---- 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/src/transforms/sample.rs b/src/transforms/sample.rs index 4238ab6e146d2..c703e56ead9e0 100644 --- a/src/transforms/sample.rs +++ b/src/transforms/sample.rs @@ -27,12 +27,17 @@ pub struct SampleConfig { /// dropped. pub rate: u64, - /// The name of the log field whose value is hashed to determine if the event should be - /// passed. + /// The name of the field whose value is hashed to determine if the event should be + /// sampled. /// - /// Consistently samples the same events. Actual rate of sampling may differ from the configured - /// one if values in the field are not uniformly distributed. If left unspecified, or if the - /// event doesn't have `key_field`, then events are count rated. + /// Each unique value for the key creates a bucket of related events to be sampled together + /// and the rate is applied to the buckets themselves to sample `1/N` buckets. The overall rate + /// of sampling may differ from the configured one if values in the field are not uniformly + /// distributed. If left unspecified, or if the event doesn't have `key_field`, then the + /// event is sampled independently. 
+ /// + /// This can be useful to, for example, ensure that all logs for a given transaction are + /// sampled together, but that overall `1/N` transactions are sampled. #[configurable(metadata(docs::examples = "message",))] pub key_field: Option, diff --git a/src/transforms/throttle.rs b/src/transforms/throttle.rs index f1563ad7e5435..2eba0b532910b 100644 --- a/src/transforms/throttle.rs +++ b/src/transforms/throttle.rs @@ -33,11 +33,9 @@ pub struct ThrottleConfig { #[serde_as(as = "serde_with::DurationSeconds")] window_secs: Duration, - /// The name of the log field whose value is hashed to determine if the event should be - /// rate limited. + /// The value to group events into separate buckets to be rate limited independently. /// - /// Each unique key creates a bucket of related events to be rate limited separately. If - /// left unspecified, or if the event doesn't have `key_field`, then the event is not rate + /// If left unspecified, or if the event doesn't have `key_field`, then the event is not rate /// limited separately. #[configurable(metadata(docs::examples = "{{ message }}", docs::examples = "{{ hostname }}",))] key_field: Option