Skip to content

Commit

Permalink
ref(metrics): Add normalization and update set metrics hashing
Browse files Browse the repository at this point in the history
  • Loading branch information
Elias Ram committed May 23, 2024
1 parent 8c701e8 commit 5f41028
Show file tree
Hide file tree
Showing 9 changed files with 1,335 additions and 88 deletions.
994 changes: 975 additions & 19 deletions Cargo.lock

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions sentry-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,16 @@ UNSTABLE_cadence = ["dep:cadence", "UNSTABLE_metrics"]

[dependencies]
cadence = { version = "0.29.0", optional = true }
crc32fast = "1.4.0"
itertools = "0.10.5"
log = { version = "0.4.8", optional = true, features = ["std"] }
once_cell = "1"
rand = { version = "0.8.1", optional = true }
regex = "1.7.3"
sentry-types = { version = "0.32.3", path = "../sentry-types" }
serde = { version = "1.0.104", features = ["derive"] }
serde_json = { version = "1.0.46" }
unicode-segmentation = "1.11.0"
uuid = { version = "1.0.0", features = ["v4", "serde"], optional = true }

[dev-dependencies]
Expand Down
138 changes: 69 additions & 69 deletions sentry-core/src/metrics.rs → sentry-core/src/metrics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,19 @@
//!
//! [our docs]: https://develop.sentry.dev/delightful-developer-metrics/
mod normalization;

use std::borrow::Cow;
use std::collections::hash_map::{DefaultHasher, Entry};
use std::collections::hash_map::Entry;
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::fmt::{self, Write};
use std::fmt::{self, Display};
use std::sync::{Arc, Mutex};
use std::thread::{self, JoinHandle};
use std::time::{Duration, SystemTime, UNIX_EPOCH};

use normalization::normalized_name::NormalizedName;
use normalization::normalized_tags::NormalizedTags;
use normalization::normalized_unit::NormalizedUnit;
use sentry_types::protocol::latest::{Envelope, EnvelopeItem};

use crate::client::TransportArc;
Expand Down Expand Up @@ -168,15 +173,23 @@ impl MetricValue {
}
}

impl Display for MetricValue {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Counter(v) => write!(f, "{}", v),
Self::Distribution(v) => write!(f, "{}", v),
Self::Gauge(v) => write!(f, "{}", v),
Self::Set(v) => write!(f, "{}", v),
}
}
}

/// Hashes the given set value.
///
/// Sets only guarantee 32-bit accuracy, but arbitrary strings are allowed on the protocol. Upon
/// parsing, they are hashed and only used as hashes subsequently.
///
/// Takes the raw string and returns a 32-bit hash computed over its UTF-8 bytes.
// NOTE(review): the body below contains two tail expressions back to back (the truncated
// `DefaultHasher` result and the `crc32fast` checksum). Presumably these are the before/after
// of the same change and only one of them should remain -- confirm.
fn hash_set_value(string: &str) -> u32 {
use std::hash::Hasher;
let mut hasher = DefaultHasher::default();
hasher.write(string.as_bytes());
hasher.finish() as u32
crc32fast::hash(string.as_bytes())
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
Expand Down Expand Up @@ -510,6 +523,24 @@ impl Metric {
client.add_metric(self);
}
}

/// Builds an [`Envelope`] holding exactly one [`EnvelopeItem::Statsd`] item for this metric.
///
/// The payload is a single line of the form `name@unit:value|type|#tags|Ttimestamp`. Name,
/// unit and tags are passed through their respective normalizers, and the timestamp is the
/// number of whole seconds since the Unix epoch at the time of the call (`0` if the system
/// clock reports a time before the epoch).
pub fn to_envelope(self) -> Envelope {
    // NOTE(review): the timestamp is read from the clock here; confirm this is intended for
    // metrics whose timestamp was set explicitly through the builder.
    let seconds = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|elapsed| elapsed.as_secs())
        .unwrap_or_default();

    let name = NormalizedName::from(self.name.as_ref());
    let unit = NormalizedUnit::from(self.unit);
    let value = &self.value;
    let ty = self.value.ty();
    let tags = NormalizedTags::from(self.tags);

    let payload = format!(
        "{}@{}:{}|{}|#{}|T{}",
        name, unit, value, ty, tags, seconds
    );
    Envelope::from_item(EnvelopeItem::Statsd(payload.into_bytes()))
}
}

/// A builder for metrics.
Expand Down Expand Up @@ -550,6 +581,26 @@ impl MetricBuilder {
self
}

/// Attaches several tags to the metric at once.
///
/// Each `(key, value)` pair is converted into the metric's string type and inserted into the
/// metric's tags. Tags add dimensions to a metric; they are key-value pairs that can be
/// filtered or grouped by in Sentry.
///
/// When the metric is sent via [`MetricBuilder::send`] or added to a
/// [`Client`](crate::Client), the client may add default tags to the metrics, such as the
/// `release` or the `environment` from the Scope.
pub fn with_tags<T, K, V>(mut self, tags: T) -> Self
where
    T: IntoIterator<Item = (K, V)>,
    K: Into<MetricStr>,
    V: Into<MetricStr>,
{
    for (key, value) in tags {
        self.metric.tags.insert(key.into(), value.into());
    }
    self
}

/// Sets the timestamp for the metric.
///
/// By default, the timestamp is set to the current time when the metric is built or sent.
Expand Down Expand Up @@ -723,9 +774,13 @@ fn get_default_tags(options: &ClientOptions) -> TagMap {
if let Some(ref release) = options.release {
tags.insert("release".into(), release.clone());
}
if let Some(ref environment) = options.environment {
tags.insert("environment".into(), environment.clone());
}
tags.insert(
"environment".into(),
options
.environment
.clone()
.unwrap_or(Cow::Borrowed("production")),
);
tags
}

Expand Down Expand Up @@ -778,11 +833,8 @@ impl Worker {

for (timestamp, buckets) in buckets {
for (key, value) in buckets {
write!(&mut out, "{}", SafeKey(key.name.as_ref()))?;
if key.unit != MetricUnit::None {
write!(&mut out, "@{}", key.unit)?;
}

write!(&mut out, "{}", NormalizedName::from(key.name.as_ref()))?;
write!(&mut out, "@{}", NormalizedUnit::from(key.unit))?;
match value {
BucketValue::Counter(c) => {
write!(&mut out, ":{}", c)?;
Expand All @@ -807,16 +859,9 @@ impl Worker {
}

write!(&mut out, "|{}", key.ty.as_str())?;

for (i, (k, v)) in key.tags.iter().chain(&self.default_tags).enumerate() {
match i {
0 => write!(&mut out, "|#")?,
_ => write!(&mut out, ",")?,
}

write!(&mut out, "{}:{}", SafeKey(k.as_ref()), SafeVal(v.as_ref()))?;
}

let normalized_tags =
NormalizedTags::from(key.tags).with_default_tags(&self.default_tags);
write!(&mut out, "|#{}", normalized_tags)?;
writeln!(&mut out, "|T{}", timestamp)?;
}
}
Expand Down Expand Up @@ -922,51 +967,6 @@ impl Drop for MetricAggregator {
}
}

/// Writes `string` to `f`, substituting characters rejected by `check`.
///
/// Characters for which `check` returns `true` are written unchanged. Each maximal run of
/// rejected characters is written as a single `_`. Any error from the formatter is returned
/// immediately.
fn safe_fmt<F>(f: &mut fmt::Formatter<'_>, string: &str, mut check: F) -> fmt::Result
where
    F: FnMut(char) -> bool,
{
    // True while the previously seen character was rejected, i.e. the `_` placeholder for the
    // current run has already been emitted.
    let mut in_rejected_run = false;

    for ch in string.chars() {
        let accepted = check(ch);
        if accepted {
            f.write_char(ch)?;
        } else if !in_rejected_run {
            f.write_char('_')?;
        }
        in_rejected_run = !accepted;
    }

    Ok(())
}

// Display adapter that renders a string in a form that is safe for metric names or tag keys.
struct SafeKey<'s>(&'s str);

impl fmt::Display for SafeKey<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // ASCII letters and digits are kept, together with a small set of punctuation.
        let allowed = |c: char| match c {
            '_' | '-' | '.' | '/' => true,
            other => other.is_ascii_alphanumeric(),
        };
        safe_fmt(f, self.0, allowed)
    }
}

// Display adapter that renders a string in a form that is safe for tag values.
struct SafeVal<'s>(&'s str);

impl fmt::Display for SafeVal<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Punctuation that is kept in addition to (Unicode) alphanumeric characters.
        const EXTRA_ALLOWED: [char; 11] =
            ['_', ':', '/', '@', '.', '{', '}', '[', ']', '$', '-'];

        safe_fmt(f, self.0, |c| {
            c.is_alphanumeric() || EXTRA_ALLOWED.contains(&c)
        })
    }
}

#[cfg(test)]
mod tests {
use crate::test::{with_captured_envelopes, with_captured_envelopes_options};
Expand Down
3 changes: 3 additions & 0 deletions sentry-core/src/metrics/normalization/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pub mod normalized_name;
pub mod normalized_tags;
pub mod normalized_unit;
36 changes: 36 additions & 0 deletions sentry-core/src/metrics/normalization/normalized_name.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
use regex::Regex;
use std::borrow::Cow;

/// A metric name in which characters outside the allowed set have been replaced.
///
/// Values are created from a `&str` through the `From` implementation and written out through
/// the `Display` implementation.
pub struct NormalizedName<'a> {
// The name after replacement, as produced by the regex substitution in `From::from`.
name: Cow<'a, str>,
}

impl<'a> From<&'a str> for NormalizedName<'a> {
fn from(name: &'a str) -> Self {
Self {
name: Regex::new(r"[^a-zA-Z0-9_\-.]")
.expect("Regex should compile")
.replace_all(name, "_"),
}
}
}

/// Writes the normalized name verbatim.
impl std::fmt::Display for NormalizedName<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.name)
    }
}

#[cfg(test)]
mod test {
    use crate::metrics::NormalizedName;

    /// Allowed characters pass through unchanged, while each disallowed character (including
    /// non-ASCII letters, emoji, whitespace and punctuation) becomes its own `_`.
    #[test]
    fn test_from() {
        let raw_name = "aA1_-./+ö{😀\n\t\r\\| ,";

        let normalized = NormalizedName::from(raw_name).to_string();

        assert_eq!("aA1_-.____________", normalized);
    }
}
Loading

0 comments on commit 5f41028

Please sign in to comment.