From e1037cc14436c4e60b5ef54a7190edfad3146293 Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Fri, 21 Jul 2023 11:03:03 +0200 Subject: [PATCH] Improve file-stem trimming (#1267) The original idea of file-stem trimming was to find both `foo.min.js` and `foo.map` with just one request, as the stem of those two is `foo`. However, the simplistic "split at first `.`" fails for situations when we have `main.$HASH.min.js`, where the `$HASH` part was being trimmed off. The API would have way too many matches for just `main` in that case, and might thus not return the match that we are interested in. Therefore, we now only trim known suffixes, and keep other `.`-separated parts intact. This might increase the number of API requests we do, but should yield better results. --- .../src/services/sourcemap_lookup.rs | 28 +++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/crates/symbolicator-service/src/services/sourcemap_lookup.rs b/crates/symbolicator-service/src/services/sourcemap_lookup.rs index 215d9f8f9..ec0a48794 100644 --- a/crates/symbolicator-service/src/services/sourcemap_lookup.rs +++ b/crates/symbolicator-service/src/services/sourcemap_lookup.rs @@ -1289,12 +1289,36 @@ fn extract_file_stem(path: &str) -> String { path.rsplit_once('/') .map(|(prefix, name)| { - let name = name.split_once('.').map(|(stem, _)| stem).unwrap_or(name); + // trim query strings and fragments + let name = name.split_once('?').map(|(name, _)| name).unwrap_or(name); + let name = name.split_once('#').map(|(name, _)| name).unwrap_or(name); + + // then, trim all the suffixes as often as they occur + let name = trim_all_end_matches(name, FILE_SUFFIX_PATTERNS); + format!("{prefix}/{name}") }) .unwrap_or(path.to_owned()) } +const FILE_SUFFIX_PATTERNS: &[&str] = &[ + ".min", ".js", ".map", ".cjs", ".mjs", ".ts", ".d", ".jsx", ".tsx", +]; + +/// Trims the different `patterns` from the end of the `input` string as often as possible. 
+fn trim_all_end_matches<'a>(mut input: &'a str, patterns: &[&str]) -> &'a str { + loop { + let mut trimmed = input; + for pattern in patterns { + trimmed = trimmed.trim_end_matches(pattern); + } + if trimmed == input { + return trimmed; + } + input = trimmed; + } +} + /// Transforms a full absolute url into 2 or 4 generalized options. // Based on `ReleaseFile.normalize`, see: // https://github.com/getsentry/sentry/blob/master/src/sentry/models/releasefile.py @@ -1468,7 +1492,7 @@ mod tests { ); assert_eq!( extract_file_stem("app:///polyfills.e9f8f1606b76a9c9.js"), - "/polyfills" + "/polyfills.e9f8f1606b76a9c9" ); }