From 8218752074d97bc981c1fdbdb21d4c93eee226d1 Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Wed, 31 Jan 2024 16:31:18 +0100 Subject: [PATCH] Add another Cache for accessing files in `ArtifactBundle`s (#1354) When profiling (via `symbolicator-stress`) processing of javascript events, the majority of the time is spent extracting minified / sourcemap files from artifact bundles (which are essentially zip files). We introduce yet another cache which can avoid this costly unzip process by keeping these unzipped files in memory. For the above mentioned stresstest, this has lead to a throughput increase of 1000x. Though not surprisingly, the stresstest processes the exact same event over and over and has thus a 100% cache hit rate. Co-authored-by: Sebastian Zivota --- Cargo.toml | 9 ++- crates/symbolicator-js/src/bundle_lookup.rs | 77 +++++++++++++++++++++ crates/symbolicator-js/src/lib.rs | 1 + crates/symbolicator-js/src/lookup.rs | 27 ++++++-- crates/symbolicator-js/src/service.rs | 3 + 5 files changed, 111 insertions(+), 6 deletions(-) create mode 100644 crates/symbolicator-js/src/bundle_lookup.rs diff --git a/Cargo.toml b/Cargo.toml index dfae412c6..9ef520ac6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,12 @@ [workspace] resolver = "2" members = ["crates/*"] -default-members = ["crates/symbolicator", "crates/symbolicli", "crates/symsorter", "crates/wasm-split"] +default-members = [ + "crates/symbolicator", + "crates/symbolicli", + "crates/symsorter", + "crates/wasm-split", +] [profile.dev] # For debug builds, we do want to optimize for both debug-ability, and fast iteration times. @@ -47,4 +52,4 @@ cpp_demangle = { git = "https://github.com/getsentry/cpp_demangle", branch = "se # symbolic = { path = "../../../symbolic/symbolic", features = ["common-serde", "debuginfo", "demangle", "minidump-serde", "symcache"] } # See also https://doc.rust-lang.org/cargo/reference/overriding-dependencies.html # [patch.crates-io] -# symbolic = { path = "../symbolic/symbolic" } +# symbolic-debuginfo = { path = "../symbolic/symbolic-debuginfo" } diff --git a/crates/symbolicator-js/src/bundle_lookup.rs b/crates/symbolicator-js/src/bundle_lookup.rs new file mode 100644 index 000000000..8aa8dd83e --- /dev/null +++ b/crates/symbolicator-js/src/bundle_lookup.rs @@ -0,0 +1,77 @@ +use std::fmt; +use std::time::Duration; + +use symbolicator_sources::RemoteFileUri; + +use crate::lookup::{CachedFileEntry, FileKey}; + +type FileInBundleCacheInner = moka::sync::Cache<(RemoteFileUri, FileKey), CachedFileEntry>; + +/// A cache that memoizes looking up files in artifact bundles. +#[derive(Clone)] +pub struct FileInBundleCache { + cache: FileInBundleCacheInner, +} + +impl std::fmt::Debug for FileInBundleCache { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("FileInBundleCache").finish() + } +} + +impl FileInBundleCache { + /// Creates a new `FileInBundleCache` with a maximum size of 2GiB and + /// idle time of 1h. + pub fn new() -> Self { + // NOTE: We size the cache at 2 GiB which is quite an arbitrary pick. + // As all the files are living in memory, we return the size of the contents + // from the `weigher` which is responsible for this accounting. + const GIGS: u64 = 1 << 30; + let cache = FileInBundleCacheInner::builder() + .max_capacity(2 * GIGS) + .time_to_idle(Duration::from_secs(60 * 60)) + .name("file-in-bundle") + .weigher(|_k, v| { + let content_size = v + .entry + .as_ref() + .map(|cached_file| cached_file.contents.len()) + .unwrap_or_default(); + (std::mem::size_of_val(v) + content_size) + .try_into() + .unwrap_or(u32::MAX) + }) + .build(); + Self { cache } + } + + /// Tries to retrieve a file from the cache. + /// + /// We look for the file under `(bundle_uri, key)` for `bundle_uri` in `bundle_uris`. + /// Retrieval is limited to a specific list of bundles so that e.g. files with the same + /// `abs_path` belonging to different events are disambiguated. + pub fn try_get( + &self, + bundle_uris: impl Iterator, + mut key: FileKey, + ) -> Option { + for bundle_uri in bundle_uris { + // XXX: this is a really horrible workaround for not being able to look up things via `(&A, &B)` instead of `&(A, B)`. + let lookup_key = (bundle_uri, key); + if let Some(file_entry) = self.cache.get(&lookup_key) { + return Some(file_entry); + } + key = lookup_key.1; + } + None + } + + /// Inserts `file_entry` into the cache under `(bundle_uri, key)`. + /// + /// Files are inserted under a specific bundle so that e.g. files with the same + /// `abs_path` belonging to different events are disambiguated. + pub fn insert(&self, bundle_uri: &RemoteFileUri, key: &FileKey, file_entry: &CachedFileEntry) { + let key = (bundle_uri.clone(), key.clone()); + self.cache.insert(key, file_entry.clone()) + } +} diff --git a/crates/symbolicator-js/src/lib.rs b/crates/symbolicator-js/src/lib.rs index 9172406bd..673500a8d 100644 --- a/crates/symbolicator-js/src/lib.rs +++ b/crates/symbolicator-js/src/lib.rs @@ -1,6 +1,7 @@ mod api_lookup; mod bundle_index; mod bundle_index_cache; +mod bundle_lookup; pub mod interface; mod lookup; mod metrics; diff --git a/crates/symbolicator-js/src/lookup.rs b/crates/symbolicator-js/src/lookup.rs index 65fcd0a09..3b36477dc 100644 --- a/crates/symbolicator-js/src/lookup.rs +++ b/crates/symbolicator-js/src/lookup.rs @@ -59,6 +59,7 @@ use symbolicator_service::utils::http::is_valid_origin; use crate::api_lookup::{ArtifactHeaders, JsLookupResult, SentryLookupApi}; use crate::bundle_index::BundleIndex; use crate::bundle_index_cache::BundleIndexCache; +use crate::bundle_lookup::FileInBundleCache; use crate::interface::{ JsScrapingAttempt, JsScrapingFailureReason, JsStacktrace, ResolvedWith, SymbolicateJsStacktraces, @@ -163,6 +164,7 @@ impl SourceMapLookup { pub async fn new(service: SourceMapService, request: SymbolicateJsStacktraces) -> Self { let SourceMapService { objects, + files_in_bundles, sourcefiles_cache, bundle_index_cache, sourcemap_caches, @@ -213,6 +215,7 @@ impl SourceMapLookup { let fetcher = ArtifactFetcher { objects, + files_in_bundles, sourcefiles_cache, sourcemap_caches, api_lookup, @@ -559,6 +562,10 @@ struct ArtifactFetcher { metrics: JsMetrics, // other services: + /// Cache for looking up files in artifact bundles. + /// + /// This cache is shared between all JS symbolication requests. + files_in_bundles: FileInBundleCache, objects: ObjectsActor, sourcefiles_cache: Arc, sourcemap_caches: Arc>, @@ -973,6 +980,14 @@ impl ArtifactFetcher { return None; } + // First see if we have already cached this file for any of this event's bundles. + if let Some(file_entry) = self + .files_in_bundles + .try_get(self.artifact_bundles.keys().rev().cloned(), key.clone()) + { + return Some(file_entry); + } + // If we have a `DebugId`, we try a lookup based on that. if let Some(debug_id) = key.debug_id() { let ty = key.as_type(); @@ -988,11 +1003,13 @@ impl ArtifactFetcher { *bundle_resolved_with, ); tracing::trace!(?key, "Found file in artifact bundles by debug-id"); - return Some(CachedFileEntry { + let file_entry = CachedFileEntry { uri: CachedFileUri::Bundled(bundle_uri.clone(), key.clone()), entry: CachedFile::from_descriptor(key.abs_path(), descriptor), resolved_with: ResolvedWith::DebugId, - }); + }; + self.files_in_bundles.insert(bundle_uri, key, &file_entry); + return Some(file_entry); } } } @@ -1012,11 +1029,13 @@ impl ArtifactFetcher { *resolved_with, ); tracing::trace!(?key, url, "Found file in artifact bundles by url"); - return Some(CachedFileEntry { + let file_entry = CachedFileEntry { uri: CachedFileUri::Bundled(bundle_uri.clone(), key.clone()), entry: CachedFile::from_descriptor(Some(abs_path), descriptor), resolved_with: *resolved_with, - }); + }; + self.files_in_bundles.insert(bundle_uri, key, &file_entry); + return Some(file_entry); } } } diff --git a/crates/symbolicator-js/src/service.rs b/crates/symbolicator-js/src/service.rs index fcc3fdf80..4e5609f0e 100644 --- a/crates/symbolicator-js/src/service.rs +++ b/crates/symbolicator-js/src/service.rs @@ -10,11 +10,13 @@ use symbolicator_service::services::SharedServices; use crate::api_lookup::SentryLookupApi; use crate::bundle_index_cache::BundleIndexCache; +use crate::bundle_lookup::FileInBundleCache; use crate::lookup::FetchSourceMapCacheInternal; #[derive(Debug, Clone)] pub struct SourceMapService { pub(crate) objects: ObjectsActor, + pub(crate) files_in_bundles: FileInBundleCache, pub(crate) sourcefiles_cache: Arc, pub(crate) bundle_index_cache: Arc, pub(crate) sourcemap_caches: Arc>, @@ -46,6 +48,7 @@ impl SourceMapService { Self { objects, + files_in_bundles: FileInBundleCache::new(), sourcefiles_cache, bundle_index_cache: Arc::new(bundle_index_cache), sourcemap_caches: Arc::new(Cacher::new(caches.sourcemap_caches.clone(), shared_cache)),