Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid using shared cache for internally stored files #1320

Merged
merged 3 commits into from
Oct 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions crates/symbolicator-js/src/bundle_index_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,11 @@ impl CacheItemRequest for BundleIndexRequest {
fn weight(item: &Self::Item) -> u32 {
item.0.max(std::mem::size_of::<Self::Item>() as u32)
}

fn use_shared_cache(&self) -> bool {
// These change frequently, and are being downloaded from Sentry directly.
// Here again, the question is whether we want to waste a bit of storage on GCS vs doing
// more requests to Python.
true
}
}
2 changes: 1 addition & 1 deletion crates/symbolicator-service/src/caching/cache_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ impl CacheError {
/// object could not be fetched or is otherwise unusable.
pub type CacheEntry<T = ()> = Result<T, CacheError>;

/// Parses a [`CacheEntry`] from a [`ByteView`](ByteView).
/// Parses a [`CacheEntry`] from a [`ByteView`].
pub fn cache_entry_from_bytes(bytes: ByteView<'static>) -> CacheEntry<ByteView<'static>> {
CacheError::from_bytes(&bytes).map(Err).unwrap_or(Ok(bytes))
}
2 changes: 1 addition & 1 deletion crates/symbolicator-service/src/caching/cache_key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ impl CacheKey {

/// A builder for [`CacheKey`]s.
///
/// This builder implements the [`Write`](std::fmt::Write) trait, and the intention of it is to
/// This builder implements the [`Write`] trait, and the intention of it is to
/// accept human readable, but most importantly **stable**, input.
/// This input in then being hashed to form the [`CacheKey`], and can also be serialized alongside
/// the cache files to help debugging.
Expand Down
18 changes: 12 additions & 6 deletions crates/symbolicator-service/src/caching/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,13 @@ pub trait CacheItemRequest: 'static + Send + Sync + Clone {
}

impl<T: CacheItemRequest> Cacher<T> {
fn shared_cache(&self, request: &T) -> Option<&SharedCacheService> {
request
.use_shared_cache()
.then(|| self.shared_cache.get())
.flatten()
}

/// Compute an item.
///
/// The item is computed using [`T::compute`](CacheItemRequest::compute), and saved in the cache
Expand All @@ -182,9 +189,8 @@ impl<T: CacheItemRequest> Cacher<T> {
let cache_path = key.cache_path(T::VERSIONS.current);
let mut temp_file = self.config.tempfile()?;

let shared_cache_hit = if !request.use_shared_cache() {
false
} else if let Some(shared_cache) = self.shared_cache.get() {
let shared_cache = self.shared_cache(&request);
let shared_cache_hit = if let Some(shared_cache) = shared_cache {
let temp_fd = tokio::fs::File::from_std(temp_file.reopen()?);
shared_cache.fetch(name, &cache_path, temp_fd).await
} else {
Expand Down Expand Up @@ -263,9 +269,9 @@ impl<T: CacheItemRequest> Cacher<T> {

// TODO: Not handling negative caches probably has a huge perf impact. Need to
// figure out negative caches. Maybe put them in redis with a TTL?
if !shared_cache_hit && request.use_shared_cache() {
if !shared_cache_hit {
if let Ok(byteview) = &entry {
if let Some(shared_cache) = self.shared_cache.get() {
if let Some(shared_cache) = shared_cache {
shared_cache.store(name, &cache_path, byteview.clone(), CacheStoreReason::New);
}
}
Expand Down Expand Up @@ -293,7 +299,7 @@ impl<T: CacheItemRequest> Cacher<T> {
let init = Box::pin(async {
// cache_path is None when caching is disabled.
if let Some(cache_dir) = self.config.cache_dir() {
let shared_cache = self.shared_cache.get();
let shared_cache = self.shared_cache(&request);
let versions = std::iter::once(T::VERSIONS.current)
.chain(T::VERSIONS.fallbacks.iter().copied());

Expand Down
4 changes: 4 additions & 0 deletions crates/symbolicator-service/src/services/bitcode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ impl CacheItemRequest for FetchFileRequest {
data,
}))
}

fn use_shared_cache(&self) -> bool {
self.file_source.worth_using_shared_cache()
}
}

#[derive(Debug, Clone)]
Expand Down
4 changes: 4 additions & 0 deletions crates/symbolicator-service/src/services/il2cpp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ impl CacheItemRequest for FetchFileRequest {
data,
})
}

fn use_shared_cache(&self) -> bool {
self.file_source.worth_using_shared_cache()
}
}

#[derive(Debug, Clone)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,10 @@ impl CacheItemRequest for FetchFileDataRequest {

Ok(Arc::new(object_handle))
}

fn use_shared_cache(&self) -> bool {
self.0.file_source.worth_using_shared_cache()
}
}

#[cfg(test)]
Expand Down
5 changes: 5 additions & 0 deletions crates/symbolicator-service/src/services/objects/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ pub struct FindResult {

#[derive(Clone, Debug)]
pub struct ObjectsActor {
// FIXME(swatinem): Having a fully fledged filesystem and shared cache for these tiny file meta
// items is heavy handed and wasteful. However, we *do* want to have this in shared cache, as
// it is the primary thing that makes cold starts fast, as we do not need to fetch the whole
// objects, but just the derived caches. Some lighter weight solution, like Redis might be more
// appropriate at some point.
meta_cache: Arc<Cacher<FetchFileMetaRequest>>,
data_cache: Arc<Cacher<FetchFileDataRequest>>,
download_svc: Arc<DownloadService>,
Expand Down
17 changes: 17 additions & 0 deletions crates/symbolicator-sources/src/remotefile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,23 @@ impl RemoteFile {
RemoteFile::Sentry(source) => source.host(),
}
}

/// Determines if this [`RemoteFile`] is worth caching in shared cache or not.
///
/// This is `true` for all external symbol servers, but not for our internal source, as we would
/// just save the same bytes twice, with an extra roundtrip on fetches.
pub fn worth_using_shared_cache(&self) -> bool {
match self {
// This is our internal sentry source. It is debatable if we want to rather fetch
// the file through the Python->FileStore->GCS indirection,
// or rather pay to save it twice so we can access GCS (shared cache) directly.
RemoteFile::Sentry(_) => true,
// This is most likely our scraped apple symbols, which are hosted on GCS directly, no
// need to save them twice in shared cache.
RemoteFile::Gcs(source) if source.source.id.as_str().starts_with("sentry:") => false,
_ => true,
}
}
}

/// A URI representing an [`RemoteFile`].
Expand Down
Loading