Skip to content

Commit

Permalink
feat: Return "scraping attempts" from JS symbolication
Browse files Browse the repository at this point in the history
  • Loading branch information
loewenheim committed Sep 28, 2023
1 parent 7ce9f63 commit d6e0bb2
Show file tree
Hide file tree
Showing 17 changed files with 264 additions and 42 deletions.
111 changes: 83 additions & 28 deletions crates/symbolicator-service/src/services/sourcemap_lookup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ use crate::caching::{
use crate::services::download::DownloadService;
use crate::services::objects::ObjectMetaHandle;
use crate::services::symbolication::ScrapingConfig;
use crate::types::{JsStacktrace, ResolvedWith, Scope};
use crate::types::{JsScrapingAttempt, JsScrapingFailureReason, JsStacktrace, ResolvedWith, Scope};
use crate::utils::http::is_valid_origin;

use crate::js::JsMetrics;
Expand Down Expand Up @@ -227,6 +227,8 @@ impl SourceMapLookup {
used_artifact_bundles: Default::default(),

metrics: Default::default(),

scraping_attempts: Default::default(),
};

Self {
Expand Down Expand Up @@ -307,8 +309,11 @@ impl SourceMapLookup {
self.fetcher.record_metrics();
}

pub fn into_used_artifact_bundles(self) -> HashSet<SentryFileId> {
self.fetcher.used_artifact_bundles
pub fn into_records(self) -> (HashSet<SentryFileId>, Vec<JsScrapingAttempt>) {
(
self.fetcher.used_artifact_bundles,
self.fetcher.scraping_attempts,
)
}
}

Expand Down Expand Up @@ -631,6 +636,8 @@ struct ArtifactFetcher {
individual_artifacts: HashMap<String, IndividualArtifact>,

used_artifact_bundles: HashSet<SentryFileId>,

scraping_attempts: Vec<JsScrapingAttempt>,
}

impl ArtifactFetcher {
Expand Down Expand Up @@ -714,34 +721,43 @@ impl ArtifactFetcher {
/// (because multiple files can share one [`DebugId`]).
#[tracing::instrument(skip(self))]
pub async fn get_file(&mut self, key: &FileKey) -> CachedFileEntry {
if let Some(file) = self.try_get_file_using_index(key).await {
return file;
let mut file = self.try_get_file_using_index(key).await;

if file.is_none() {
// Try looking up the file in one of the artifact bundles that we know about.
file = self.try_get_file_from_bundles(key);
}

// Try looking up the file in one of the artifact bundles that we know about.
if let Some(file) = self.try_get_file_from_bundles(key) {
return file;
if file.is_none() {
// Otherwise, try to get the file from an individual artifact.
file = self.try_fetch_file_from_artifacts(key).await;
}

// Otherwise, try to get the file from an individual artifact.
if let Some(file) = self.try_fetch_file_from_artifacts(key).await {
return file;
if file.is_none() {
// Otherwise: Do a (cached) API lookup for the `abs_path` + `DebugId`
if self.query_sentry_for_file(key).await {
// At this point, *one* of our known artifacts includes the file we are looking for.
// So we do the whole dance yet again.
file = self.try_get_file_from_bundles(key);
if file.is_none() {
file = self.try_fetch_file_from_artifacts(key).await;
}
}
}

// Otherwise: Do a (cached) API lookup for the `abs_path` + `DebugId`
if self.query_sentry_for_file(key).await {
// At this point, *one* of our known artifacts includes the file we are looking for.
// So we do the whole dance yet again.
if let Some(file) = self.try_get_file_from_bundles(key) {
return file;
match file {
Some(file) => {
if let Some(url) = key.abs_path() {
self.scraping_attempts
.push(JsScrapingAttempt::not_attempted(url.to_owned()));
}
file
}
if let Some(file) = self.try_fetch_file_from_artifacts(key).await {
return file;
None => {
// Otherwise, fall back to scraping from the Web.
self.scrape(key).await
}
}

// Otherwise, fall back to scraping from the Web.
self.scrape(key).await
}

async fn try_get_file_using_index(&mut self, key: &FileKey) -> Option<CachedFileEntry> {
Expand Down Expand Up @@ -879,28 +895,53 @@ impl ArtifactFetcher {
};

if !self.scraping.enabled {
self.scraping_attempts.push(JsScrapingAttempt::failure(
abs_path.to_owned(),
JsScrapingFailureReason::Disabled,
String::new(),
));
return make_error("Scraping disabled".to_string());
}

// Only scrape from http sources
let scheme = url.scheme();
if !["http", "https"].contains(&scheme) {
self.scraping_attempts.push(JsScrapingAttempt::failure(
abs_path.to_owned(),
JsScrapingFailureReason::InvalidHost,
format!("`{scheme}` is not an allowed download scheme"),
));
return make_error(format!("`{scheme}` is not an allowed download scheme"));
}

if !is_valid_origin(&url, &self.scraping.allowed_origins) {
self.scraping_attempts.push(JsScrapingAttempt::failure(
abs_path.to_owned(),
JsScrapingFailureReason::InvalidHost,
format!("{abs_path} is not an allowed download origin"),
));
return make_error(format!("{abs_path} is not an allowed download origin"));
}

let host_string = match url.host_str() {
None => {
self.scraping_attempts.push(JsScrapingAttempt::failure(
abs_path.to_owned(),
JsScrapingFailureReason::InvalidHost,
String::new(),
));
return make_error("Invalid host".to_string());
}
Some(host @ ("localhost" | "127.0.0.1")) => {
// NOTE: the reserved IPs cover a lot more than just localhost.
if self.download_svc.can_connect_to_reserved_ips() {
host
} else {
self.scraping_attempts.push(JsScrapingAttempt::failure(
abs_path.to_owned(),
JsScrapingFailureReason::InvalidHost,
format!("Can't connect to restricted host {host}"),
));
return make_error("Invalid host".to_string());
}
}
Expand Down Expand Up @@ -947,21 +988,35 @@ impl ArtifactFetcher {
.await;

transaction.finish();
CachedFileEntry {
uri,
entry: scraped_file.map(|contents| {
let entry = match scraped_file {
Ok(contents) => {
self.metrics.record_file_scraped(key.as_type());
tracing::trace!(?key, "Found file by scraping the web");

let sourcemap_url = discover_sourcemaps_location(&contents)
.and_then(|sm_ref| SourceMapUrl::parse_with_prefix(abs_path, sm_ref).ok())
.map(Arc::new);

CachedFile {
self.scraping_attempts
.push(JsScrapingAttempt::success(abs_path.to_owned()));

Ok(CachedFile {
contents,
sourcemap_url,
}
}),
})
}
Err(e) => {
self.scraping_attempts.push(JsScrapingAttempt {
url: abs_path.to_owned(),
result: e.clone().into(),
});

Err(e)
}
};
CachedFileEntry {
uri,
entry,
resolved_with: ResolvedWith::Scraping,
}
}
Expand Down
3 changes: 2 additions & 1 deletion crates/symbolicator-service/src/services/symbolication/js.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,14 @@ impl SymbolicationActor {
metric!(time_raw("js.unsymbolicated_frames") = unsymbolicated_frames);
metric!(time_raw("js.missing_sourcescontent") = missing_sourcescontent);

let used_artifact_bundles = lookup.into_used_artifact_bundles();
let (used_artifact_bundles, scraping_attempts) = lookup.into_records();

Ok(CompletedJsSymbolicationResponse {
stacktraces,
raw_stacktraces,
errors: errors.into_iter().collect(),
used_artifact_bundles,
scraping_attempts,
})
}
}
Expand Down
74 changes: 74 additions & 0 deletions crates/symbolicator-service/src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use serde::{Deserialize, Serialize};
use symbolic::common::{Arch, CodeId, DebugId, Language};
use symbolicator_sources::{ObjectType, SentryFileId};

use crate::caching::CacheError;
use crate::utils::addr::AddrMode;
use crate::utils::hex::HexValue;

Expand Down Expand Up @@ -636,6 +637,79 @@ pub struct CompletedJsSymbolicationResponse {
pub errors: Vec<JsModuleError>,
#[serde(skip_serializing_if = "HashSet::is_empty")]
pub used_artifact_bundles: HashSet<SentryFileId>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub scraping_attempts: Vec<JsScrapingAttempt>,
}

/// A record of whether (and how) scraping a single JS file from the web was attempted.
///
/// One record is pushed per URL considered during JS symbolication and returned
/// to the caller as part of the symbolication response.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct JsScrapingAttempt {
    /// The URL (`abs_path`) of the file this attempt refers to.
    pub url: String,
    /// The outcome: not attempted, success, or a categorized failure.
    pub result: JsScrapingResult,
}

impl JsScrapingAttempt {
pub fn success(url: String) -> Self {
Self {
url,
result: JsScrapingResult::Success,
}
}
pub fn not_attempted(url: String) -> Self {
Self {
url,
result: JsScrapingResult::NotAttempted,
}
}

pub fn failure(url: String, reason: JsScrapingFailureReason, details: String) -> Self {
Self {
url,
result: JsScrapingResult::Failure { reason, details },
}
}
}

/// The outcome of one scraping attempt for a single URL.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub enum JsScrapingResult {
    /// Scraping was never attempted because the file was obtained another way.
    NotAttempted,
    /// The file was successfully scraped from the web.
    Success,
    /// Scraping was attempted but failed.
    Failure {
        /// The broad category of the failure.
        reason: JsScrapingFailureReason,
        /// Human-readable detail message; omitted from serialization when empty.
        #[serde(skip_serializing_if = "String::is_empty")]
        details: String,
    },
}

impl From<CacheError> for JsScrapingResult {
    /// Converts a download/cache error into a categorized scraping failure,
    /// splitting it into a [`JsScrapingFailureReason`] and a detail string.
    fn from(value: CacheError) -> Self {
        let (reason, details) = match value {
            // `NotFound` and `InternalError` carry no payload, so no details.
            CacheError::NotFound => (JsScrapingFailureReason::NotFound, String::new()),
            CacheError::PermissionDenied(details) => {
                (JsScrapingFailureReason::PermissionDenied, details)
            }
            CacheError::Timeout(duration) => (
                JsScrapingFailureReason::Timeout,
                format!("Timeout after {}", humantime::format_duration(duration)),
            ),
            CacheError::DownloadError(details) => (JsScrapingFailureReason::DownloadError, details),
            // Map to the dedicated `Malformed` variant rather than folding it into
            // `Other`; previously `JsScrapingFailureReason::Malformed` was declared
            // but unreachable.
            CacheError::Malformed(details) => (JsScrapingFailureReason::Malformed, details),
            CacheError::InternalError => (JsScrapingFailureReason::Other, String::new()),
        };

        Self::Failure { reason, details }
    }
}

/// The broad category of a scraping failure, suitable for machine consumption.
///
/// Free-form context goes into the `details` field of
/// [`JsScrapingResult::Failure`] instead.
#[derive(Debug, Clone, Copy, Deserialize, Serialize)]
pub enum JsScrapingFailureReason {
    /// The file could not be found at the given URL.
    NotFound,
    /// Scraping was disabled via the scraping configuration.
    Disabled,
    /// The URL's scheme, origin, or host is not allowed for scraping.
    InvalidHost,
    /// The server denied access to the file.
    PermissionDenied,
    /// The download timed out.
    Timeout,
    /// The download failed for another download-related reason.
    DownloadError,
    /// The downloaded file was malformed.
    Malformed,
    /// Any other failure, including internal errors.
    Other,
}

/// Information about the operating system.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,9 @@ raw_stacktraces:
- "//# sourceMappingURL=test.min.js.map"
used_artifact_bundles:
- 02_correct.zip
scraping_attempts:
- url: "http://example.com/test.min.js"
result: NotAttempted
- url: "http://example.com/test.min.js.map"
result: NotAttempted

Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
source: crates/symbolicator-service/tests/integration/sourcemap.rs
assertion_line: 595
assertion_line: 636
expression: response.unwrap()
---
stacktraces:
Expand Down Expand Up @@ -39,4 +39,9 @@ raw_stacktraces:
- "//# sourceMappingURL=a.different.map"
- ""
- //@ sourceMappingURL=another.different.map
scraping_attempts:
- url: "http://localhost:56780/files/app.js"
result: Success
- url: "http://localhost:56780/files/app.js.map"
result: Success

Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
source: crates/symbolicator-service/tests/integration/sourcemap.rs
assertion_line: 508
assertion_line: 549
expression: response.unwrap()
---
stacktraces:
Expand Down Expand Up @@ -40,4 +40,9 @@ raw_stacktraces:
- exports.main = main;
- "//# sourceMappingURL=entrypoint1.js.map"
- ""
scraping_attempts:
- url: /Users/lucaforstner/code/github/getsentry/sentry-javascript-bundler-plugins/packages/playground/öut path/rollup/entrypoint1.js
result: NotAttempted
- url: /Users/lucaforstner/code/github/getsentry/sentry-javascript-bundler-plugins/packages/playground/öut path/rollup/entrypoint1.js.map
result: NotAttempted

Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
source: crates/symbolicator-service/tests/integration/sourcemap.rs
assertion_line: 572
assertion_line: 613
expression: response.unwrap()
---
stacktraces:
Expand All @@ -26,4 +26,9 @@ raw_stacktraces:
- abs_path: "app:///index.android.bundle"
lineno: 1
colno: 11940
scraping_attempts:
- url: "app:///index.android.bundle"
result: NotAttempted
- url: "app:///index.android.bundle.map"
result: NotAttempted

Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
source: crates/symbolicator-service/tests/integration/sourcemap.rs
assertion_line: 538
assertion_line: 579
expression: response.unwrap()
---
stacktraces:
Expand Down Expand Up @@ -37,4 +37,9 @@ raw_stacktraces:
context_line: "var makeAFailure=function(){function n(n){}function e(n){throw new Error(\"failed!\")}function r(r){var i=null;if(r.failed){i=e}else{i=n}i(r)} {snip}"
post_context:
- "//# sourceMappingURL=test.min.js.map"
scraping_attempts:
- url: "app:///_next/server/pages/_error.js"
result: NotAttempted
- url: "app:///_next/server/pages/test.min.js.map"
result: NotAttempted

Loading

0 comments on commit d6e0bb2

Please sign in to comment.