Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ref(proguard): Deobfuscate view hierarchies as part of symbolication #74196

Merged
merged 7 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/sentry/lang/java/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Any

from sentry.lang.java.processing import deobfuscate_exception_value
from sentry.lang.java.utils import deobfuscate_view_hierarchy, has_proguard_file
from sentry.lang.java.utils import has_proguard_file
from sentry.plugins.base.v2 import EventPreprocessor, Plugin2


Expand All @@ -19,6 +19,6 @@ def get_stacktrace_processors(self, data, stacktrace_infos, platforms, **kwargs)

def get_event_preprocessors(self, data: Mapping[str, Any]) -> Sequence[EventPreprocessor]:
if has_proguard_file(data):
return [deobfuscate_exception_value, deobfuscate_view_hierarchy]
return [deobfuscate_exception_value]
else:
return []
89 changes: 88 additions & 1 deletion src/sentry/lang/java/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
from collections.abc import Mapping
from typing import Any

import orjson

from sentry.attachments import CachedAttachment, attachment_cache
from sentry.ingest.consumer.processors import CACHE_TIMEOUT
from sentry.lang.java.utils import get_jvm_images, get_proguard_images
from sentry.lang.native.error import SymbolicationFailed, write_error
from sentry.lang.native.symbolicator import Symbolicator
Expand All @@ -11,6 +15,7 @@
from sentry.models.release import Release
from sentry.stacktraces.processing import find_stacktraces_in_data
from sentry.utils import metrics
from sentry.utils.cache import cache_key_for_event
from sentry.utils.safe import get_path

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -139,6 +144,76 @@ def _get_release_package(project: Project, release_name: str | None) -> str | No
return release.package if release else None


def _get_window_class_names(attachments: list[CachedAttachment]) -> list[str]:
    """Returns the class names of all windows in all view hierarchies
    contained in `attachments`.

    Only attachments whose type is `"event.view_hierarchy"` are inspected.
    Each one is read through `attachment_cache.get_data`, parsed as JSON, and
    its `windows` are walked together with all nested `children`. Every
    non-null `type` encountered is collected; duplicates are kept.

    A view hierarchy whose `windows` entry is missing or null contributes
    nothing instead of raising a `TypeError`.
    """

    class_names = []
    windows_to_visit = []

    for attachment in attachments:
        if attachment.type == "event.view_hierarchy":
            view_hierarchy = orjson.loads(attachment_cache.get_data(attachment))
            # `.get("windows")` yields None when the key is absent or null,
            # and `extend(None)` would raise; treat that case as "no windows".
            windows_to_visit.extend(view_hierarchy.get("windows") or [])

    # Iterative walk over the collected windows and their descendants.
    while windows_to_visit:
        window = windows_to_visit.pop()
        if window.get("type") is not None:
            class_names.append(window["type"])
        if children := window.get("children"):
            windows_to_visit.extend(children)

    return class_names


def _deobfuscate_view_hierarchy(view_hierarchy: Any, class_names: dict[str, str]) -> None:
"""Deobfuscates a view hierarchy in-place.

The `class_names` dict is used to resolve obfuscated to deobfuscated names. If
an obfuscated class name isn't present in `class_names`, it is left unchanged."""

windows_to_deobfuscate = [*view_hierarchy.get("windows")]

while windows_to_deobfuscate:
window = windows_to_deobfuscate.pop()
if (
window.get("type") is not None
and (mapped_type := class_names.get(window["type"])) is not None
):
window["type"] = mapped_type
if children := window.get("children"):
windows_to_deobfuscate.extend(children)


def _deobfuscate_view_hierarchies(
    attachments: list[CachedAttachment], class_names: dict[str, str]
) -> list[CachedAttachment]:
    """Builds a new attachment list in which every view hierarchy is deobfuscated.

    Attachments of type `"event.view_hierarchy"` are loaded from the attachment
    cache, deobfuscated with `class_names`, and replaced by a fresh
    `CachedAttachment` carrying the re-serialized JSON. All other attachments
    are passed through as the same objects. Input order is preserved.
    """
    result = []
    for item in attachments:
        # Anything that is not a view hierarchy is forwarded untouched.
        if item.type != "event.view_hierarchy":
            result.append(item)
            continue

        hierarchy = orjson.loads(attachment_cache.get_data(item))
        _deobfuscate_view_hierarchy(hierarchy, class_names)

        # The rewritten payload is stored as a single unchunked blob.
        replacement = CachedAttachment(
            type=item.type,
            id=item.id,
            name=item.name,
            content_type=item.content_type,
            data=orjson.dumps(hierarchy),
            chunks=None,
        )
        result.append(replacement)

    return result


def map_symbolicator_process_jvm_errors(
errors: list[dict[str, Any]] | None,
) -> list[dict[str, Any]]:
Expand Down Expand Up @@ -195,10 +270,17 @@ def process_jvm_stacktraces(symbolicator: Symbolicator, data: Any) -> Any:
]

processable_exceptions = _get_exceptions_for_symbolication(data)
cache_key = cache_key_for_event(data)
attachments = [*attachment_cache.get(cache_key)]
window_class_names = _get_window_class_names(attachments)

metrics.incr("proguard.symbolicator.events")

if not any(stacktrace["frames"] for stacktrace in stacktraces) and not processable_exceptions:
if (
not any(stacktrace["frames"] for stacktrace in stacktraces)
and not processable_exceptions
and not window_class_names
):
metrics.incr("proguard.symbolicator.events.skipped")
return

Expand All @@ -211,6 +293,7 @@ def process_jvm_stacktraces(symbolicator: Symbolicator, data: Any) -> Any:
stacktraces=stacktraces,
modules=modules,
release_package=release_package,
classes=window_class_names,
)

if not _handle_response_status(data, response):
Expand Down Expand Up @@ -248,4 +331,8 @@ def process_jvm_stacktraces(symbolicator: Symbolicator, data: Any) -> Any:
raw_exc["module"] = exc["module"]
raw_exc["type"] = exc["type"]

classes = response.get("classes")
new_attachments = _deobfuscate_view_hierarchies(attachments, classes)
attachment_cache.set(cache_key, attachments=new_attachments, timeout=CACHE_TIMEOUT)
loewenheim marked this conversation as resolved.
Show resolved Hide resolved

return data
47 changes: 3 additions & 44 deletions src/sentry/lang/java/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from sentry.lang.java.proguard import open_proguard_mapper
from sentry.models.debugfile import ProjectDebugFile
from sentry.models.project import Project
from sentry.stacktraces.processing import StacktraceInfo
from sentry.utils.cache import cache_key_for_event
from sentry.utils.safe import get_path

Expand Down Expand Up @@ -70,31 +69,6 @@ def get_proguard_mapper(uuid: str, project: Project):
return mapper


def _deobfuscate_view_hierarchy(event_data: dict[str, Any], project: Project, view_hierarchy):
"""
Deobfuscates a view hierarchy in-place.

Each visited window's `type` is replaced by `mapper.remap_class(type)` when
that call returns a truthy value; otherwise the existing `type` is kept.
Windows nested under `children` are visited as well.

If we're unable to fetch a ProGuard uuid or unable to init the mapper,
then the view hierarchy remains unmodified.
"""
# ProGuard image uuids come from the event payload; with none there is nothing to do.
proguard_uuids = get_proguard_images(event_data)
if len(proguard_uuids) == 0:
return

with sentry_sdk.start_span(op="proguard.deobfuscate_view_hierarchy_data"):
for proguard_uuid in proguard_uuids:
mapper = get_proguard_mapper(proguard_uuid, project)
# A missing mapper returns from the function rather than moving on to the next uuid.
if mapper is None:
return

# Iterative walk over the windows and their nested children.
# NOTE(review): indentation was lost in this capture; presumably this walk
# runs once per mapper, inside the loop above — confirm against the repository.
windows_to_deobfuscate = [*view_hierarchy.get("windows")]
while windows_to_deobfuscate:
window = windows_to_deobfuscate.pop()
# Fall back to the current type when the mapper yields a falsy result.
window["type"] = mapper.remap_class(window.get("type")) or window.get("type")
if children := window.get("children"):
windows_to_deobfuscate.extend(children)


@sentry_sdk.trace
def deobfuscation_template(data, map_type, deobfuscation_fn):
"""
Expand Down Expand Up @@ -133,13 +107,8 @@ def deobfuscation_template(data, map_type, deobfuscation_fn):
attachment_cache.set(cache_key, attachments=new_attachments, timeout=CACHE_TIMEOUT)


def deobfuscate_view_hierarchy(data):
"""Runs `deobfuscation_template` on `data` with the map type "proguard" and
`_deobfuscate_view_hierarchy` as the deobfuscation function."""
deobfuscation_template(data, "proguard", _deobfuscate_view_hierarchy)


def is_jvm_event(data: Any, stacktraces: list[StacktraceInfo]) -> bool:
"""Returns whether `data` is a JVM event, based on its platform, images, and
the supplied stacktraces."""
def is_jvm_event(data: Any) -> bool:
"""Returns whether `data` is a JVM event, based on its images."""

# check if there are any JVM or Proguard images
images = get_path(
Expand All @@ -149,14 +118,4 @@ def is_jvm_event(data: Any, stacktraces: list[StacktraceInfo]) -> bool:
filter=lambda x: is_valid_jvm_image(x) or is_valid_proguard_image(x),
default=(),
)
if not images:
return False

if data.get("platform") == "java":
return True

for stacktrace in stacktraces:
if any(x == "java" for x in stacktrace.platforms):
return True

return False
return bool(images)
2 changes: 2 additions & 0 deletions src/sentry/lang/native/symbolicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ def process_jvm(
stacktraces,
modules,
release_package,
classes,
apply_source_context=True,
):
"""
Expand All @@ -262,6 +263,7 @@ def process_jvm(
"exceptions": exceptions,
"stacktraces": stacktraces,
"modules": modules,
"classes": classes,
"options": {"apply_source_context": apply_source_context},
}

Expand Down
1 change: 1 addition & 0 deletions src/sentry/profiles/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,7 @@ def symbolicate(
modules=modules,
release_package=profile.get("transaction_metadata", {}).get("app.identifier"),
apply_source_context=False,
classes=[],
)
return symbolicator.process_payload(
stacktraces=stacktraces, modules=modules, apply_source_context=False
Expand Down
2 changes: 1 addition & 1 deletion src/sentry/tasks/symbolication.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def get_symbolication_platforms(

platforms = []

if is_jvm_event(data, stacktraces):
if is_jvm_event(data):
platforms.append(SymbolicatorPlatform.jvm)
if is_js_event(data, stacktraces):
platforms.append(SymbolicatorPlatform.js)
Expand Down
10 changes: 3 additions & 7 deletions tests/relay_integration/lang/java/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -1556,7 +1556,6 @@ def test_invalid_exception(self):

def test_is_jvm_event(self):
from sentry.lang.java.utils import is_jvm_event
from sentry.stacktraces.processing import find_stacktraces_in_data

event = {
"user": {"ip_address": "31.172.207.97"},
Expand Down Expand Up @@ -1586,8 +1585,7 @@ def test_is_jvm_event(self):
},
"timestamp": iso_format(before_now(seconds=1)),
}
stacktraces = find_stacktraces_in_data(event)
assert is_jvm_event(event, stacktraces)
assert is_jvm_event(event)

event = {
"user": {"ip_address": "31.172.207.97"},
Expand Down Expand Up @@ -1616,9 +1614,8 @@ def test_is_jvm_event(self):
},
"timestamp": iso_format(before_now(seconds=1)),
}
stacktraces = find_stacktraces_in_data(event)
# has no platform
assert not is_jvm_event(event, stacktraces)
assert is_jvm_event(event)

event = {
"user": {"ip_address": "31.172.207.97"},
Expand Down Expand Up @@ -1648,6 +1645,5 @@ def test_is_jvm_event(self):
},
"timestamp": iso_format(before_now(seconds=1)),
}
stacktraces = find_stacktraces_in_data(event)
# has no modules
assert not is_jvm_event(event, stacktraces)
assert not is_jvm_event(event)
Loading
Loading