From 3e675359b5b77a57255144dadb173aedcd601135 Mon Sep 17 00:00:00 2001 From: Tony Xiao Date: Mon, 13 Mar 2023 10:20:16 -0400 Subject: [PATCH] feat(profiling): Add profiler options to init (#1947) This adds the `profiles_sample_rate`, `profiles_sampler` and `profiler_mode` options to the top level of the init call. The `_experiment` options will still be available temporarily but is deprecated and will be removed in the future. --- sentry_sdk/_types.py | 2 + sentry_sdk/client.py | 5 +- sentry_sdk/consts.py | 7 +- sentry_sdk/profiler.py | 49 +++++++++++-- sentry_sdk/tracing.py | 5 +- sentry_sdk/tracing_utils.py | 36 ---------- sentry_sdk/utils.py | 34 +++++++++ tests/test_profiler.py | 124 +++++++++++++++++++++++++++++---- tests/test_utils.py | 39 ++++++++++- tests/tracing/test_sampling.py | 33 --------- 10 files changed, 239 insertions(+), 95 deletions(-) diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index 2c4a703cb5..cbead04e2e 100644 --- a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -85,3 +85,5 @@ FractionUnit = Literal["ratio", "percent"] MeasurementUnit = Union[DurationUnit, InformationUnit, FractionUnit, str] + + ProfilerMode = Literal["sleep", "thread", "gevent", "unknown"] diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 38b64e3798..c4be3331fa 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -28,7 +28,7 @@ from sentry_sdk.utils import ContextVar from sentry_sdk.sessions import SessionFlusher from sentry_sdk.envelope import Envelope -from sentry_sdk.profiler import setup_profiler +from sentry_sdk.profiler import has_profiling_enabled, setup_profiler from sentry_sdk._types import TYPE_CHECKING @@ -174,8 +174,7 @@ def _capture_envelope(envelope): finally: _client_init_debug.set(old_debug) - profiles_sample_rate = self.options["_experiments"].get("profiles_sample_rate") - if profiles_sample_rate is not None and profiles_sample_rate > 0: + if has_profiling_enabled(self.options): try: setup_profiler(self.options) except ValueError as e: diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 072b49ced7..1a8fc99e5d 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -19,6 +19,7 @@ BreadcrumbProcessor, Event, EventProcessor, + ProfilerMode, TracesSampler, TransactionProcessor, ) @@ -33,8 +34,9 @@ "max_spans": Optional[int], "record_sql_params": Optional[bool], "smart_transaction_trimming": Optional[bool], + # TODO: Remvoe these 2 profiling related experiments "profiles_sample_rate": Optional[float], - "profiler_mode": Optional[str], + "profiler_mode": Optional[ProfilerMode], }, total=False, ) @@ -115,6 +117,9 @@ def __init__( propagate_traces=True, # type: bool traces_sample_rate=None, # type: Optional[float] traces_sampler=None, # type: Optional[TracesSampler] + profiles_sample_rate=None, # type: Optional[float] + profiles_sampler=None, # type: Optional[TracesSampler] + profiler_mode=None, # type: Optional[ProfilerMode] auto_enabling_integrations=True, # type: bool auto_session_tracking=True, # type: bool send_client_reports=True, # type: bool diff --git a/sentry_sdk/profiler.py b/sentry_sdk/profiler.py index 1695fa34f1..f404fe2b35 100644 --- a/sentry_sdk/profiler.py +++ b/sentry_sdk/profiler.py @@ -27,6 +27,7 @@ from sentry_sdk._types import TYPE_CHECKING from sentry_sdk.utils import ( filename_for_module, + is_valid_sample_rate, logger, nanosecond_time, set_in_app_in_frames, @@ -46,7 +47,7 @@ from typing_extensions import TypedDict import sentry_sdk.tracing - from sentry_sdk._types import SamplingContext + from sentry_sdk._types import SamplingContext, ProfilerMode ThreadId = str @@ -148,6 +149,23 @@ def is_gevent(): PROFILE_MINIMUM_SAMPLES = 2 +def has_profiling_enabled(options): + # type: (Dict[str, Any]) -> bool + profiles_sampler = options["profiles_sampler"] + if profiles_sampler is not None: + return True + + profiles_sample_rate = options["profiles_sample_rate"] + if profiles_sample_rate is not None and profiles_sample_rate > 0: + return True + + profiles_sample_rate = options["_experiments"].get("profiles_sample_rate") + if profiles_sample_rate is not None and profiles_sample_rate > 0: + return True + + return False + + def setup_profiler(options): # type: (Dict[str, Any]) -> bool global _scheduler @@ -171,7 +189,13 @@ def setup_profiler(options): else: default_profiler_mode = ThreadScheduler.mode - profiler_mode = options["_experiments"].get("profiler_mode", default_profiler_mode) + if options.get("profiler_mode") is not None: + profiler_mode = options["profiler_mode"] + else: + profiler_mode = ( + options.get("_experiments", {}).get("profiler_mode") + or default_profiler_mode + ) if ( profiler_mode == ThreadScheduler.mode @@ -491,7 +515,13 @@ def _set_initial_sampling_decision(self, sampling_context): return options = client.options - sample_rate = options["_experiments"].get("profiles_sample_rate") + + if callable(options.get("profiles_sampler")): + sample_rate = options["profiles_sampler"](sampling_context) + elif options["profiles_sample_rate"] is not None: + sample_rate = options["profiles_sample_rate"] + else: + sample_rate = options["_experiments"].get("profiles_sample_rate") # The profiles_sample_rate option was not set, so profiling # was never enabled. @@ -502,6 +532,13 @@ def _set_initial_sampling_decision(self, sampling_context): self.sampled = False return + if not is_valid_sample_rate(sample_rate, source="Profiling"): + logger.warning( + "[Profiling] Discarding profile because of invalid sample rate." + ) + self.sampled = False + return + # Now we roll the dice. random.random is inclusive of 0, but not of 1, # so strict < is safe here. In case sample_rate is a boolean, cast it # to a float (True becomes 1.0 and False becomes 0.0) @@ -695,7 +732,7 @@ def valid(self): class Scheduler(object): - mode = "unknown" + mode = "unknown" # type: ProfilerMode def __init__(self, frequency): # type: (int) -> None @@ -824,7 +861,7 @@ class ThreadScheduler(Scheduler): the sampler at a regular interval. """ - mode = "thread" + mode = "thread" # type: ProfilerMode name = "sentry.profiler.ThreadScheduler" def __init__(self, frequency): @@ -905,7 +942,7 @@ class GeventScheduler(Scheduler): results in a sample containing only the sampler's code. """ - mode = "gevent" + mode = "gevent" # type: ProfilerMode name = "sentry.profiler.GeventScheduler" def __init__(self, frequency): diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index efcfc165db..111dbe9b6a 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -5,7 +5,7 @@ import sentry_sdk from sentry_sdk.consts import INSTRUMENTER -from sentry_sdk.utils import logger, nanosecond_time +from sentry_sdk.utils import is_valid_sample_rate, logger, nanosecond_time from sentry_sdk._types import TYPE_CHECKING @@ -722,7 +722,7 @@ def _set_initial_sampling_decision(self, sampling_context): # Since this is coming from the user (or from a function provided by the # user), who knows what we might get. (The only valid values are # booleans or numbers between 0 and 1.) - if not is_valid_sample_rate(sample_rate): + if not is_valid_sample_rate(sample_rate, source="Tracing"): logger.warning( "[Tracing] Discarding {transaction_description} because of invalid sample rate.".format( transaction_description=transaction_description, @@ -810,6 +810,5 @@ def finish(self, hub=None, end_timestamp=None): EnvironHeaders, extract_sentrytrace_data, has_tracing_enabled, - is_valid_sample_rate, maybe_create_breadcrumbs_from_span, ) diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py index 64155defdf..df1ac53c67 100644 --- a/sentry_sdk/tracing_utils.py +++ b/sentry_sdk/tracing_utils.py @@ -1,9 +1,5 @@ import re import contextlib -import math - -from numbers import Real -from decimal import Decimal import sentry_sdk from sentry_sdk.consts import OP @@ -11,7 +7,6 @@ from sentry_sdk.utils import ( capture_internal_exceptions, Dsn, - logger, to_string, ) from sentry_sdk._compat import PY2, iteritems @@ -100,37 +95,6 @@ def has_tracing_enabled(options): ) -def is_valid_sample_rate(rate): - # type: (Any) -> bool - """ - Checks the given sample rate to make sure it is valid type and value (a - boolean or a number between 0 and 1, inclusive). - """ - - # both booleans and NaN are instances of Real, so a) checking for Real - # checks for the possibility of a boolean also, and b) we have to check - # separately for NaN and Decimal does not derive from Real so need to check that too - if not isinstance(rate, (Real, Decimal)) or math.isnan(rate): - logger.warning( - "[Tracing] Given sample rate is invalid. Sample rate must be a boolean or a number between 0 and 1. Got {rate} of type {type}.".format( - rate=rate, type=type(rate) - ) - ) - return False - - # in case rate is a boolean, it will get cast to 1 if it's True and 0 if it's False - rate = float(rate) - if rate < 0 or rate > 1: - logger.warning( - "[Tracing] Given sample rate is invalid. Sample rate must be between 0 and 1. Got {rate}.".format( - rate=rate - ) - ) - return False - - return True - - @contextlib.contextmanager def record_sql_queries( hub, # type: sentry_sdk.Hub diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index 6f1a2cb80a..7091513ed9 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -2,6 +2,7 @@ import json import linecache import logging +import math import os import re import subprocess @@ -9,6 +10,8 @@ import threading import time from collections import namedtuple +from decimal import Decimal +from numbers import Real try: # Python 3 @@ -1260,6 +1263,37 @@ def parse_url(url, sanitize=True): return ParsedUrl(url=base_url, query=parsed_url.query, fragment=parsed_url.fragment) +def is_valid_sample_rate(rate, source): + # type: (Any, str) -> bool + """ + Checks the given sample rate to make sure it is valid type and value (a + boolean or a number between 0 and 1, inclusive). + """ + + # both booleans and NaN are instances of Real, so a) checking for Real + # checks for the possibility of a boolean also, and b) we have to check + # separately for NaN and Decimal does not derive from Real so need to check that too + if not isinstance(rate, (Real, Decimal)) or math.isnan(rate): + logger.warning( + "{source} Given sample rate is invalid. Sample rate must be a boolean or a number between 0 and 1. Got {rate} of type {type}.".format( + source=source, rate=rate, type=type(rate) + ) + ) + return False + + # in case rate is a boolean, it will get cast to 1 if it's True and 0 if it's False + rate = float(rate) + if rate < 0 or rate > 1: + logger.warning( + "{source} Given sample rate is invalid. Sample rate must be between 0 and 1. Got {rate}.".format( + source=source, rate=rate + ) + ) + return False + + return True + + if PY37: def nanosecond_time(): diff --git a/tests/test_profiler.py b/tests/test_profiler.py index c6f88fd531..dda982fd31 100644 --- a/tests/test_profiler.py +++ b/tests/test_profiler.py @@ -46,6 +46,16 @@ def process_test_sample(sample): return [(tid, (stack, stack)) for tid, stack in sample] +def non_experimental_options(mode=None, sample_rate=None): + return {"profiler_mode": mode, "profiles_sample_rate": sample_rate} + + +def experimental_options(mode=None, sample_rate=None): + return { + "_experiments": {"profiler_mode": mode, "profiles_sample_rate": sample_rate} + } + + @requires_python_version(3, 3) @pytest.mark.parametrize( "mode", @@ -57,9 +67,16 @@ def process_test_sample(sample): ), ], ) -def test_profiler_invalid_mode(mode, teardown_profiling): +@pytest.mark.parametrize( + "make_options", + [ + pytest.param(experimental_options, id="experiment"), + pytest.param(non_experimental_options, id="non experimental"), + ], +) +def test_profiler_invalid_mode(mode, make_options, teardown_profiling): with pytest.raises(ValueError): - setup_profiler({"_experiments": {"profiler_mode": mode}}) + setup_profiler(make_options(mode)) @pytest.mark.parametrize( @@ -70,17 +87,31 @@ def test_profiler_invalid_mode(mode, teardown_profiling): pytest.param("gevent", marks=requires_gevent), ], ) -def test_profiler_valid_mode(mode, teardown_profiling): +@pytest.mark.parametrize( + "make_options", + [ + pytest.param(experimental_options, id="experiment"), + pytest.param(non_experimental_options, id="non experimental"), + ], +) +def test_profiler_valid_mode(mode, make_options, teardown_profiling): # should not raise any exceptions - setup_profiler({"_experiments": {"profiler_mode": mode}}) + setup_profiler(make_options(mode)) @requires_python_version(3, 3) -def test_profiler_setup_twice(teardown_profiling): +@pytest.mark.parametrize( + "make_options", + [ + pytest.param(experimental_options, id="experiment"), + pytest.param(non_experimental_options, id="non experimental"), + ], +) +def test_profiler_setup_twice(make_options, teardown_profiling): # setting up the first time should return True to indicate success - assert setup_profiler({"_experiments": {}}) + assert setup_profiler(make_options()) # setting up the second time should return False to indicate no-op - assert not setup_profiler({"_experiments": {}}) + assert not setup_profiler(make_options()) @pytest.mark.parametrize( @@ -100,21 +131,90 @@ def test_profiler_setup_twice(teardown_profiling): pytest.param(None, 0, id="profiler not enabled"), ], ) +@pytest.mark.parametrize( + "make_options", + [ + pytest.param(experimental_options, id="experiment"), + pytest.param(non_experimental_options, id="non experimental"), + ], +) @mock.patch("sentry_sdk.profiler.PROFILE_MINIMUM_SAMPLES", 0) -def test_profiled_transaction( +def test_profiles_sample_rate( sentry_init, capture_envelopes, teardown_profiling, profiles_sample_rate, profile_count, + make_options, + mode, +): + options = make_options(mode=mode, sample_rate=profiles_sample_rate) + sentry_init( + traces_sample_rate=1.0, + profiler_mode=options.get("profiler_mode"), + profiles_sample_rate=options.get("profiles_sample_rate"), + _experiments=options.get("_experiments", {}), + ) + + envelopes = capture_envelopes() + + with mock.patch("sentry_sdk.profiler.random.random", return_value=0.5): + with start_transaction(name="profiling"): + pass + + items = defaultdict(list) + for envelope in envelopes: + for item in envelope.items: + items[item.type].append(item) + + assert len(items["transaction"]) == 1 + assert len(items["profile"]) == profile_count + + +@pytest.mark.parametrize( + "mode", + [ + pytest.param("thread"), + pytest.param("gevent", marks=requires_gevent), + ], +) +@pytest.mark.parametrize( + ("profiles_sampler", "profile_count"), + [ + pytest.param(lambda _: 1.00, 1, id="profiler sampled at 1.00"), + pytest.param(lambda _: 0.75, 1, id="profiler sampled at 0.75"), + pytest.param(lambda _: 0.25, 0, id="profiler sampled at 0.25"), + pytest.param(lambda _: 0.00, 0, id="profiler sampled at 0.00"), + pytest.param(lambda _: None, 0, id="profiler not enabled"), + pytest.param( + lambda ctx: 1 if ctx["transaction_context"]["name"] == "profiling" else 0, + 1, + id="profiler sampled for transaction name", + ), + pytest.param( + lambda ctx: 0 if ctx["transaction_context"]["name"] == "profiling" else 1, + 0, + id="profiler not sampled for transaction name", + ), + pytest.param( + lambda _: "1", 0, id="profiler not sampled because string sample rate" + ), + pytest.param(lambda _: True, 1, id="profiler sampled at True"), + pytest.param(lambda _: False, 0, id="profiler sampled at False"), + ], +) +@mock.patch("sentry_sdk.profiler.PROFILE_MINIMUM_SAMPLES", 0) +def test_profiles_sampler( + sentry_init, + capture_envelopes, + teardown_profiling, + profiles_sampler, + profile_count, mode, ): sentry_init( traces_sample_rate=1.0, - _experiments={ - "profiles_sample_rate": profiles_sample_rate, - "profiler_mode": mode, - }, + profiles_sampler=profiles_sampler, ) envelopes = capture_envelopes() diff --git a/tests/test_utils.py b/tests/test_utils.py index 2e266c7600..7578e6255b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,7 +1,12 @@ import pytest import re -from sentry_sdk.utils import parse_url, sanitize_url +from sentry_sdk.utils import is_valid_sample_rate, logger, parse_url, sanitize_url + +try: + from unittest import mock # python 3.3 and above +except ImportError: + import mock # python < 3.3 @pytest.mark.parametrize( @@ -184,3 +189,35 @@ def test_parse_url(url, sanitize, expected_url, expected_query, expected_fragmen expected_query_parts = sorted(re.split(r"\&|\?|\#", expected_query)) assert query_parts == expected_query_parts + + +@pytest.mark.parametrize( + "rate", + [0.0, 0.1231, 1.0, True, False], +) +def test_accepts_valid_sample_rate(rate): + with mock.patch.object(logger, "warning", mock.Mock()): + result = is_valid_sample_rate(rate, source="Testing") + assert logger.warning.called is False + assert result is True + + +@pytest.mark.parametrize( + "rate", + [ + "dogs are great", # wrong type + (0, 1), # wrong type + {"Maisey": "Charllie"}, # wrong type + [True, True], # wrong type + {0.2012}, # wrong type + float("NaN"), # wrong type + None, # wrong type + -1.121, # wrong value + 1.231, # wrong value + ], +) +def test_warns_on_invalid_sample_rate(rate, StringContaining): # noqa: N803 + with mock.patch.object(logger, "warning", mock.Mock()): + result = is_valid_sample_rate(rate, source="Testing") + logger.warning.assert_any_call(StringContaining("Given sample rate is invalid")) + assert result is False diff --git a/tests/tracing/test_sampling.py b/tests/tracing/test_sampling.py index 9975abad5d..6391aeee76 100644 --- a/tests/tracing/test_sampling.py +++ b/tests/tracing/test_sampling.py @@ -4,7 +4,6 @@ from sentry_sdk import Hub, start_span, start_transaction from sentry_sdk.tracing import Transaction -from sentry_sdk.tracing_utils import is_valid_sample_rate from sentry_sdk.utils import logger try: @@ -51,38 +50,6 @@ def test_no_double_sampling(sentry_init, capture_events): assert len(events) == 1 -@pytest.mark.parametrize( - "rate", - [0.0, 0.1231, 1.0, True, False], -) -def test_accepts_valid_sample_rate(rate): - with mock.patch.object(logger, "warning", mock.Mock()): - result = is_valid_sample_rate(rate) - assert logger.warning.called is False - assert result is True - - -@pytest.mark.parametrize( - "rate", - [ - "dogs are great", # wrong type - (0, 1), # wrong type - {"Maisey": "Charllie"}, # wrong type - [True, True], # wrong type - {0.2012}, # wrong type - float("NaN"), # wrong type - None, # wrong type - -1.121, # wrong value - 1.231, # wrong value - ], -) -def test_warns_on_invalid_sample_rate(rate, StringContaining): # noqa: N803 - with mock.patch.object(logger, "warning", mock.Mock()): - result = is_valid_sample_rate(rate) - logger.warning.assert_any_call(StringContaining("Given sample rate is invalid")) - assert result is False - - @pytest.mark.parametrize("sampling_decision", [True, False]) def test_get_transaction_and_span_from_scope_regardless_of_sampling_decision( sentry_init, sampling_decision