From 3e675359b5b77a57255144dadb173aedcd601135 Mon Sep 17 00:00:00 2001
From: Tony Xiao <txiao@sentry.io>
Date: Mon, 13 Mar 2023 10:20:16 -0400
Subject: [PATCH] feat(profiling): Add profiler options to init (#1947)

This adds the `profiles_sample_rate`, `profiles_sampler` and `profiler_mode`
options to the top level of the init call. The `_experiment` options will still
be available temporarily but is deprecated and will be removed in the future.
---
 sentry_sdk/_types.py           |   2 +
 sentry_sdk/client.py           |   5 +-
 sentry_sdk/consts.py           |   7 +-
 sentry_sdk/profiler.py         |  49 +++++++++++--
 sentry_sdk/tracing.py          |   5 +-
 sentry_sdk/tracing_utils.py    |  36 ----------
 sentry_sdk/utils.py            |  34 +++++++++
 tests/test_profiler.py         | 124 +++++++++++++++++++++++++++++----
 tests/test_utils.py            |  39 ++++++++++-
 tests/tracing/test_sampling.py |  33 ---------
 10 files changed, 239 insertions(+), 95 deletions(-)

diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py
index 2c4a703cb5..cbead04e2e 100644
--- a/sentry_sdk/_types.py
+++ b/sentry_sdk/_types.py
@@ -85,3 +85,5 @@
 
     FractionUnit = Literal["ratio", "percent"]
     MeasurementUnit = Union[DurationUnit, InformationUnit, FractionUnit, str]
+
+    ProfilerMode = Literal["sleep", "thread", "gevent", "unknown"]
diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py
index 38b64e3798..c4be3331fa 100644
--- a/sentry_sdk/client.py
+++ b/sentry_sdk/client.py
@@ -28,7 +28,7 @@
 from sentry_sdk.utils import ContextVar
 from sentry_sdk.sessions import SessionFlusher
 from sentry_sdk.envelope import Envelope
-from sentry_sdk.profiler import setup_profiler
+from sentry_sdk.profiler import has_profiling_enabled, setup_profiler
 
 from sentry_sdk._types import TYPE_CHECKING
 
@@ -174,8 +174,7 @@ def _capture_envelope(envelope):
         finally:
             _client_init_debug.set(old_debug)
 
-        profiles_sample_rate = self.options["_experiments"].get("profiles_sample_rate")
-        if profiles_sample_rate is not None and profiles_sample_rate > 0:
+        if has_profiling_enabled(self.options):
             try:
                 setup_profiler(self.options)
             except ValueError as e:
diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py
index 072b49ced7..1a8fc99e5d 100644
--- a/sentry_sdk/consts.py
+++ b/sentry_sdk/consts.py
@@ -19,6 +19,7 @@
         BreadcrumbProcessor,
         Event,
         EventProcessor,
+        ProfilerMode,
         TracesSampler,
         TransactionProcessor,
     )
@@ -33,8 +34,9 @@
             "max_spans": Optional[int],
             "record_sql_params": Optional[bool],
             "smart_transaction_trimming": Optional[bool],
+            # TODO: Remvoe these 2 profiling related experiments
             "profiles_sample_rate": Optional[float],
-            "profiler_mode": Optional[str],
+            "profiler_mode": Optional[ProfilerMode],
         },
         total=False,
     )
@@ -115,6 +117,9 @@ def __init__(
         propagate_traces=True,  # type: bool
         traces_sample_rate=None,  # type: Optional[float]
         traces_sampler=None,  # type: Optional[TracesSampler]
+        profiles_sample_rate=None,  # type: Optional[float]
+        profiles_sampler=None,  # type: Optional[TracesSampler]
+        profiler_mode=None,  # type: Optional[ProfilerMode]
         auto_enabling_integrations=True,  # type: bool
         auto_session_tracking=True,  # type: bool
         send_client_reports=True,  # type: bool
diff --git a/sentry_sdk/profiler.py b/sentry_sdk/profiler.py
index 1695fa34f1..f404fe2b35 100644
--- a/sentry_sdk/profiler.py
+++ b/sentry_sdk/profiler.py
@@ -27,6 +27,7 @@
 from sentry_sdk._types import TYPE_CHECKING
 from sentry_sdk.utils import (
     filename_for_module,
+    is_valid_sample_rate,
     logger,
     nanosecond_time,
     set_in_app_in_frames,
@@ -46,7 +47,7 @@
     from typing_extensions import TypedDict
 
     import sentry_sdk.tracing
-    from sentry_sdk._types import SamplingContext
+    from sentry_sdk._types import SamplingContext, ProfilerMode
 
     ThreadId = str
 
@@ -148,6 +149,23 @@ def is_gevent():
 PROFILE_MINIMUM_SAMPLES = 2
 
 
+def has_profiling_enabled(options):
+    # type: (Dict[str, Any]) -> bool
+    profiles_sampler = options["profiles_sampler"]
+    if profiles_sampler is not None:
+        return True
+
+    profiles_sample_rate = options["profiles_sample_rate"]
+    if profiles_sample_rate is not None and profiles_sample_rate > 0:
+        return True
+
+    profiles_sample_rate = options["_experiments"].get("profiles_sample_rate")
+    if profiles_sample_rate is not None and profiles_sample_rate > 0:
+        return True
+
+    return False
+
+
 def setup_profiler(options):
     # type: (Dict[str, Any]) -> bool
     global _scheduler
@@ -171,7 +189,13 @@ def setup_profiler(options):
     else:
         default_profiler_mode = ThreadScheduler.mode
 
-    profiler_mode = options["_experiments"].get("profiler_mode", default_profiler_mode)
+    if options.get("profiler_mode") is not None:
+        profiler_mode = options["profiler_mode"]
+    else:
+        profiler_mode = (
+            options.get("_experiments", {}).get("profiler_mode")
+            or default_profiler_mode
+        )
 
     if (
         profiler_mode == ThreadScheduler.mode
@@ -491,7 +515,13 @@ def _set_initial_sampling_decision(self, sampling_context):
             return
 
         options = client.options
-        sample_rate = options["_experiments"].get("profiles_sample_rate")
+
+        if callable(options.get("profiles_sampler")):
+            sample_rate = options["profiles_sampler"](sampling_context)
+        elif options["profiles_sample_rate"] is not None:
+            sample_rate = options["profiles_sample_rate"]
+        else:
+            sample_rate = options["_experiments"].get("profiles_sample_rate")
 
         # The profiles_sample_rate option was not set, so profiling
         # was never enabled.
@@ -502,6 +532,13 @@ def _set_initial_sampling_decision(self, sampling_context):
             self.sampled = False
             return
 
+        if not is_valid_sample_rate(sample_rate, source="Profiling"):
+            logger.warning(
+                "[Profiling] Discarding profile because of invalid sample rate."
+            )
+            self.sampled = False
+            return
+
         # Now we roll the dice. random.random is inclusive of 0, but not of 1,
         # so strict < is safe here. In case sample_rate is a boolean, cast it
         # to a float (True becomes 1.0 and False becomes 0.0)
@@ -695,7 +732,7 @@ def valid(self):
 
 
 class Scheduler(object):
-    mode = "unknown"
+    mode = "unknown"  # type: ProfilerMode
 
     def __init__(self, frequency):
         # type: (int) -> None
@@ -824,7 +861,7 @@ class ThreadScheduler(Scheduler):
     the sampler at a regular interval.
     """
 
-    mode = "thread"
+    mode = "thread"  # type: ProfilerMode
     name = "sentry.profiler.ThreadScheduler"
 
     def __init__(self, frequency):
@@ -905,7 +942,7 @@ class GeventScheduler(Scheduler):
        results in a sample containing only the sampler's code.
     """
 
-    mode = "gevent"
+    mode = "gevent"  # type: ProfilerMode
     name = "sentry.profiler.GeventScheduler"
 
     def __init__(self, frequency):
diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py
index efcfc165db..111dbe9b6a 100644
--- a/sentry_sdk/tracing.py
+++ b/sentry_sdk/tracing.py
@@ -5,7 +5,7 @@
 
 import sentry_sdk
 from sentry_sdk.consts import INSTRUMENTER
-from sentry_sdk.utils import logger, nanosecond_time
+from sentry_sdk.utils import is_valid_sample_rate, logger, nanosecond_time
 from sentry_sdk._types import TYPE_CHECKING
 
 
@@ -722,7 +722,7 @@ def _set_initial_sampling_decision(self, sampling_context):
         # Since this is coming from the user (or from a function provided by the
         # user), who knows what we might get. (The only valid values are
         # booleans or numbers between 0 and 1.)
-        if not is_valid_sample_rate(sample_rate):
+        if not is_valid_sample_rate(sample_rate, source="Tracing"):
             logger.warning(
                 "[Tracing] Discarding {transaction_description} because of invalid sample rate.".format(
                     transaction_description=transaction_description,
@@ -810,6 +810,5 @@ def finish(self, hub=None, end_timestamp=None):
     EnvironHeaders,
     extract_sentrytrace_data,
     has_tracing_enabled,
-    is_valid_sample_rate,
     maybe_create_breadcrumbs_from_span,
 )
diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py
index 64155defdf..df1ac53c67 100644
--- a/sentry_sdk/tracing_utils.py
+++ b/sentry_sdk/tracing_utils.py
@@ -1,9 +1,5 @@
 import re
 import contextlib
-import math
-
-from numbers import Real
-from decimal import Decimal
 
 import sentry_sdk
 from sentry_sdk.consts import OP
@@ -11,7 +7,6 @@
 from sentry_sdk.utils import (
     capture_internal_exceptions,
     Dsn,
-    logger,
     to_string,
 )
 from sentry_sdk._compat import PY2, iteritems
@@ -100,37 +95,6 @@ def has_tracing_enabled(options):
     )
 
 
-def is_valid_sample_rate(rate):
-    # type: (Any) -> bool
-    """
-    Checks the given sample rate to make sure it is valid type and value (a
-    boolean or a number between 0 and 1, inclusive).
-    """
-
-    # both booleans and NaN are instances of Real, so a) checking for Real
-    # checks for the possibility of a boolean also, and b) we have to check
-    # separately for NaN and Decimal does not derive from Real so need to check that too
-    if not isinstance(rate, (Real, Decimal)) or math.isnan(rate):
-        logger.warning(
-            "[Tracing] Given sample rate is invalid. Sample rate must be a boolean or a number between 0 and 1. Got {rate} of type {type}.".format(
-                rate=rate, type=type(rate)
-            )
-        )
-        return False
-
-    # in case rate is a boolean, it will get cast to 1 if it's True and 0 if it's False
-    rate = float(rate)
-    if rate < 0 or rate > 1:
-        logger.warning(
-            "[Tracing] Given sample rate is invalid. Sample rate must be between 0 and 1. Got {rate}.".format(
-                rate=rate
-            )
-        )
-        return False
-
-    return True
-
-
 @contextlib.contextmanager
 def record_sql_queries(
     hub,  # type: sentry_sdk.Hub
diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py
index 6f1a2cb80a..7091513ed9 100644
--- a/sentry_sdk/utils.py
+++ b/sentry_sdk/utils.py
@@ -2,6 +2,7 @@
 import json
 import linecache
 import logging
+import math
 import os
 import re
 import subprocess
@@ -9,6 +10,8 @@
 import threading
 import time
 from collections import namedtuple
+from decimal import Decimal
+from numbers import Real
 
 try:
     # Python 3
@@ -1260,6 +1263,37 @@ def parse_url(url, sanitize=True):
     return ParsedUrl(url=base_url, query=parsed_url.query, fragment=parsed_url.fragment)
 
 
+def is_valid_sample_rate(rate, source):
+    # type: (Any, str) -> bool
+    """
+    Checks the given sample rate to make sure it is valid type and value (a
+    boolean or a number between 0 and 1, inclusive).
+    """
+
+    # both booleans and NaN are instances of Real, so a) checking for Real
+    # checks for the possibility of a boolean also, and b) we have to check
+    # separately for NaN and Decimal does not derive from Real so need to check that too
+    if not isinstance(rate, (Real, Decimal)) or math.isnan(rate):
+        logger.warning(
+            "{source} Given sample rate is invalid. Sample rate must be a boolean or a number between 0 and 1. Got {rate} of type {type}.".format(
+                source=source, rate=rate, type=type(rate)
+            )
+        )
+        return False
+
+    # in case rate is a boolean, it will get cast to 1 if it's True and 0 if it's False
+    rate = float(rate)
+    if rate < 0 or rate > 1:
+        logger.warning(
+            "{source} Given sample rate is invalid. Sample rate must be between 0 and 1. Got {rate}.".format(
+                source=source, rate=rate
+            )
+        )
+        return False
+
+    return True
+
+
 if PY37:
 
     def nanosecond_time():
diff --git a/tests/test_profiler.py b/tests/test_profiler.py
index c6f88fd531..dda982fd31 100644
--- a/tests/test_profiler.py
+++ b/tests/test_profiler.py
@@ -46,6 +46,16 @@ def process_test_sample(sample):
     return [(tid, (stack, stack)) for tid, stack in sample]
 
 
+def non_experimental_options(mode=None, sample_rate=None):
+    return {"profiler_mode": mode, "profiles_sample_rate": sample_rate}
+
+
+def experimental_options(mode=None, sample_rate=None):
+    return {
+        "_experiments": {"profiler_mode": mode, "profiles_sample_rate": sample_rate}
+    }
+
+
 @requires_python_version(3, 3)
 @pytest.mark.parametrize(
     "mode",
@@ -57,9 +67,16 @@ def process_test_sample(sample):
         ),
     ],
 )
-def test_profiler_invalid_mode(mode, teardown_profiling):
+@pytest.mark.parametrize(
+    "make_options",
+    [
+        pytest.param(experimental_options, id="experiment"),
+        pytest.param(non_experimental_options, id="non experimental"),
+    ],
+)
+def test_profiler_invalid_mode(mode, make_options, teardown_profiling):
     with pytest.raises(ValueError):
-        setup_profiler({"_experiments": {"profiler_mode": mode}})
+        setup_profiler(make_options(mode))
 
 
 @pytest.mark.parametrize(
@@ -70,17 +87,31 @@ def test_profiler_invalid_mode(mode, teardown_profiling):
         pytest.param("gevent", marks=requires_gevent),
     ],
 )
-def test_profiler_valid_mode(mode, teardown_profiling):
+@pytest.mark.parametrize(
+    "make_options",
+    [
+        pytest.param(experimental_options, id="experiment"),
+        pytest.param(non_experimental_options, id="non experimental"),
+    ],
+)
+def test_profiler_valid_mode(mode, make_options, teardown_profiling):
     # should not raise any exceptions
-    setup_profiler({"_experiments": {"profiler_mode": mode}})
+    setup_profiler(make_options(mode))
 
 
 @requires_python_version(3, 3)
-def test_profiler_setup_twice(teardown_profiling):
+@pytest.mark.parametrize(
+    "make_options",
+    [
+        pytest.param(experimental_options, id="experiment"),
+        pytest.param(non_experimental_options, id="non experimental"),
+    ],
+)
+def test_profiler_setup_twice(make_options, teardown_profiling):
     # setting up the first time should return True to indicate success
-    assert setup_profiler({"_experiments": {}})
+    assert setup_profiler(make_options())
     # setting up the second time should return False to indicate no-op
-    assert not setup_profiler({"_experiments": {}})
+    assert not setup_profiler(make_options())
 
 
 @pytest.mark.parametrize(
@@ -100,21 +131,90 @@ def test_profiler_setup_twice(teardown_profiling):
         pytest.param(None, 0, id="profiler not enabled"),
     ],
 )
+@pytest.mark.parametrize(
+    "make_options",
+    [
+        pytest.param(experimental_options, id="experiment"),
+        pytest.param(non_experimental_options, id="non experimental"),
+    ],
+)
 @mock.patch("sentry_sdk.profiler.PROFILE_MINIMUM_SAMPLES", 0)
-def test_profiled_transaction(
+def test_profiles_sample_rate(
     sentry_init,
     capture_envelopes,
     teardown_profiling,
     profiles_sample_rate,
     profile_count,
+    make_options,
+    mode,
+):
+    options = make_options(mode=mode, sample_rate=profiles_sample_rate)
+    sentry_init(
+        traces_sample_rate=1.0,
+        profiler_mode=options.get("profiler_mode"),
+        profiles_sample_rate=options.get("profiles_sample_rate"),
+        _experiments=options.get("_experiments", {}),
+    )
+
+    envelopes = capture_envelopes()
+
+    with mock.patch("sentry_sdk.profiler.random.random", return_value=0.5):
+        with start_transaction(name="profiling"):
+            pass
+
+    items = defaultdict(list)
+    for envelope in envelopes:
+        for item in envelope.items:
+            items[item.type].append(item)
+
+    assert len(items["transaction"]) == 1
+    assert len(items["profile"]) == profile_count
+
+
+@pytest.mark.parametrize(
+    "mode",
+    [
+        pytest.param("thread"),
+        pytest.param("gevent", marks=requires_gevent),
+    ],
+)
+@pytest.mark.parametrize(
+    ("profiles_sampler", "profile_count"),
+    [
+        pytest.param(lambda _: 1.00, 1, id="profiler sampled at 1.00"),
+        pytest.param(lambda _: 0.75, 1, id="profiler sampled at 0.75"),
+        pytest.param(lambda _: 0.25, 0, id="profiler sampled at 0.25"),
+        pytest.param(lambda _: 0.00, 0, id="profiler sampled at 0.00"),
+        pytest.param(lambda _: None, 0, id="profiler not enabled"),
+        pytest.param(
+            lambda ctx: 1 if ctx["transaction_context"]["name"] == "profiling" else 0,
+            1,
+            id="profiler sampled for transaction name",
+        ),
+        pytest.param(
+            lambda ctx: 0 if ctx["transaction_context"]["name"] == "profiling" else 1,
+            0,
+            id="profiler not sampled for transaction name",
+        ),
+        pytest.param(
+            lambda _: "1", 0, id="profiler not sampled because string sample rate"
+        ),
+        pytest.param(lambda _: True, 1, id="profiler sampled at True"),
+        pytest.param(lambda _: False, 0, id="profiler sampled at False"),
+    ],
+)
+@mock.patch("sentry_sdk.profiler.PROFILE_MINIMUM_SAMPLES", 0)
+def test_profiles_sampler(
+    sentry_init,
+    capture_envelopes,
+    teardown_profiling,
+    profiles_sampler,
+    profile_count,
     mode,
 ):
     sentry_init(
         traces_sample_rate=1.0,
-        _experiments={
-            "profiles_sample_rate": profiles_sample_rate,
-            "profiler_mode": mode,
-        },
+        profiles_sampler=profiles_sampler,
     )
 
     envelopes = capture_envelopes()
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 2e266c7600..7578e6255b 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,7 +1,12 @@
 import pytest
 import re
 
-from sentry_sdk.utils import parse_url, sanitize_url
+from sentry_sdk.utils import is_valid_sample_rate, logger, parse_url, sanitize_url
+
+try:
+    from unittest import mock  # python 3.3 and above
+except ImportError:
+    import mock  # python < 3.3
 
 
 @pytest.mark.parametrize(
@@ -184,3 +189,35 @@ def test_parse_url(url, sanitize, expected_url, expected_query, expected_fragmen
     expected_query_parts = sorted(re.split(r"\&|\?|\#", expected_query))
 
     assert query_parts == expected_query_parts
+
+
+@pytest.mark.parametrize(
+    "rate",
+    [0.0, 0.1231, 1.0, True, False],
+)
+def test_accepts_valid_sample_rate(rate):
+    with mock.patch.object(logger, "warning", mock.Mock()):
+        result = is_valid_sample_rate(rate, source="Testing")
+        assert logger.warning.called is False
+        assert result is True
+
+
+@pytest.mark.parametrize(
+    "rate",
+    [
+        "dogs are great",  # wrong type
+        (0, 1),  # wrong type
+        {"Maisey": "Charllie"},  # wrong type
+        [True, True],  # wrong type
+        {0.2012},  # wrong type
+        float("NaN"),  # wrong type
+        None,  # wrong type
+        -1.121,  # wrong value
+        1.231,  # wrong value
+    ],
+)
+def test_warns_on_invalid_sample_rate(rate, StringContaining):  # noqa: N803
+    with mock.patch.object(logger, "warning", mock.Mock()):
+        result = is_valid_sample_rate(rate, source="Testing")
+        logger.warning.assert_any_call(StringContaining("Given sample rate is invalid"))
+        assert result is False
diff --git a/tests/tracing/test_sampling.py b/tests/tracing/test_sampling.py
index 9975abad5d..6391aeee76 100644
--- a/tests/tracing/test_sampling.py
+++ b/tests/tracing/test_sampling.py
@@ -4,7 +4,6 @@
 
 from sentry_sdk import Hub, start_span, start_transaction
 from sentry_sdk.tracing import Transaction
-from sentry_sdk.tracing_utils import is_valid_sample_rate
 from sentry_sdk.utils import logger
 
 try:
@@ -51,38 +50,6 @@ def test_no_double_sampling(sentry_init, capture_events):
     assert len(events) == 1
 
 
-@pytest.mark.parametrize(
-    "rate",
-    [0.0, 0.1231, 1.0, True, False],
-)
-def test_accepts_valid_sample_rate(rate):
-    with mock.patch.object(logger, "warning", mock.Mock()):
-        result = is_valid_sample_rate(rate)
-        assert logger.warning.called is False
-        assert result is True
-
-
-@pytest.mark.parametrize(
-    "rate",
-    [
-        "dogs are great",  # wrong type
-        (0, 1),  # wrong type
-        {"Maisey": "Charllie"},  # wrong type
-        [True, True],  # wrong type
-        {0.2012},  # wrong type
-        float("NaN"),  # wrong type
-        None,  # wrong type
-        -1.121,  # wrong value
-        1.231,  # wrong value
-    ],
-)
-def test_warns_on_invalid_sample_rate(rate, StringContaining):  # noqa: N803
-    with mock.patch.object(logger, "warning", mock.Mock()):
-        result = is_valid_sample_rate(rate)
-        logger.warning.assert_any_call(StringContaining("Given sample rate is invalid"))
-        assert result is False
-
-
 @pytest.mark.parametrize("sampling_decision", [True, False])
 def test_get_transaction_and_span_from_scope_regardless_of_sampling_decision(
     sentry_init, sampling_decision