-
Notifications
You must be signed in to change notification settings - Fork 764
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Example / proof of concept to achieve a combination of head-based sam…
…pling + a basic form of tail-based sampling at a span level. (#4206) Co-authored-by: Timothy Mothra <tilee@microsoft.com> Co-authored-by: Cijo Thomas <cithomas@microsoft.com>
- Loading branch information
1 parent
a1ea6d6
commit 3b1ceba
Showing
6 changed files
with
313 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
61 changes: 61 additions & 0 deletions
61
docs/trace/tail-based-sampling-span-level/ParentBasedElseAlwaysRecordSampler.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
// <copyright file="ParentBasedElseAlwaysRecordSampler.cs" company="OpenTelemetry Authors"> | ||
// Copyright The OpenTelemetry Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
// </copyright> | ||
|
||
using OpenTelemetry.Trace; | ||
|
||
namespace SDKBasedSpanLevelTailSamplingSample; | ||
|
||
/// <summary>
/// Proof-of-concept composite sampler; it is not meant to be used directly in production.
/// It combines parent-based head sampling with SDK-side "span-level" tail-based sampling:
/// the parent-based sampler is consulted first and any decision other than "drop" is kept
/// unchanged. A "drop" decision is replaced by a "record-only" result, which a span
/// processor can later upgrade based on span attributes (e.g., failure) that only become
/// available by the end of the span.
/// </summary>
internal class ParentBasedElseAlwaysRecordSampler : Sampler
{
    private const double DefaultSamplingProbabilityForRootSpan = 0.1;
    private readonly ParentBasedSampler headSampler;

    /// <summary>
    /// Initializes a new instance of the <see cref="ParentBasedElseAlwaysRecordSampler"/> class.
    /// </summary>
    /// <param name="samplingProbabilityForRootSpan">
    /// Ratio handed to the trace-id ratio based sampler that the parent-based sampler wraps as its root sampler.
    /// </param>
    public ParentBasedElseAlwaysRecordSampler(double samplingProbabilityForRootSpan = DefaultSamplingProbabilityForRootSpan)
    {
        var rootSampler = new TraceIdRatioBasedSampler(samplingProbabilityForRootSpan);
        this.headSampler = new ParentBasedSampler(rootSampler);
    }

    /// <summary>
    /// Returns the parent-based sampler's result unless that result is a drop,
    /// in which case a record-only result is returned instead.
    /// </summary>
    /// <param name="samplingParameters">Parameters forwarded unchanged to the parent-based sampler.</param>
    /// <returns>The head-sampling result, or a record-only result when head sampling decided to drop.</returns>
    public override SamplingResult ShouldSample(in SamplingParameters samplingParameters)
    {
        // Head sampling first: ask the parent-based sampler.
        var headResult = this.headSampler.ShouldSample(samplingParameters);

        // A dropped span is downgraded to RecordOnly rather than discarded, so that
        // the span filtering processors later in the pipeline can apply tail-based
        // sampling rules (e.g., to sample all failed spans). RecordOnly matters because:
        // 1. It causes the Processor pipeline to be invoked.
        // 2. It causes activity.IsAllDataRequested to return true, so most
        //    instrumentations end up populating the required attributes.
        // Any other decision from the parent-based sampler is retained as-is.
        return headResult.Decision == SamplingDecision.Drop
            ? new SamplingResult(SamplingDecision.RecordOnly)
            : headResult;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
// <copyright file="Program.cs" company="OpenTelemetry Authors"> | ||
// Copyright The OpenTelemetry Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
// </copyright> | ||
|
||
using System.Diagnostics; | ||
using OpenTelemetry; | ||
using OpenTelemetry.Trace; | ||
|
||
namespace SDKBasedSpanLevelTailSamplingSample; | ||
|
||
/// <summary>
/// Console entry point for the span-level tail-sampling example: it wires the custom
/// sampler, the tail-sampling processor and the console exporter together and then
/// emits a batch of activities with mixed statuses.
/// </summary>
internal class Program
{
    // Single definition of the source name shared by the ActivitySource and AddSource.
    private const string SourceName = "SDK.TailSampling.POC";

    private static readonly ActivitySource MyActivitySource = new(SourceName);

    public static void Main(string[] args)
    {
        using var tracerProvider = Sdk.CreateTracerProviderBuilder()
            .SetSampler(new ParentBasedElseAlwaysRecordSampler())
            .AddSource(SourceName)
            .AddProcessor(new TailSamplingProcessor())
            .AddConsoleExporter()
            .Build();

        // Fixed seed, so every run produces the same sequence of random values.
        var random = new Random(2357);

        // Generate some spans
        for (var iteration = 0; iteration < 50; iteration++)
        {
            using var activity = MyActivitySource.StartActivity("SayHello");
            activity?.SetTag("foo", "bar");

            // Simulate a mix of failed and successful spans: a draw of 0 out of
            // the five possible values marks the span as failed.
            var status = random.Next(5) == 0
                ? ActivityStatusCode.Error
                : ActivityStatusCode.Ok;
            activity?.SetStatus(status);
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
# Tail Based Sampling at an activity (span) level: An Example | ||
|
||
This document describes one possible way to achieve a form of tail-based | ||
sampling to include all failed activities in addition to head-based sampling. | ||
|
||
It does this by leveraging the extensibility mechanisms in the OpenTelemetry | ||
SDK. It uses a combination of a custom sampler and an ActivityProcessor | ||
(span processor). | ||
|
||
This is a way to achieve a combination of: | ||
|
||
- Head-based sampling (probabilistic/unbiased sampling), and | ||
- Tail-based sampling (a non-probabilistic/biased sampling). | ||
|
||
## How does this sampling example work? | ||
|
||
We use a hybrid approach: we do head based sampling to get a | ||
probabilistic subset of all activities which includes both successful activities | ||
and failure activities. In addition, we want to capture all failure activities. | ||
To do this, if the parent based sampler's decision is to drop it, we return | ||
a "Record-Only" sampling result. This ensures that the activity processor | ||
receives that activity. In the activity processor, at the end of an activity, | ||
we check if it is a failure activity. If so, we upgrade the "Record-Only" | ||
decision by setting the sampled flag so that the exporter receives the | ||
activity. In this example, each activity is filtered individually, without | ||
considering any other activities. | ||
|
||
This is a basic form of tail-based sampling at an activity level. If an | ||
activity failed, we always sample it in addition to all head-sampled | ||
activities. | ||
|
||
## When should you consider such an option? | ||
|
||
This is a good option if you want to get all failure activities in addition to | ||
head based sampling. With this, you get basic activity level tail-based sampling | ||
at an SDK level without having to install any additional components. | ||
|
||
## Tradeoffs | ||
|
||
Tail-sampling this way involves many tradeoffs such as: | ||
|
||
1. Additional performance cost: Unlike head-based sampling where the sampling | ||
decision is made at activity creation time, in tail sampling the decision is made | ||
only at the end, so there is additional memory/processing cost. | ||
|
||
2. Partial traces: Since this sampling is at an activity level, the generated trace | ||
will be partial. For example, if another part of the call tree is successful, | ||
those activities may not be exported, leading to an incomplete trace. | ||
|
||
3. If multiple exporters are used, this decision will impact all of them: | ||
[Issue 3861](https://github.com/open-telemetry/opentelemetry-dotnet/issues/3861). | ||
|
||
## Sample Output | ||
|
||
You should see output similar to the following when you run this example. | ||
|
||
```text | ||
Including error activity with id | ||
00-404ddff248b8f9a9b21e347d68d2640e-035858bc3c168885-01 and status Error | ||
Activity.TraceId: 404ddff248b8f9a9b21e347d68d2640e | ||
Activity.SpanId: 035858bc3c168885 | ||
Activity.TraceFlags: Recorded | ||
Activity.ActivitySourceName: SDK.TailSampling.POC | ||
Activity.DisplayName: SayHello | ||
Activity.Kind: Internal | ||
Activity.StartTime: 2023-02-09T19:05:32.5563112Z | ||
Activity.Duration: 00:00:00.0028144 | ||
Activity.Tags: | ||
foo: bar | ||
StatusCode: Error | ||
Resource associated with Activity: | ||
service.name: unknown_service:Examples.TailBasedSamplingAtSpanLevel | ||
Dropping activity with id 00-ea861bda268c58d328ab7cbe49851499-daba29055de80a53-00 | ||
and status Ok | ||
Including error activity with id | ||
00-802dea991247e2d699d943167eb546de-cc120b0bd1741b52-01 and status Error | ||
Activity.TraceId: 802dea991247e2d699d943167eb546de | ||
Activity.SpanId: cc120b0bd1741b52 | ||
Activity.TraceFlags: Recorded | ||
Activity.ActivitySourceName: SDK.TailSampling.POC | ||
Activity.DisplayName: SayHello | ||
Activity.Kind: Internal | ||
Activity.StartTime: 2023-02-09T19:05:32.7021138Z | ||
Activity.Duration: 00:00:00.0000012 | ||
Activity.Tags: | ||
foo: bar | ||
StatusCode: Error | ||
Resource associated with Activity: | ||
service.name: unknown_service:Examples.TailBasedSamplingAtSpanLevel | ||
Including head-sampled activity with id | ||
00-f3c88010615e285c8f3cb3e2bcd70c7f-f9316215f12437c3-01 and status Ok | ||
Activity.TraceId: f3c88010615e285c8f3cb3e2bcd70c7f | ||
Activity.SpanId: f9316215f12437c3 | ||
Activity.TraceFlags: Recorded | ||
Activity.ActivitySourceName: SDK.TailSampling.POC | ||
Activity.DisplayName: SayHello | ||
Activity.Kind: Internal | ||
Activity.StartTime: 2023-02-09T19:05:32.8519346Z | ||
Activity.Duration: 00:00:00.0000034 | ||
Activity.Tags: | ||
foo: bar | ||
StatusCode: Ok | ||
Resource associated with Activity: | ||
service.name: unknown_service:Examples.TailBasedSamplingAtSpanLevel | ||
``` |
73 changes: 73 additions & 0 deletions
73
docs/trace/tail-based-sampling-span-level/TailSamplingProcessor.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
// <copyright file="TailSamplingProcessor.cs" company="OpenTelemetry Authors"> | ||
// Copyright The OpenTelemetry Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
// </copyright> | ||
|
||
using System.Diagnostics; | ||
using OpenTelemetry; | ||
|
||
namespace SDKBasedSpanLevelTailSamplingSample; | ||
|
||
/// <summary>
/// A custom processor that filters <see cref="Activity"/> instances when they end.
/// </summary>
/// <remarks>
/// This is a basic form of tail-based sampling at a span level: a failed span is always
/// sampled, in addition to all head-sampled spans. Each span is filtered individually,
/// without considering any other spans. Tail-sampling this way involves many tradeoffs.
/// A few examples:
/// 1. Performance: unlike head-based sampling, where the sampling decision is made at span
///    creation time, in tail sampling the decision is made only at the end, so there is
///    additional memory cost.
/// 2. Traces will not be complete: since this sampling is at a span level, the generated
///    trace will be partial. For example, if another part of the call tree is successful,
///    those spans may not be sampled in, leading to a partial trace.
/// 3. If multiple exporters are used, this decision will impact all of them:
///    https://github.com/open-telemetry/opentelemetry-dotnet/issues/3861.
/// </remarks>
internal sealed class TailSamplingProcessor : BaseProcessor<Activity>
{
    /// <summary>
    /// Decides at the end of an activity whether it should be exported, and logs the outcome.
    /// </summary>
    /// <param name="activity">The activity that has just ended.</param>
    public override void OnEnd(Activity activity)
    {
        if (activity.Recorded)
        {
            // Already included by head-based sampling; keep that decision untouched.
            Console.WriteLine($"Including head-sampled activity with id {activity.Id} and status {activity.Status}");
        }
        else if (activity.Status == ActivityStatusCode.Error)
        {
            // Failure spans are always included: set the recorded flag so that
            // this activity will be exported.
            activity.ActivityTraceFlags |= ActivityTraceFlags.Recorded;
            Console.WriteLine($"Including error activity with id {activity.Id} and status {activity.Status}");
        }
        else
        {
            // This span is not sampled and exporters won't see this span.
            Console.WriteLine($"Dropping activity with id {activity.Id} and status {activity.Status}");
        }

        base.OnEnd(activity);
    }
}
5 changes: 5 additions & 0 deletions
5
docs/trace/tail-based-sampling-span-level/tail-based-sampling-example.csproj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
<ItemGroup> | ||
<ProjectReference Include="$(RepoRoot)\src\OpenTelemetry.Exporter.Console\OpenTelemetry.Exporter.Console.csproj" /> | ||
</ItemGroup> | ||
</Project> |