Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new components to allow for generating metrics from 100% of spans without impacting sampling #802

Merged
merged 2 commits into from
May 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/

package io.opentelemetry.contrib.awsxray;

import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.trace.SpanKind;
import io.opentelemetry.api.trace.TraceState;
import io.opentelemetry.context.Context;
import io.opentelemetry.sdk.trace.data.LinkData;
import io.opentelemetry.sdk.trace.samplers.Sampler;
import io.opentelemetry.sdk.trace.samplers.SamplingDecision;
import io.opentelemetry.sdk.trace.samplers.SamplingResult;
import java.util.List;
import javax.annotation.concurrent.Immutable;

/**
 * A {@link Sampler} decorator that delegates every sampling decision to the wrapped {@link
 * #rootSampler} and upgrades a {@link SamplingDecision#DROP} decision to {@link
 * SamplingDecision#RECORD_ONLY}. Any other decision returned by the root sampler is passed through
 * untouched, so the set of sampled spans is exactly what the root sampler chose.
 *
 * <p>The intended use is to let a processor observe every span (for example to count or measure
 * them) without changing the sampling rate, and therefore without incurring the cost of 100%
 * sampling.
 */
@Immutable
public final class AlwaysRecordSampler implements Sampler {

  /** The sampler whose decisions are delegated to and, when they are DROP, upgraded. */
  private final Sampler rootSampler;

  private AlwaysRecordSampler(Sampler rootSampler) {
    this.rootSampler = rootSampler;
  }

  /**
   * Creates a sampler wrapping {@code rootSampler}.
   *
   * @param rootSampler the sampler that makes the underlying sampling decision
   * @return a new {@link AlwaysRecordSampler} delegating to {@code rootSampler}
   */
  public static AlwaysRecordSampler create(Sampler rootSampler) {
    return new AlwaysRecordSampler(rootSampler);
  }

  /**
   * Asks the root sampler for its result and returns it as-is unless its decision is {@link
   * SamplingDecision#DROP}, in which case a {@link SamplingDecision#RECORD_ONLY} view of that
   * result is returned instead.
   */
  @Override
  public SamplingResult shouldSample(
      Context parentContext,
      String traceId,
      String name,
      SpanKind spanKind,
      Attributes attributes,
      List<LinkData> parentLinks) {
    SamplingResult rootResult =
        rootSampler.shouldSample(parentContext, traceId, name, spanKind, attributes, parentLinks);
    if (rootResult.getDecision() != SamplingDecision.DROP) {
      return rootResult;
    }
    return asRecordOnly(rootResult);
  }

  @Override
  public String getDescription() {
    return "AlwaysRecordSampler{" + rootSampler.getDescription() + "}";
  }

  /**
   * Returns a result that reports {@link SamplingDecision#RECORD_ONLY} while forwarding the
   * attributes and trace-state update of {@code dropped}.
   */
  private static SamplingResult asRecordOnly(SamplingResult dropped) {
    return new SamplingResult() {
      @Override
      public SamplingDecision getDecision() {
        return SamplingDecision.RECORD_ONLY;
      }

      @Override
      public Attributes getAttributes() {
        return dropped.getAttributes();
      }

      @Override
      public TraceState getUpdatedTraceState(TraceState parentTraceState) {
        return dropped.getUpdatedTraceState(parentTraceState);
      }
    };
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ final class AwsAttributeKeys {

private AwsAttributeKeys() {}

static final AttributeKey<String> AWS_SPAN_KIND = AttributeKey.stringKey("aws.span.kind");

static final AttributeKey<String> AWS_LOCAL_SERVICE = AttributeKey.stringKey("aws.local.service");

static final AttributeKey<String> AWS_LOCAL_OPERATION =
AttributeKey.stringKey("aws.local.operation");

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/

package io.opentelemetry.contrib.awsxray;

import static io.opentelemetry.contrib.awsxray.AwsAttributeKeys.AWS_LOCAL_OPERATION;
import static io.opentelemetry.contrib.awsxray.AwsAttributeKeys.AWS_LOCAL_SERVICE;
import static io.opentelemetry.contrib.awsxray.AwsAttributeKeys.AWS_REMOTE_OPERATION;
import static io.opentelemetry.contrib.awsxray.AwsAttributeKeys.AWS_REMOTE_SERVICE;
import static io.opentelemetry.contrib.awsxray.AwsAttributeKeys.AWS_SPAN_KIND;
import static io.opentelemetry.semconv.resource.attributes.ResourceAttributes.SERVICE_NAME;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.DB_OPERATION;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.DB_SYSTEM;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.FAAS_INVOKED_NAME;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.FAAS_INVOKED_PROVIDER;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.GRAPHQL_OPERATION_TYPE;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.MESSAGING_OPERATION;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.MESSAGING_SYSTEM;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.PEER_SERVICE;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.RPC_METHOD;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.RPC_SERVICE;

import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.common.AttributesBuilder;
import io.opentelemetry.api.trace.SpanKind;
import io.opentelemetry.sdk.resources.Resource;
import io.opentelemetry.sdk.trace.data.SpanData;
import io.opentelemetry.semconv.resource.attributes.ResourceAttributes;
import io.opentelemetry.semconv.trace.attributes.SemanticAttributes;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
* AwsMetricAttributeGenerator generates very specific metric attributes based on low-cardinality
* span and resource attributes. If such attributes are not present, we fall back to default values.
*
* <p>The goal of these particular metric attributes is to get metrics for incoming and outgoing
* traffic for a service. Namely, {@link SpanKind#SERVER} and {@link SpanKind#CONSUMER} spans
* represent "incoming" traffic, {@link SpanKind#CLIENT} and {@link SpanKind#PRODUCER} spans
* represent "outgoing" traffic, and {@link SpanKind#INTERNAL} spans are ignored.
*/
final class AwsMetricAttributeGenerator implements MetricAttributeGenerator {

private static final Logger logger =
Logger.getLogger(AwsMetricAttributeGenerator.class.getName());

// Special SERVICE attribute value if GRAPHQL_OPERATION_TYPE attribute key is present.
private static final String GRAPHQL = "graphql";

// Default attribute values if no valid span attribute value is identified
private static final String UNKNOWN_SERVICE = "UnknownService";
private static final String UNKNOWN_OPERATION = "UnknownOperation";
private static final String UNKNOWN_REMOTE_SERVICE = "UnknownRemoteService";
private static final String UNKNOWN_REMOTE_OPERATION = "UnknownRemoteOperation";

@Override
public Attributes generateMetricAttributesFromSpan(SpanData span, Resource resource) {
thpierce marked this conversation as resolved.
Show resolved Hide resolved
AttributesBuilder builder = Attributes.builder();
switch (span.getKind()) {
case CONSUMER:
case SERVER:
setService(resource, span, builder);
setIngressOperation(span, builder);
setSpanKind(span, builder);
break;
case PRODUCER:
case CLIENT:
setService(resource, span, builder);
setEgressOperation(span, builder);
setRemoteServiceAndOperation(span, builder);
setSpanKind(span, builder);
break;
default:
// Add no attributes, signalling no metrics should be emitted.
}
return builder.build();
}

/**
 * Writes {@code AWS_LOCAL_SERVICE} from the resource's {@link ResourceAttributes#SERVICE_NAME}.
 * When the resource has no service name, the miss is logged and {@code UNKNOWN_SERVICE} is used.
 */
private static void setService(Resource resource, SpanData span, AttributesBuilder builder) {
  String resourceServiceName = resource.getAttribute(SERVICE_NAME);
  if (resourceServiceName != null) {
    builder.put(AWS_LOCAL_SERVICE, resourceServiceName);
    return;
  }
  logUnknownAttribute(AWS_LOCAL_SERVICE, span);
  builder.put(AWS_LOCAL_SERVICE, UNKNOWN_SERVICE);
}

/**
 * Writes {@code AWS_LOCAL_OPERATION} for ingress (Server and Consumer) spans from the span name.
 * When the span name is null, the miss is logged and {@code UNKNOWN_OPERATION} is used.
 */
private static void setIngressOperation(SpanData span, AttributesBuilder builder) {
  String spanName = span.getName();
  if (spanName != null) {
    builder.put(AWS_LOCAL_OPERATION, spanName);
    return;
  }
  logUnknownAttribute(AWS_LOCAL_OPERATION, span);
  builder.put(AWS_LOCAL_OPERATION, UNKNOWN_OPERATION);
}

/**
 * Writes {@code AWS_LOCAL_OPERATION} for egress (Client and Producer) spans from the span's own
 * {@link AwsAttributeKeys#AWS_LOCAL_OPERATION} attribute, which is generated by a separate
 * SpanProcessor, {@link AttributePropagatingSpanProcessor}. When the span does not carry that
 * attribute, the miss is logged and {@code UNKNOWN_OPERATION} is used.
 */
private static void setEgressOperation(SpanData span, AttributesBuilder builder) {
  String propagatedOperation = span.getAttributes().get(AWS_LOCAL_OPERATION);
  if (propagatedOperation != null) {
    builder.put(AWS_LOCAL_OPERATION, propagatedOperation);
    return;
  }
  logUnknownAttribute(AWS_LOCAL_OPERATION, span);
  builder.put(AWS_LOCAL_OPERATION, UNKNOWN_OPERATION);
}

/**
* Remote attributes (only for Client and Producer spans) are generated based on low-cardinality
* span attributes, in priority order.
*
* <p>The first priority is the AWS Remote attributes, which are generated from manually
* instrumented span attributes, and are clear indications of customer intent. If AWS Remote
* attributes are not present, the next highest priority span attribute is Peer Service, which is
* also a reliable indicator of customer intent. If this is set, it will override
* AWS_REMOTE_SERVICE identified from any other span attribute, other than AWS Remote attributes.
*
* <p>After this, we look for the following low-cardinality span attributes that can be used to
* determine the remote metric attributes:
*
* <ul>
* <li>RPC
* <li>DB
* <li>FAAS
* <li>Messaging
* <li>GraphQL - Special case, if {@link SemanticAttributes#GRAPHQL_OPERATION_TYPE} is present,
* we use it for RemoteOperation and set RemoteService to {@link #GRAPHQL}.
* </ul>
*
* <p>In each case, these span attributes were selected from the OpenTelemetry trace semantic
* convention specifications as they adhere to the three following criteria:
*
* <ul>
* <li>Attributes are meaningfully indicative of remote service/operation names.
* <li>Attributes are defined in the specification to be low cardinality, usually with a low-
* cardinality list of values.
* <li>Attributes are confirmed to have low-cardinality values, based on code analysis.
* </ul>
*
* TODO: This specific logic may change in future. Specifically, we are still deciding which HTTP
* and RPC attributes to use here, but this is a sufficient starting point.
*/
private static void setRemoteServiceAndOperation(SpanData span, AttributesBuilder builder) {
if (isKeyPresent(span, AWS_REMOTE_SERVICE) || isKeyPresent(span, AWS_REMOTE_OPERATION)) {
setRemoteService(span, builder, AWS_REMOTE_SERVICE);
setRemoteOperation(span, builder, AWS_REMOTE_OPERATION);
} else if (isKeyPresent(span, RPC_SERVICE) || isKeyPresent(span, RPC_METHOD)) {
setRemoteService(span, builder, RPC_SERVICE);
setRemoteOperation(span, builder, RPC_METHOD);
} else if (isKeyPresent(span, DB_SYSTEM) || isKeyPresent(span, DB_OPERATION)) {
setRemoteService(span, builder, DB_SYSTEM);
setRemoteOperation(span, builder, DB_OPERATION);
} else if (isKeyPresent(span, FAAS_INVOKED_PROVIDER) || isKeyPresent(span, FAAS_INVOKED_NAME)) {
setRemoteService(span, builder, FAAS_INVOKED_PROVIDER);
setRemoteOperation(span, builder, FAAS_INVOKED_NAME);
} else if (isKeyPresent(span, MESSAGING_SYSTEM) || isKeyPresent(span, MESSAGING_OPERATION)) {
setRemoteService(span, builder, MESSAGING_SYSTEM);
setRemoteOperation(span, builder, MESSAGING_OPERATION);
} else if (isKeyPresent(span, GRAPHQL_OPERATION_TYPE)) {
builder.put(AWS_REMOTE_SERVICE, GRAPHQL);
setRemoteOperation(span, builder, GRAPHQL_OPERATION_TYPE);
} else {
logUnknownAttribute(AWS_REMOTE_SERVICE, span);
builder.put(AWS_REMOTE_SERVICE, UNKNOWN_REMOTE_SERVICE);
logUnknownAttribute(AWS_REMOTE_OPERATION, span);
builder.put(AWS_REMOTE_OPERATION, UNKNOWN_REMOTE_OPERATION);
}

// Peer service takes priority as RemoteService over everything but AWS Remote.
if (isKeyPresent(span, PEER_SERVICE) && !isKeyPresent(span, AWS_REMOTE_SERVICE)) {
setRemoteService(span, builder, PEER_SERVICE);
Comment on lines +181 to +182
Copy link
Member

@mxiamxia mxiamxia May 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a callout but not a blocker - is it expected that we have added the log entry by logUnknownAttribute(AWS_REMOTE_SERVICE, span); but the RemoteService is actually set properly here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea I see what you mean, it's a little bit odd. I think for now it's ok. We plan to revise a bit of this logic in future, and it's not totally unreasonable for the time being. We don't expect PEER_SERVICE to be frequently applied. Will make a note for the future.

}
}

/**
 * Writes the name of the span's kind as {@code AWS_SPAN_KIND}; it is needed for differentiating
 * metrics in the EMF exporter.
 */
private static void setSpanKind(SpanData span, AttributesBuilder builder) {
  builder.put(AWS_SPAN_KIND, span.getKind().name());
}

/** Returns whether {@code span} carries a non-null value for the attribute {@code key}. */
private static boolean isKeyPresent(SpanData span, AttributeKey<String> key) {
  String attributeValue = span.getAttributes().get(key);
  return attributeValue != null;
}

/**
 * Writes {@code AWS_REMOTE_SERVICE} from the span attribute {@code remoteServiceKey}. When the
 * span has no value for that key, the miss is logged and {@code UNKNOWN_REMOTE_SERVICE} is used.
 */
private static void setRemoteService(
    SpanData span, AttributesBuilder builder, AttributeKey<String> remoteServiceKey) {
  String serviceValue = span.getAttributes().get(remoteServiceKey);
  if (serviceValue != null) {
    builder.put(AWS_REMOTE_SERVICE, serviceValue);
    return;
  }
  logUnknownAttribute(AWS_REMOTE_SERVICE, span);
  builder.put(AWS_REMOTE_SERVICE, UNKNOWN_REMOTE_SERVICE);
}

/**
 * Writes {@code AWS_REMOTE_OPERATION} from the span attribute {@code remoteOperationKey}. When
 * the span has no value for that key, the miss is logged and {@code UNKNOWN_REMOTE_OPERATION} is
 * used.
 */
private static void setRemoteOperation(
    SpanData span, AttributesBuilder builder, AttributeKey<String> remoteOperationKey) {
  String operationValue = span.getAttributes().get(remoteOperationKey);
  if (operationValue != null) {
    builder.put(AWS_REMOTE_OPERATION, operationValue);
    return;
  }
  logUnknownAttribute(AWS_REMOTE_OPERATION, span);
  builder.put(AWS_REMOTE_OPERATION, UNKNOWN_REMOTE_OPERATION);
}

/**
 * Logs, at {@link Level#FINEST}, that no valid value was found for {@code attributeKey} on
 * {@code span}. The message carries the attribute key, the span kind name and the span id.
 */
private static void logUnknownAttribute(AttributeKey<String> attributeKey, SpanData span) {
  String attributeName = attributeKey.getKey();
  String spanKindName = span.getKind().name();
  String spanId = span.getSpanContext().getSpanId();
  logger.log(
      Level.FINEST,
      "No valid {0} value found for {1} span {2}",
      new String[] {attributeName, spanKindName, spanId});
}
}
Loading