Skip to content

Commit

Permalink
Add new components to allow for generating metrics from 100% of spans…
Browse files Browse the repository at this point in the history
… without impacting sampling

In this commit, we are adding several new components to the awsxray package:
* AlwaysRecordSampler - A simple aggregate sampler that always ensures spans are "recorded" (i.e. sent to span processors). This allows us to generate metrics from 100% of spans without impacting true sampling rate.
* SpanMetricsProcessor - A span processor that will generate specific metrics pertaining to latency, faults, and errors. Relies on a MetricAttributeGenerator to build attributes for the metrics, and wraps these metric attributes around the span attributes before passing the span to a delegate span processor for further processing/exporting.
* MetricAttributeGenerator - A generic interface for components that consume spans and resources and export attributes for metrics generated from these spans.
* AwsMetricAttributeGenerator - A specific implementation of MetricAttributeGenerator, used for generating AWS-specific attributes.
* SpanMetricsProcessorBuilder - A builder class for SpanMetricsProcessor.

Related issue: #789
  • Loading branch information
thpierce committed Mar 30, 2023
1 parent 241d1ab commit 6a49d3f
Show file tree
Hide file tree
Showing 8 changed files with 1,542 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/

package io.opentelemetry.contrib.awsxray;

import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.trace.SpanKind;
import io.opentelemetry.api.trace.TraceState;
import io.opentelemetry.context.Context;
import io.opentelemetry.sdk.trace.data.LinkData;
import io.opentelemetry.sdk.trace.samplers.Sampler;
import io.opentelemetry.sdk.trace.samplers.SamplingDecision;
import io.opentelemetry.sdk.trace.samplers.SamplingResult;
import java.util.List;
import javax.annotation.concurrent.Immutable;

/**
 * A {@link Sampler} that delegates every decision to the provided {@link #rootSampler} and hands
 * the delegate's result back untouched, with one exception: a result carrying {@link
 * SamplingDecision#DROP} is replaced by a functionally equivalent result whose decision is {@link
 * SamplingDecision#RECORD_ONLY}. Every span is therefore recorded, while the set of spans that
 * end up sampled is exactly what the root sampler chose.
 *
 * <p>Use this sampler when all spans must reach a span processor (for example, to count or
 * otherwise measure every span produced by an application) without paying the cost of sampling
 * 100% of them.
 */
@Immutable
public final class AlwaysRecordSampler implements Sampler {

  private final Sampler rootSampler;

  private AlwaysRecordSampler(Sampler rootSampler) {
    this.rootSampler = rootSampler;
  }

  /**
   * Creates a sampler that upgrades the {@link SamplingDecision#DROP} results of {@code
   * rootSampler} to {@link SamplingDecision#RECORD_ONLY}.
   *
   * @param rootSampler the sampler whose decisions are delegated to
   * @return a new {@link AlwaysRecordSampler} wrapping {@code rootSampler}
   */
  public static AlwaysRecordSampler create(Sampler rootSampler) {
    return new AlwaysRecordSampler(rootSampler);
  }

  @Override
  public SamplingResult shouldSample(
      Context parentContext,
      String traceId,
      String name,
      SpanKind spanKind,
      Attributes attributes,
      List<LinkData> parentLinks) {
    SamplingResult delegateResult =
        rootSampler.shouldSample(parentContext, traceId, name, spanKind, attributes, parentLinks);

    // Only DROP is rewritten; every other decision is passed through as-is.
    return delegateResult.getDecision() == SamplingDecision.DROP
        ? asRecordOnly(delegateResult)
        : delegateResult;
  }

  @Override
  public String getDescription() {
    String rootDescription = rootSampler.getDescription();
    return "AlwaysRecordSampler{" + rootDescription + "}";
  }

  /**
   * Builds a view over {@code original} that reports {@link SamplingDecision#RECORD_ONLY} while
   * forwarding the attributes and trace state lookups to {@code original}.
   */
  private static SamplingResult asRecordOnly(SamplingResult original) {
    return new SamplingResult() {
      @Override
      public SamplingDecision getDecision() {
        return SamplingDecision.RECORD_ONLY;
      }

      @Override
      public Attributes getAttributes() {
        return original.getAttributes();
      }

      @Override
      public TraceState getUpdatedTraceState(TraceState parentTraceState) {
        return original.getUpdatedTraceState(parentTraceState);
      }
    };
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/

package io.opentelemetry.contrib.awsxray;

import static io.opentelemetry.contrib.awsxray.AwsAttributeKeys.AWS_LOCAL_OPERATION;
import static io.opentelemetry.contrib.awsxray.AwsAttributeKeys.AWS_REMOTE_APPLICATION;
import static io.opentelemetry.contrib.awsxray.AwsAttributeKeys.AWS_REMOTE_OPERATION;
import static io.opentelemetry.semconv.resource.attributes.ResourceAttributes.SERVICE_NAME;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.DB_OPERATION;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.DB_SYSTEM;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.FAAS_INVOKED_NAME;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.FAAS_INVOKED_PROVIDER;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.GRAPHQL_OPERATION_TYPE;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.MESSAGING_OPERATION;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.MESSAGING_SYSTEM;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.PEER_SERVICE;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.RPC_METHOD;
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.RPC_SERVICE;

import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.common.AttributesBuilder;
import io.opentelemetry.api.trace.SpanKind;
import io.opentelemetry.sdk.resources.Resource;
import io.opentelemetry.sdk.trace.ReadableSpan;
import io.opentelemetry.semconv.resource.attributes.ResourceAttributes;
import io.opentelemetry.semconv.trace.attributes.SemanticAttributes;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
* AwsMetricAttributeGenerator generates very specific metric attributes based on low-cardinality
* span and resource attributes. If such attributes are not present, we fallback to default values.
*
* <p>The goal of these particular metric attributes is to get metrics for incoming and outgoing
* traffic for an application. Namely, {@link SpanKind#SERVER} amd {@link SpanKind#CONSUMER} spans
* represent "incoming" traffic, {@link SpanKind#CLIENT} and {@link SpanKind#PRODUCER} spans
* represent "outgoing" traffic, and {@link SpanKind#INTERNAL} spans are ignored.
*/
final class AwsMetricAttributeGenerator implements MetricAttributeGenerator {

private static final Logger logger =
Logger.getLogger(AwsMetricAttributeGenerator.class.getName());

// Generated metric attribute keys
private static final AttributeKey<String> APPLICATION = AttributeKey.stringKey("Application");
private static final AttributeKey<String> OPERATION = AttributeKey.stringKey("Operation");
private static final AttributeKey<String> REMOTE_APPLICATION =
AttributeKey.stringKey("RemoteApplication");
private static final AttributeKey<String> REMOTE_OPERATION =
AttributeKey.stringKey("RemoteOperation");
private static final AttributeKey<String> SPAN_KIND = AttributeKey.stringKey("span.kind");

// Special APPLICATION attribute value if GRAPHQL_OPERATION_TYPE attribute key is present.
private static final String GRAPHQL = "graphql";

// Default attribute values if no valid span attribute value is identified
private static final String UNKNOWN_APPLICATION = "UnknownApplication";
private static final String UNKNOWN_OPERATION = "UnknownOperation";
private static final String UNKNOWN_REMOTE_APPLICATION = "UnknownRemoteApplication";
private static final String UNKNOWN_REMOTE_OPERATION = "UnknownRemoteOperation";

@Override
public Attributes generateMetricAttributesFromSpan(ReadableSpan span, Resource resource) {
AttributesBuilder builder = Attributes.builder();
switch (span.getKind()) {
case CONSUMER:
case SERVER:
setApplication(resource, span, builder);
setIngressOperation(span, builder);
setSpanKind(span, builder);
break;
case PRODUCER:
case CLIENT:
setApplication(resource, span, builder);
setEgressOperation(span, builder);
setRemoteApplicationAndOperation(span, builder);
setSpanKind(span, builder);
break;
default:
// Add no attributes, signalling no metrics should be emitted.
}
return builder.build();
}

/** Application is always derived from {@link ResourceAttributes#SERVICE_NAME} */
private static void setApplication(
Resource resource, ReadableSpan span, AttributesBuilder builder) {
String application = resource.getAttribute(SERVICE_NAME);
if (application == null) {
logUnknownAttribute(APPLICATION, span);
application = UNKNOWN_APPLICATION;
}
builder.put(APPLICATION, application);
}

/**
* Ingress operation (i.e. operation for Server and Consumer spans) is always derived from span
* name.
*/
private static void setIngressOperation(ReadableSpan span, AttributesBuilder builder) {
String operation = span.getName();
if (operation == null) {
logUnknownAttribute(OPERATION, span);
operation = UNKNOWN_OPERATION;
}
builder.put(OPERATION, operation);
}

/**
* Egress operation (i.e. operation for Client and Producer spans) is always derived from a
* special span attribute, {@link AwsAttributeKeys#AWS_LOCAL_OPERATION}. This attribute is
* generated with a separate SpanProcessor, {@link LocalAttributesSpanProcessor}
*/
private static void setEgressOperation(ReadableSpan span, AttributesBuilder builder) {
String operation = span.getAttribute(AWS_LOCAL_OPERATION);
if (operation == null) {
logUnknownAttribute(OPERATION, span);
operation = UNKNOWN_OPERATION;
}
builder.put(OPERATION, operation);
}

/**
* Remote attributes (only for Client and Producer spans) are generated based on low-cardinality
* span attributes, in priority order.
*
* <p>The first priority is the AWS Remote attributes, which are generated from manually
* instrumented span attributes, and are clear indications of customer intent. If AWS Remote
* attributes are not present, the next highest priority span attribute is Peer Service, which is
* also a reliable indicator of customer intent. If this is set, it will override {@link
* #REMOTE_APPLICATION} identified from any other span attribute, other than AWS Remote
* attributes.
*
* <p>After this, we look for the following low-c∂ardinality span attributes that can be used to
* determine the remote metric attributes:
*
* <ul>
* <li>RPC
* <li>DB
* <li>FAAS
* <li>Messaging
* <li>GraphQL - Special case, if {@link SemanticAttributes#GRAPHQL_OPERATION_TYPE} is present,
* we use it for RemoteOperation and set RemoteApplication to {@link #GRAPHQL}.
* </ul>
*
* <p>In each case, these span attributes were selected from the OpenTelemetry trace semantic
* convention specifications as they adhere to the three following criteria:
*
* <ul>
* <li>Attributes are meaningfully indicative of remote service/operation names.
* <li>Attributes are defined in the specification to be low cardinality, usually with a low-
* cardinality list of values.
* <li>Attributes are confirmed to have low-cardinality values, based on code analysis.
* </ul>
*/
private static void setRemoteApplicationAndOperation(
ReadableSpan span, AttributesBuilder builder) {
if (isKeyPresent(span, AWS_REMOTE_APPLICATION) || isKeyPresent(span, AWS_REMOTE_OPERATION)) {
setRemoteApplication(span, builder, AWS_REMOTE_APPLICATION);
setRemoteOperation(span, builder, AWS_REMOTE_OPERATION);
} else if (isKeyPresent(span, RPC_SERVICE) || isKeyPresent(span, RPC_METHOD)) {
setRemoteApplication(span, builder, RPC_SERVICE);
setRemoteOperation(span, builder, RPC_METHOD);
} else if (isKeyPresent(span, DB_SYSTEM) || isKeyPresent(span, DB_OPERATION)) {
setRemoteApplication(span, builder, DB_SYSTEM);
setRemoteOperation(span, builder, DB_OPERATION);
} else if (isKeyPresent(span, FAAS_INVOKED_PROVIDER) || isKeyPresent(span, FAAS_INVOKED_NAME)) {
setRemoteApplication(span, builder, FAAS_INVOKED_PROVIDER);
setRemoteOperation(span, builder, FAAS_INVOKED_NAME);
} else if (isKeyPresent(span, MESSAGING_SYSTEM) || isKeyPresent(span, MESSAGING_OPERATION)) {
setRemoteApplication(span, builder, MESSAGING_SYSTEM);
setRemoteOperation(span, builder, MESSAGING_OPERATION);
} else if (isKeyPresent(span, GRAPHQL_OPERATION_TYPE)) {
builder.put(REMOTE_APPLICATION, GRAPHQL);
setRemoteOperation(span, builder, GRAPHQL_OPERATION_TYPE);
} else {
logUnknownAttribute(REMOTE_APPLICATION, span);
builder.put(REMOTE_APPLICATION, UNKNOWN_REMOTE_APPLICATION);
logUnknownAttribute(REMOTE_OPERATION, span);
builder.put(REMOTE_OPERATION, UNKNOWN_REMOTE_OPERATION);
}

// Peer service takes priority as RemoteApplication over everything but AWS Remote.
if (isKeyPresent(span, PEER_SERVICE) && !isKeyPresent(span, AWS_REMOTE_APPLICATION)) {
setRemoteApplication(span, builder, PEER_SERVICE);
}
}

/** Span kind is needed for differentiating metrics in the EMF exporter */
private static void setSpanKind(ReadableSpan span, AttributesBuilder builder) {
String spanKind = span.getKind().name();
builder.put(SPAN_KIND, spanKind);
}

private static boolean isKeyPresent(ReadableSpan span, AttributeKey<String> key) {
return span.getAttribute(key) != null;
}

private static void setRemoteApplication(
ReadableSpan span, AttributesBuilder builder, AttributeKey<String> remoteApplicationKey) {
String remoteApplication = span.getAttribute(remoteApplicationKey);
if (remoteApplication == null) {
logUnknownAttribute(REMOTE_APPLICATION, span);
remoteApplication = UNKNOWN_REMOTE_APPLICATION;
}
builder.put(REMOTE_APPLICATION, remoteApplication);
}

private static void setRemoteOperation(
ReadableSpan span, AttributesBuilder builder, AttributeKey<String> remoteOperationKey) {
String remoteOperation = span.getAttribute(remoteOperationKey);
if (remoteOperation == null) {
logUnknownAttribute(REMOTE_OPERATION, span);
remoteOperation = UNKNOWN_REMOTE_OPERATION;
}
builder.put(REMOTE_OPERATION, remoteOperation);
}

private static void logUnknownAttribute(AttributeKey<String> attributeKey, ReadableSpan span) {
String[] params = {
attributeKey.getKey(), span.getKind().name(), span.getSpanContext().getSpanId()
};
logger.log(Level.FINEST, "No valid {0} value found for {1} span {2}", params);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/

package io.opentelemetry.contrib.awsxray;

import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.sdk.resources.Resource;
import io.opentelemetry.sdk.trace.ReadableSpan;

/**
* Metric attribute generator defines an interface for classes that can generate specific attributes
* to be used by a {@link SpanMetricsProcessor} to produce metrics and wrap the original span.
*
* <p>Implementations receive a span together with its associated resource and return the
* attributes that should be attached to metrics produced from that span.
*/
public interface MetricAttributeGenerator {

/**
* Given a span and associated resource, produce meaningful metric attributes for metrics produced
* from the span. If no metrics should be generated from this span, return {@link
* Attributes#empty()}; an empty result is the signal that the span should be skipped.
*
* @param span the span to be used to generate metric attributes
* @param resource the resource associated with the span, also used to generate metric attributes
* @return a set of zero or more attributes; must not be {@code null}
*/
Attributes generateMetricAttributesFromSpan(ReadableSpan span, Resource resource);
}
Loading

0 comments on commit 6a49d3f

Please sign in to comment.