Skip to content

Commit

Permalink
fix: start trace exporter only if required (#8147)
Browse files Browse the repository at this point in the history
This PR splits the collector URL (`OTEL_EXPORTER_OTLP_ENDPOINT`) into
`OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` and `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT`, and makes exporting traces optional (if no span
processor is registered, the default processor is a no-op).

See
https://www.npmjs.com/package/@opentelemetry/exporter-trace-otlp-http#user-content-configuration-options-as-environment-variables
  • Loading branch information
alexghr authored and spalladino committed Aug 27, 2024
1 parent 56235a5 commit 5eaaac5
Show file tree
Hide file tree
Showing 15 changed files with 59 additions and 45 deletions.
14 changes: 8 additions & 6 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ services:
P2P_ENABLED: true
PEER_ID_PRIVATE_KEY:
AZTEC_PORT: 8999
OTEL_EXPORTER_OTLP_ENDPOINT: ${OTEL_EXPORTER_OTLP_ENDPOINT:-http://otel-collector:4318}
OTEL_EXPORTER_OTLP_METRICS_ENDPOINT: ${OTEL_EXPORTER_OTLP_METRICS_ENDPOINT:-http://otel-collector:4318/v1/metrics}
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT: ${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://otel-collector:4318/v1/traces}
secrets:
- ethereum-host
- p2p-boot-node
Expand All @@ -77,13 +78,14 @@ services:
# if the stack is started with --profile metrics --profile node, give the collector a chance to start before the node
i=0
max=3
while ! curl --head --silent $$OTEL_EXPORTER_OTLP_ENDPOINT > /dev/null; do
while ! curl --head --silent $$OTEL_EXPORTER_OTLP_METRICS_ENDPOINT > /dev/null; do
echo "OpenTelemetry collector not up. Retrying after 1s";
sleep 1;
i=$$((i+1));
if [ $$i -eq $$max ]; then
echo "OpenTelemetry collector at $$OTEL_EXPORTER_OTLP_ENDPOINT not up after $${max}s. Running without metrics";
unset OTEL_EXPORTER_OTLP_ENDPOINT;
# Collector never came up: report the URL that was probed, then clear both OTLP endpoint variables
# (full OTEL_EXPORTER_OTLP_* names, matching the environment block) so the node starts without telemetry.
echo "OpenTelemetry collector at $$OTEL_EXPORTER_OTLP_METRICS_ENDPOINT not up after $${max}s. Running without metrics";
unset OTEL_EXPORTER_OTLP_METRICS_ENDPOINT;
unset OTEL_EXPORTER_OTLP_TRACES_ENDPOINT;
break
fi;
done;
Expand Down Expand Up @@ -171,8 +173,8 @@ configs:
prometheus-config:
content: |
global:
evaluation_interval: 30s
scrape_interval: 10s
evaluation_interval: 15s
scrape_interval: 15s
scrape_configs:
- job_name: otel-collector
static_configs:
Expand Down
2 changes: 1 addition & 1 deletion yarn-project/aztec/src/cli/cmds/start_archiver.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ export const startArchiver = async (options: any, signalHandlers: (() => Promise
const store = await createStore(archiverConfig, rollupAddress, storeLog);
const archiverStore = new KVArchiverDataStore(store, archiverConfig.maxLogs);

const telemetry = createAndStartTelemetryClient(getTelemetryClientConfig());
const telemetry = await createAndStartTelemetryClient(getTelemetryClientConfig());
const archiver = await Archiver.createAndSync(archiverConfig, archiverStore, telemetry, true);
const archiverServer = createArchiverRpcServer(archiver);
services.push({ archiver: archiverServer });
Expand Down
2 changes: 1 addition & 1 deletion yarn-project/aztec/src/cli/cmds/start_node.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ export const startNode = async (
}

const telemetryConfig = extractRelevantOptions<TelemetryClientConfig>(options, telemetryClientConfigMappings, 'tel');
const telemetryClient = createAndStartTelemetryClient(telemetryConfig);
const telemetryClient = await createAndStartTelemetryClient(telemetryConfig);

// Create and start Aztec Node.
const node = await createAztecNode(nodeConfig, telemetryClient);
Expand Down
2 changes: 1 addition & 1 deletion yarn-project/aztec/src/cli/cmds/start_prover_agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ export const startProverAgent: ServiceStarter = async (options, signalHandlers,
const source = createProvingJobSourceClient(proverConfig.nodeUrl, 'provingJobSource');

const telemetryConfig = extractRelevantOptions<TelemetryClientConfig>(options, telemetryClientConfigMappings, 'tel');
const telemetry = createAndStartTelemetryClient(telemetryConfig);
const telemetry = await createAndStartTelemetryClient(telemetryConfig);

let circuitProver: ServerCircuitProver;
if (proverConfig.realProofs) {
Expand Down
9 changes: 4 additions & 5 deletions yarn-project/aztec/src/cli/cmds/start_prover_node.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,7 @@ import {
createProverNodeRpcServer,
proverNodeConfigMappings,
} from '@aztec/prover-node';
import {
createAndStartTelemetryClient,
getConfigEnvVars as getTelemetryClientConfig,
} from '@aztec/telemetry-client/start';
import { createAndStartTelemetryClient, telemetryClientConfigMappings } from '@aztec/telemetry-client/start';

import { mnemonicToAccount } from 'viem/accounts';

Expand Down Expand Up @@ -70,7 +67,9 @@ export const startProverNode = async (
proverConfig.l1Contracts = await createAztecNodeClient(nodeUrl).getL1ContractAddresses();
}

const telemetry = createAndStartTelemetryClient(getTelemetryClientConfig());
const telemetry = await createAndStartTelemetryClient(
extractRelevantOptions(options, telemetryClientConfigMappings, 'tel'),
);
const proverNode = await createProverNode(proverConfig, { telemetry });

services.push({ node: createProverNodeRpcServer(proverNode) });
Expand Down
2 changes: 1 addition & 1 deletion yarn-project/aztec/src/sandbox.ts
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ export async function createSandbox(config: Partial<SandboxConfig> = {}) {
await deployContractsToL1(aztecNodeConfig, hdAccount);
}

const client = createAndStartTelemetryClient(getTelemetryClientConfig());
const client = await createAndStartTelemetryClient(getTelemetryClientConfig());
const node = await createAztecNode(aztecNodeConfig, client);
const pxe = await createAztecPXE(node);

Expand Down
4 changes: 2 additions & 2 deletions yarn-project/aztec/terraform/node/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -337,8 +337,8 @@ resource "aws_ecs_task_definition" "aztec-node" {
value = tostring(var.PROVING_ENABLED)
},
{
name = "OTEL_EXPORTER_OTLP_ENDPOINT"
value = "http://aztec-otel.local:4318"
name = "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT"
value = "http://aztec-otel.local:4318/v1/metrics"
},
{
name = "OTEL_SERVICE_NAME"
Expand Down
2 changes: 1 addition & 1 deletion yarn-project/aztec/terraform/prover-node/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ resource "aws_ecs_task_definition" "aztec-prover-node" {
{ name = "PROVER_NODE_MAX_PENDING_JOBS", value = tostring(var.PROVER_NODE_MAX_PENDING_JOBS) },

// Metrics
{ name = "OTEL_EXPORTER_OTLP_ENDPOINT", value = "http://aztec-otel.local:4318" },
{ name = "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", value = "http://aztec-otel.local:4318/v1/metrics" },
{ name = "OTEL_SERVICE_NAME", value = "${var.DEPLOY_TAG}-aztec-prover-node-${count.index + 1}" },

// L1 addresses
Expand Down
4 changes: 2 additions & 2 deletions yarn-project/aztec/terraform/prover/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,8 @@ resource "aws_ecs_task_definition" "aztec-proving-agent" {
"value": "${var.PROVING_ENABLED}"
},
{
"name": "OTEL_EXPORTER_OTLP_ENDPOINT",
"value": "http://aztec-otel.local:4318"
"name": "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT",
"value": "http://aztec-otel.local:4318/v1/metrics"
},
{
"name": "OTEL_SERVICE_NAME",
Expand Down
4 changes: 2 additions & 2 deletions yarn-project/end-to-end/src/fixtures/snapshot_manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ async function setupFromFresh(
aztecNodeConfig.bbWorkingDirectory = bbConfig.bbWorkingDirectory;
}

const telemetry = createAndStartTelemetryClient(getTelemetryConfig());
const telemetry = await createAndStartTelemetryClient(getTelemetryConfig());
logger.verbose('Creating and synching an aztec node...');
const aztecNode = await AztecNodeService.createAndSync(aztecNodeConfig, telemetry);

Expand Down Expand Up @@ -408,7 +408,7 @@ async function setupFromState(statePath: string, logger: Logger): Promise<Subsys
const { publicClient, walletClient } = createL1Clients(aztecNodeConfig.l1RpcUrl, mnemonicToAccount(MNEMONIC));

logger.verbose('Creating aztec node...');
const telemetry = createAndStartTelemetryClient(getTelemetryConfig());
const telemetry = await createAndStartTelemetryClient(getTelemetryConfig());
const aztecNode = await AztecNodeService.createAndSync(aztecNodeConfig, telemetry);

const proverNodePrivateKey = getPrivateKeyFromIndex(2);
Expand Down
6 changes: 4 additions & 2 deletions yarn-project/end-to-end/src/fixtures/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,11 @@ export { deployAndInitializeTokenAndBridgeContracts } from '../shared/cross_chai

const { PXE_URL = '' } = process.env;

const telemetry = createAndStartTelemetryClient(getTelemetryConfig());
const telemetryPromise = createAndStartTelemetryClient(getTelemetryConfig());
if (typeof afterAll === 'function') {
afterAll(async () => {
await telemetry.stop();
const client = await telemetryPromise;
await client.stop();
});
}

Expand Down Expand Up @@ -395,6 +396,7 @@ export async function setup(
}
config.l1PublishRetryIntervalMS = 100;

const telemetry = await telemetryPromise;
const aztecNode = await AztecNodeService.createAndSync(config, telemetry);
const sequencer = aztecNode.getSequencer();

Expand Down
3 changes: 2 additions & 1 deletion yarn-project/foundation/src/config/env_var.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ export type EnvVar =
| 'P2P_QUERY_FOR_IP'
| 'P2P_TX_POOL_KEEP_PROVEN_FOR'
| 'TELEMETRY'
| 'OTEL_EXPORTER_OTLP_ENDPOINT'
| 'OTEL_SERVICE_NAME'
| 'OTEL_EXPORTER_OTLP_METRICS_ENDPOINT'
| 'OTEL_EXPORTER_OTLP_TRACES_ENDPOINT'
| 'NETWORK_NAME'
| 'NETWORK'
| 'API_KEY'
Expand Down
14 changes: 10 additions & 4 deletions yarn-project/telemetry-client/src/config.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
import { type ConfigMappingsType, getConfigFromMappings } from '@aztec/foundation/config';

export interface TelemetryClientConfig {
collectorBaseUrl?: URL;
metricsCollectorUrl?: URL;
tracesCollectorUrl?: URL;
serviceName: string;
networkName: string;
}

export const telemetryClientConfigMappings: ConfigMappingsType<TelemetryClientConfig> = {
collectorBaseUrl: {
env: 'OTEL_EXPORTER_OTLP_ENDPOINT',
description: 'The URL of the telemetry collector',
metricsCollectorUrl: {
env: 'OTEL_EXPORTER_OTLP_METRICS_ENDPOINT',
description: 'The URL of the telemetry collector for metrics',
parseEnv: (val: string) => new URL(val),
},
tracesCollectorUrl: {
env: 'OTEL_EXPORTER_OTLP_TRACES_ENDPOINT',
description: 'The URL of the telemetry collector for traces',
parseEnv: (val: string) => new URL(val),
},
serviceName: {
Expand Down
30 changes: 17 additions & 13 deletions yarn-project/telemetry-client/src/otel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,22 +62,19 @@ export class OpenTelemetryClient implements TelemetryClient {
description: 'Target information',
});

if (this.resource.asyncAttributesPending) {
void this.resource.waitForAsyncAttributes!().then(() => {
this.targetInfo!.record(1, this.resource.attributes);
});
} else {
this.targetInfo.record(1, this.resource.attributes);
}

this.targetInfo.record(1, this.resource.attributes);
this.hostMetrics.start();
}

public async stop() {
await Promise.all([this.meterProvider.shutdown()]);
}

public static createAndStart(collectorBaseUrl: URL, log: DebugLogger): OpenTelemetryClient {
public static async createAndStart(
metricsCollector: URL,
tracesCollector: URL | undefined,
log: DebugLogger,
): Promise<OpenTelemetryClient> {
const resource = detectResourcesSync({
detectors: [
osDetectorSync,
Expand All @@ -90,20 +87,27 @@ export class OpenTelemetryClient implements TelemetryClient {
],
});

if (resource.asyncAttributesPending) {
await resource.waitForAsyncAttributes!();
}

const tracerProvider = new NodeTracerProvider({
resource,
});
tracerProvider.addSpanProcessor(
new BatchSpanProcessor(new OTLPTraceExporter({ url: new URL('/v1/traces', collectorBaseUrl).href })),
);

// optionally push traces to an OTEL collector instance
if (tracesCollector) {
tracerProvider.addSpanProcessor(new BatchSpanProcessor(new OTLPTraceExporter({ url: tracesCollector.href })));
}

tracerProvider.register();

const meterProvider = new MeterProvider({
resource,
readers: [
new PeriodicExportingMetricReader({
exporter: new OTLPMetricExporter({
url: new URL('/v1/metrics', collectorBaseUrl).href,
url: metricsCollector.href,
}),
}),
],
Expand Down
6 changes: 3 additions & 3 deletions yarn-project/telemetry-client/src/start.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ import { type TelemetryClient } from './telemetry.js';

export * from './config.js';

export function createAndStartTelemetryClient(config: TelemetryClientConfig): TelemetryClient {
export async function createAndStartTelemetryClient(config: TelemetryClientConfig): Promise<TelemetryClient> {
const log = createDebugLogger('aztec:telemetry-client');
if (config.collectorBaseUrl) {
if (config.metricsCollectorUrl) {
log.info('Using OpenTelemetry client');
return OpenTelemetryClient.createAndStart(config.collectorBaseUrl, log);
return await OpenTelemetryClient.createAndStart(config.metricsCollectorUrl, config.tracesCollectorUrl, log);
} else {
log.info('Using NoopTelemetryClient');
return new NoopTelemetryClient();
Expand Down

0 comments on commit 5eaaac5

Please sign in to comment.