From 4e63106a43b3f16d46baa03552a355b9d4ab3e7a Mon Sep 17 00:00:00 2001 From: Alex Gherghisan Date: Fri, 23 Aug 2024 15:20:43 +0100 Subject: [PATCH] fix: start trace exporter only if required (#8147) This PR splits the collector URL into `_METRICS_ENDPOINT` and `_TRACES_ENDPOINT` and makes exporting traces optional (if no processor is registered then the default processor is a no-op) See https://www.npmjs.com/package/@opentelemetry/exporter-trace-otlp-http#user-content-configuration-options-as-environment-variables --- docker-compose.yml | 14 +++++---- .../aztec/src/cli/cmds/start_archiver.ts | 2 +- yarn-project/aztec/src/cli/cmds/start_node.ts | 2 +- .../aztec/src/cli/cmds/start_prover_agent.ts | 2 +- .../aztec/src/cli/cmds/start_prover_node.ts | 9 +++--- yarn-project/aztec/src/sandbox.ts | 2 +- yarn-project/aztec/terraform/node/main.tf | 4 +-- .../aztec/terraform/prover-node/main.tf | 2 +- yarn-project/aztec/terraform/prover/main.tf | 4 +-- .../src/fixtures/snapshot_manager.ts | 4 +-- yarn-project/end-to-end/src/fixtures/utils.ts | 6 ++-- yarn-project/foundation/src/config/env_var.ts | 3 +- yarn-project/telemetry-client/src/config.ts | 14 ++++++--- yarn-project/telemetry-client/src/otel.ts | 30 +++++++++++-------- yarn-project/telemetry-client/src/start.ts | 6 ++-- 15 files changed, 59 insertions(+), 45 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 44b53fbdb29..2e44e214ba7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -62,7 +62,8 @@ services: P2P_ENABLED: true PEER_ID_PRIVATE_KEY: AZTEC_PORT: 8999 - OTEL_EXPORTER_OTLP_ENDPOINT: ${OTEL_EXPORTER_OTLP_ENDPOINT:-http://otel-collector:4318} + OTEL_EXPORTER_OTLP_METRICS_ENDPOINT: ${OTEL_EXPORTER_OTLP_METRICS_ENDPOINT:-http://otel-collector:4318/v1/metrics} + OTEL_EXPORTER_OTLP_TRACES_ENDPOINT: ${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://otel-collector:4318/v1/traces} secrets: - ethereum-host - p2p-boot-node @@ -77,13 +78,14 @@ services: # if the stack is started with --profile 
metrics --profile node, give the collector a chance to start before the node i=0 max=3 - while ! curl --head --silent $$OTEL_EXPORTER_OTLP_ENDPOINT > /dev/null; do + while ! curl --head --silent $$OTEL_EXPORTER_OTLP_METRICS_ENDPOINT > /dev/null; do echo "OpenTelemetry collector not up. Retrying after 1s"; sleep 1; i=$$((i+1)); if [ $$i -eq $$max ]; then - echo "OpenTelemetry collector at $$OTEL_EXPORTER_OTLP_ENDPOINT not up after $${max}s. Running without metrics"; - unset OTEL_EXPORTER_OTLP_ENDPOINT; + echo "OpenTelemetry collector at $$OTEL_EXPORTER_OTLP_METRICS_ENDPOINT not up after $${max}s. Running without metrics"; + unset OTEL_EXPORTER_OTLP_METRICS_ENDPOINT; + unset OTEL_EXPORTER_OTLP_TRACES_ENDPOINT; break fi; done; @@ -171,8 +173,8 @@ configs: prometheus-config: content: | global: - evaluation_interval: 30s - scrape_interval: 10s + evaluation_interval: 15s + scrape_interval: 15s scrape_configs: - job_name: otel-collector static_configs: diff --git a/yarn-project/aztec/src/cli/cmds/start_archiver.ts b/yarn-project/aztec/src/cli/cmds/start_archiver.ts index 95ad83b9972..a8675164ae1 100644 --- a/yarn-project/aztec/src/cli/cmds/start_archiver.ts +++ b/yarn-project/aztec/src/cli/cmds/start_archiver.ts @@ -25,7 +25,7 @@ export const startArchiver = async (options: any, signalHandlers: (() => Promise const store = await createStore(archiverConfig, rollupAddress, storeLog); const archiverStore = new KVArchiverDataStore(store, archiverConfig.maxLogs); - const telemetry = createAndStartTelemetryClient(getTelemetryClientConfig()); + const telemetry = await createAndStartTelemetryClient(getTelemetryClientConfig()); const archiver = await Archiver.createAndSync(archiverConfig, archiverStore, telemetry, true); const archiverServer = createArchiverRpcServer(archiver); services.push({ archiver: archiverServer }); diff --git a/yarn-project/aztec/src/cli/cmds/start_node.ts b/yarn-project/aztec/src/cli/cmds/start_node.ts index b4279d7b2d7..c6f9814bc07 100644 --- 
a/yarn-project/aztec/src/cli/cmds/start_node.ts +++ b/yarn-project/aztec/src/cli/cmds/start_node.ts @@ -91,7 +91,7 @@ export const startNode = async ( } const telemetryConfig = extractRelevantOptions(options, telemetryClientConfigMappings, 'tel'); - const telemetryClient = createAndStartTelemetryClient(telemetryConfig); + const telemetryClient = await createAndStartTelemetryClient(telemetryConfig); // Create and start Aztec Node. const node = await createAztecNode(nodeConfig, telemetryClient); diff --git a/yarn-project/aztec/src/cli/cmds/start_prover_agent.ts b/yarn-project/aztec/src/cli/cmds/start_prover_agent.ts index f6c73503816..0a6534fae54 100644 --- a/yarn-project/aztec/src/cli/cmds/start_prover_agent.ts +++ b/yarn-project/aztec/src/cli/cmds/start_prover_agent.ts @@ -21,7 +21,7 @@ export const startProverAgent: ServiceStarter = async (options, signalHandlers, const source = createProvingJobSourceClient(proverConfig.nodeUrl, 'provingJobSource'); const telemetryConfig = extractRelevantOptions(options, telemetryClientConfigMappings, 'tel'); - const telemetry = createAndStartTelemetryClient(telemetryConfig); + const telemetry = await createAndStartTelemetryClient(telemetryConfig); let circuitProver: ServerCircuitProver; if (proverConfig.realProofs) { diff --git a/yarn-project/aztec/src/cli/cmds/start_prover_node.ts b/yarn-project/aztec/src/cli/cmds/start_prover_node.ts index 1c22e3d7308..21d8a54f3f9 100644 --- a/yarn-project/aztec/src/cli/cmds/start_prover_node.ts +++ b/yarn-project/aztec/src/cli/cmds/start_prover_node.ts @@ -9,10 +9,7 @@ import { createProverNodeRpcServer, proverNodeConfigMappings, } from '@aztec/prover-node'; -import { - createAndStartTelemetryClient, - getConfigEnvVars as getTelemetryClientConfig, -} from '@aztec/telemetry-client/start'; +import { createAndStartTelemetryClient, telemetryClientConfigMappings } from '@aztec/telemetry-client/start'; import { mnemonicToAccount } from 'viem/accounts'; @@ -70,7 +67,9 @@ export const startProverNode 
= async ( proverConfig.l1Contracts = await createAztecNodeClient(nodeUrl).getL1ContractAddresses(); } - const telemetry = createAndStartTelemetryClient(getTelemetryClientConfig()); + const telemetry = await createAndStartTelemetryClient( + extractRelevantOptions(options, telemetryClientConfigMappings, 'tel'), + ); const proverNode = await createProverNode(proverConfig, { telemetry }); services.push({ node: createProverNodeRpcServer(proverNode) }); diff --git a/yarn-project/aztec/src/sandbox.ts b/yarn-project/aztec/src/sandbox.ts index c3f18cd4c60..c50bae06142 100644 --- a/yarn-project/aztec/src/sandbox.ts +++ b/yarn-project/aztec/src/sandbox.ts @@ -170,7 +170,7 @@ export async function createSandbox(config: Partial = {}) { await deployContractsToL1(aztecNodeConfig, hdAccount); } - const client = createAndStartTelemetryClient(getTelemetryClientConfig()); + const client = await createAndStartTelemetryClient(getTelemetryClientConfig()); const node = await createAztecNode(aztecNodeConfig, client); const pxe = await createAztecPXE(node); diff --git a/yarn-project/aztec/terraform/node/main.tf b/yarn-project/aztec/terraform/node/main.tf index 4a963fa174d..ef2638d1bbe 100644 --- a/yarn-project/aztec/terraform/node/main.tf +++ b/yarn-project/aztec/terraform/node/main.tf @@ -337,8 +337,8 @@ resource "aws_ecs_task_definition" "aztec-node" { value = tostring(var.PROVING_ENABLED) }, { - name = "OTEL_EXPORTER_OTLP_ENDPOINT" - value = "http://aztec-otel.local:4318" + name = "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT" + value = "http://aztec-otel.local:4318/v1/metrics" }, { name = "OTEL_SERVICE_NAME" diff --git a/yarn-project/aztec/terraform/prover-node/main.tf b/yarn-project/aztec/terraform/prover-node/main.tf index 5cca8711717..e499eacb050 100644 --- a/yarn-project/aztec/terraform/prover-node/main.tf +++ b/yarn-project/aztec/terraform/prover-node/main.tf @@ -202,7 +202,7 @@ resource "aws_ecs_task_definition" "aztec-prover-node" { { name = "PROVER_NODE_MAX_PENDING_JOBS", value = 
tostring(var.PROVER_NODE_MAX_PENDING_JOBS) }, // Metrics - { name = "OTEL_EXPORTER_OTLP_ENDPOINT", value = "http://aztec-otel.local:4318" }, + { name = "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", value = "http://aztec-otel.local:4318/v1/metrics" }, { name = "OTEL_SERVICE_NAME", value = "${var.DEPLOY_TAG}-aztec-prover-node-${count.index + 1}" }, // L1 addresses diff --git a/yarn-project/aztec/terraform/prover/main.tf b/yarn-project/aztec/terraform/prover/main.tf index a288f13d8bf..924aae3e6ea 100644 --- a/yarn-project/aztec/terraform/prover/main.tf +++ b/yarn-project/aztec/terraform/prover/main.tf @@ -274,8 +274,8 @@ resource "aws_ecs_task_definition" "aztec-proving-agent" { "value": "${var.PROVING_ENABLED}" }, { - "name": "OTEL_EXPORTER_OTLP_ENDPOINT", - "value": "http://aztec-otel.local:4318" + "name": "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", + "value": "http://aztec-otel.local:4318/v1/metrics" }, { "name": "OTEL_SERVICE_NAME", diff --git a/yarn-project/end-to-end/src/fixtures/snapshot_manager.ts b/yarn-project/end-to-end/src/fixtures/snapshot_manager.ts index 287effeb7c3..c57f8fe542c 100644 --- a/yarn-project/end-to-end/src/fixtures/snapshot_manager.ts +++ b/yarn-project/end-to-end/src/fixtures/snapshot_manager.ts @@ -325,7 +325,7 @@ async function setupFromFresh( aztecNodeConfig.bbWorkingDirectory = bbConfig.bbWorkingDirectory; } - const telemetry = createAndStartTelemetryClient(getTelemetryConfig()); + const telemetry = await createAndStartTelemetryClient(getTelemetryConfig()); logger.verbose('Creating and synching an aztec node...'); const aztecNode = await AztecNodeService.createAndSync(aztecNodeConfig, telemetry); @@ -408,7 +408,7 @@ async function setupFromState(statePath: string, logger: Logger): Promise { - await telemetry.stop(); + const client = await telemetryPromise; + await client.stop(); }); } @@ -395,6 +396,7 @@ export async function setup( } config.l1PublishRetryIntervalMS = 100; + const telemetry = await telemetryPromise; const aztecNode = await 
AztecNodeService.createAndSync(config, telemetry); const sequencer = aztecNode.getSequencer(); diff --git a/yarn-project/foundation/src/config/env_var.ts b/yarn-project/foundation/src/config/env_var.ts index fdb6394eb30..2d9a5029351 100644 --- a/yarn-project/foundation/src/config/env_var.ts +++ b/yarn-project/foundation/src/config/env_var.ts @@ -37,8 +37,9 @@ export type EnvVar = | 'P2P_QUERY_FOR_IP' | 'P2P_TX_POOL_KEEP_PROVEN_FOR' | 'TELEMETRY' - | 'OTEL_EXPORTER_OTLP_ENDPOINT' | 'OTEL_SERVICE_NAME' + | 'OTEL_EXPORTER_OTLP_METRICS_ENDPOINT' + | 'OTEL_EXPORTER_OTLP_TRACES_ENDPOINT' | 'NETWORK_NAME' | 'NETWORK' | 'API_KEY' diff --git a/yarn-project/telemetry-client/src/config.ts b/yarn-project/telemetry-client/src/config.ts index 7a11f4e1a8a..c7789ba05bc 100644 --- a/yarn-project/telemetry-client/src/config.ts +++ b/yarn-project/telemetry-client/src/config.ts @@ -1,15 +1,21 @@ import { type ConfigMappingsType, getConfigFromMappings } from '@aztec/foundation/config'; export interface TelemetryClientConfig { - collectorBaseUrl?: URL; + metricsCollectorUrl?: URL; + tracesCollectorUrl?: URL; serviceName: string; networkName: string; } export const telemetryClientConfigMappings: ConfigMappingsType = { - collectorBaseUrl: { - env: 'OTEL_EXPORTER_OTLP_ENDPOINT', - description: 'The URL of the telemetry collector', + metricsCollectorUrl: { + env: 'OTEL_EXPORTER_OTLP_METRICS_ENDPOINT', + description: 'The URL of the telemetry collector for metrics', + parseEnv: (val: string) => new URL(val), + }, + tracesCollectorUrl: { + env: 'OTEL_EXPORTER_OTLP_TRACES_ENDPOINT', + description: 'The URL of the telemetry collector for traces', parseEnv: (val: string) => new URL(val), }, serviceName: { diff --git a/yarn-project/telemetry-client/src/otel.ts b/yarn-project/telemetry-client/src/otel.ts index 0a95433dc2b..d46a3c70ac4 100644 --- a/yarn-project/telemetry-client/src/otel.ts +++ b/yarn-project/telemetry-client/src/otel.ts @@ -62,14 +62,7 @@ export class OpenTelemetryClient implements 
TelemetryClient { description: 'Target information', }); - if (this.resource.asyncAttributesPending) { - void this.resource.waitForAsyncAttributes!().then(() => { - this.targetInfo!.record(1, this.resource.attributes); - }); - } else { - this.targetInfo.record(1, this.resource.attributes); - } - + this.targetInfo.record(1, this.resource.attributes); this.hostMetrics.start(); } @@ -77,7 +70,11 @@ export class OpenTelemetryClient implements TelemetryClient { await Promise.all([this.meterProvider.shutdown()]); } - public static createAndStart(collectorBaseUrl: URL, log: DebugLogger): OpenTelemetryClient { + public static async createAndStart( + metricsCollector: URL, + tracesCollector: URL | undefined, + log: DebugLogger, + ): Promise { const resource = detectResourcesSync({ detectors: [ osDetectorSync, @@ -90,12 +87,19 @@ export class OpenTelemetryClient implements TelemetryClient { ], }); + if (resource.asyncAttributesPending) { + await resource.waitForAsyncAttributes!(); + } + const tracerProvider = new NodeTracerProvider({ resource, }); - tracerProvider.addSpanProcessor( - new BatchSpanProcessor(new OTLPTraceExporter({ url: new URL('/v1/traces', collectorBaseUrl).href })), - ); + + // optionally push traces to an OTEL collector instance + if (tracesCollector) { + tracerProvider.addSpanProcessor(new BatchSpanProcessor(new OTLPTraceExporter({ url: tracesCollector.href }))); + } + tracerProvider.register(); const meterProvider = new MeterProvider({ @@ -103,7 +107,7 @@ export class OpenTelemetryClient implements TelemetryClient { readers: [ new PeriodicExportingMetricReader({ exporter: new OTLPMetricExporter({ - url: new URL('/v1/metrics', collectorBaseUrl).href, + url: metricsCollector.href, }), }), ], diff --git a/yarn-project/telemetry-client/src/start.ts b/yarn-project/telemetry-client/src/start.ts index 5425107e7ee..eb07a4a431b 100644 --- a/yarn-project/telemetry-client/src/start.ts +++ b/yarn-project/telemetry-client/src/start.ts @@ -7,11 +7,11 @@ import { type 
TelemetryClient } from './telemetry.js'; export * from './config.js'; -export function createAndStartTelemetryClient(config: TelemetryClientConfig): TelemetryClient { +export async function createAndStartTelemetryClient(config: TelemetryClientConfig): Promise { const log = createDebugLogger('aztec:telemetry-client'); - if (config.collectorBaseUrl) { + if (config.metricsCollectorUrl) { log.info('Using OpenTelemetry client'); - return OpenTelemetryClient.createAndStart(config.collectorBaseUrl, log); + return await OpenTelemetryClient.createAndStart(config.metricsCollectorUrl, config.tracesCollectorUrl, log); } else { log.info('Using NoopTelemetryClient'); return new NoopTelemetryClient();