From 16098c454751939d9e3587ff458bb652b28e3d79 Mon Sep 17 00:00:00 2001 From: Sergi Romeu Date: Fri, 18 Oct 2024 13:30:19 +0200 Subject: [PATCH] [APM][OTel] Use `telemetry.sdk` as a fallback for missing `agent.name` on non-tracing data (#196529) ## Summary Related to #195854 This PR adds a fallback for when `agent.name` is missing in the APM Service Inventory list. It uses the [OTel semantic convention fields](https://opentelemetry.io/docs/specs/semconv/resource/#telemetry-sdk) `telemetry.sdk.language` and `telemetry.sdk.name` to keep the `agent.name` format used for OTel agents, such as `otlp/${agent}` or `opentelemetry/${agent}`. ## Screenshots | Before | After | |-------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------| | ![image](https://github.com/user-attachments/assets/792ca256-f01d-4eae-8a2d-af16fa34ea61) | ![image](https://github.com/user-attachments/assets/2816d1c7-1207-4da8-adb5-ec417b3fd26e) | ![image](https://github.com/user-attachments/assets/27df0ffc-8d5f-475c-ad6b-04086521871b)|![image](https://github.com/user-attachments/assets/d088a746-1375-4918-8e55-d3968a80772d) ## How to test it 1. Open the otel-apm-e2e-poc repo 2. Add the following code under `processors -> transform -> metric_statements` in `otelcol.yaml` ```yaml - context: datapoint statements: - set(attributes["processor.event"], "metric") ``` 3. Run `make start-stack` & `make build` & `make run` in the otel-apm-e2e-poc repo 4. Don't forget to also run `make start` in opentelemetry-demo and [follow this guide](https://github.com/elastic/otel-apm-e2e-poc?tab=readme-ov-file#working-with-opentelemetry-demo) to make it work. 5. In Kibana, add `elasticsearch.ignoreVersionMismatch: true` to `kibana.dev.yml`. 6. If you go to the APM Service Inventory list, you will see missing icons. 7. Check out my branch 8. 
Try again, they should be fixed --- packages/kbn-apm-types/src/es_fields/apm.ts | 4 +++ packages/kbn-elastic-agent-utils/README.md | 8 ++++++ packages/kbn-elastic-agent-utils/index.ts | 1 + .../src/agent_guards.test.ts | 13 ++++++++++ .../src/agent_guards.ts | 16 ++++++++++++ .../__snapshots__/es_fields.test.ts.snap | 18 +++++++++++++ .../services/get_service_metadata_details.ts | 26 ++++++++++++++++++- .../services/get_service_metadata_icons.ts | 16 ++++++++++-- .../get_service_transaction_stats.ts | 21 ++++++++++++--- .../get_services_without_transactions.ts | 25 ++++++++++++++++-- 10 files changed, 140 insertions(+), 8 deletions(-) diff --git a/packages/kbn-apm-types/src/es_fields/apm.ts b/packages/kbn-apm-types/src/es_fields/apm.ts index 5d50833161979..7068058024d9d 100644 --- a/packages/kbn-apm-types/src/es_fields/apm.ts +++ b/packages/kbn-apm-types/src/es_fields/apm.ts @@ -190,6 +190,10 @@ export const METRIC_OTEL_JVM_SYSTEM_CPU_PERCENT = 'process.runtime.jvm.system.cp export const METRIC_OTEL_JVM_GC_DURATION = 'process.runtime.jvm.gc.duration'; export const VALUE_OTEL_JVM_PROCESS_MEMORY_HEAP = 'heap'; export const VALUE_OTEL_JVM_PROCESS_MEMORY_NON_HEAP = 'non_heap'; +// OpenTelemetry semconv fields for AgentName https://opentelemetry.io/docs/specs/semconv/resource/#telemetry-sdk +export const TELEMETRY_SDK_NAME = 'telemetry.sdk.name'; +export const TELEMETRY_SDK_LANGUAGE = 'telemetry.sdk.language'; +export const TELEMETRY_SDK_VERSION = 'telemetry.sdk.version'; // Metadata export const TIER = '_tier'; diff --git a/packages/kbn-elastic-agent-utils/README.md b/packages/kbn-elastic-agent-utils/README.md index f5b99bf1b1594..716de6156183d 100644 --- a/packages/kbn-elastic-agent-utils/README.md +++ b/packages/kbn-elastic-agent-utils/README.md @@ -4,6 +4,14 @@ A utility package providing functions for working with Elastic Agents. 
This pack ## Functions +- **`getAgentName`** + + ```typescript + export function getAgentName(agentName: string | null, telemetryAgentName: string | null, telemetrySdkName: string | null): AgentName; + ``` + + Resolves the agent name. Returns `agentName` when it is set; otherwise, if both `telemetrySdkName` and `telemetryAgentName` are defined, returns them combined as `telemetrySdkName/telemetryAgentName`; if not, returns `telemetryAgentName` (which may be `null`). + - **`isOpenTelemetryAgentName`** ```typescript diff --git a/packages/kbn-elastic-agent-utils/index.ts b/packages/kbn-elastic-agent-utils/index.ts index d76095a44a0d3..d92db7cd9489c 100644 --- a/packages/kbn-elastic-agent-utils/index.ts +++ b/packages/kbn-elastic-agent-utils/index.ts @@ -8,6 +8,7 @@ */ export { + getAgentName, isOpenTelemetryAgentName, hasOpenTelemetryPrefix, isJavaAgentName, diff --git a/packages/kbn-elastic-agent-utils/src/agent_guards.test.ts b/packages/kbn-elastic-agent-utils/src/agent_guards.test.ts index 655addbe73c35..1c9360649439b 100644 --- a/packages/kbn-elastic-agent-utils/src/agent_guards.test.ts +++ b/packages/kbn-elastic-agent-utils/src/agent_guards.test.ts @@ -8,6 +8,7 @@ */ import { + getAgentName, hasOpenTelemetryPrefix, isAndroidAgentName, isAWSLambdaAgentName, @@ -119,4 +120,16 @@ describe('Agents guards', () => { expect(isAzureFunctionsAgentName('azure.functions')).toBe(true); expect(isAzureFunctionsAgentName('not-an-agent')).toBe(false); }); + + it('getAgentName returns agent name by default', () => { + expect(getAgentName('nodejs', 'go', 'otlp')).toBe('nodejs'); + }); + + it('getAgentName returns telemetry sdk name and telemetry agent name if agent name is not defined', () => { + expect(getAgentName(null, 'go', 'otlp')).toBe('otlp/go'); + }); + + it('getAgentName returns telemetry agent name if agent name and telemetry sdk are not defined', () => { + expect(getAgentName(null, 'go', null)).toBe('go'); + }); }); diff --git a/packages/kbn-elastic-agent-utils/src/agent_guards.ts 
b/packages/kbn-elastic-agent-utils/src/agent_guards.ts index 8402582f8d9a3..ea9d112d9630b 100644 --- a/packages/kbn-elastic-agent-utils/src/agent_guards.ts +++ b/packages/kbn-elastic-agent-utils/src/agent_guards.ts @@ -24,6 +24,22 @@ import type { ServerlessType, } from './agent_names'; +export function getAgentName( + agentName: string | null, + telemetryAgentName: string | null, + telemetrySdkName: string | null +) { + if (agentName) { + return agentName; + } + + if (telemetrySdkName && telemetryAgentName) { + return `${telemetrySdkName}/${telemetryAgentName}`; + } + + return telemetryAgentName; +} + export function hasOpenTelemetryPrefix(agentName?: string, language: string = '') { if (!agentName) { return false; diff --git a/x-pack/plugins/observability_solution/apm/common/es_fields/__snapshots__/es_fields.test.ts.snap b/x-pack/plugins/observability_solution/apm/common/es_fields/__snapshots__/es_fields.test.ts.snap index 88d00196e074b..f679686247007 100644 --- a/x-pack/plugins/observability_solution/apm/common/es_fields/__snapshots__/es_fields.test.ts.snap +++ b/x-pack/plugins/observability_solution/apm/common/es_fields/__snapshots__/es_fields.test.ts.snap @@ -324,6 +324,12 @@ exports[`Error SPAN_SYNC 1`] = `undefined`; exports[`Error SPAN_TYPE 1`] = `undefined`; +exports[`Error TELEMETRY_SDK_LANGUAGE 1`] = `undefined`; + +exports[`Error TELEMETRY_SDK_NAME 1`] = `undefined`; + +exports[`Error TELEMETRY_SDK_VERSION 1`] = `undefined`; + exports[`Error TIER 1`] = `undefined`; exports[`Error TIMESTAMP_US 1`] = `1337`; @@ -679,6 +685,12 @@ exports[`Span SPAN_SYNC 1`] = `false`; exports[`Span SPAN_TYPE 1`] = `"span type"`; +exports[`Span TELEMETRY_SDK_LANGUAGE 1`] = `undefined`; + +exports[`Span TELEMETRY_SDK_NAME 1`] = `undefined`; + +exports[`Span TELEMETRY_SDK_VERSION 1`] = `undefined`; + exports[`Span TIER 1`] = `undefined`; exports[`Span TIMESTAMP_US 1`] = `1337`; @@ -1052,6 +1064,12 @@ exports[`Transaction SPAN_SYNC 1`] = `undefined`; exports[`Transaction 
SPAN_TYPE 1`] = `undefined`; +exports[`Transaction TELEMETRY_SDK_LANGUAGE 1`] = `undefined`; + +exports[`Transaction TELEMETRY_SDK_NAME 1`] = `undefined`; + +exports[`Transaction TELEMETRY_SDK_VERSION 1`] = `undefined`; + exports[`Transaction TIER 1`] = `undefined`; exports[`Transaction TIMESTAMP_US 1`] = `1337`; diff --git a/x-pack/plugins/observability_solution/apm/server/routes/services/get_service_metadata_details.ts b/x-pack/plugins/observability_solution/apm/server/routes/services/get_service_metadata_details.ts index 0319ae66039e5..0c8c456064362 100644 --- a/x-pack/plugins/observability_solution/apm/server/routes/services/get_service_metadata_details.ts +++ b/x-pack/plugins/observability_solution/apm/server/routes/services/get_service_metadata_details.ts @@ -9,6 +9,7 @@ import { rangeQuery } from '@kbn/observability-plugin/server'; import { ProcessorEvent } from '@kbn/observability-plugin/common'; import { unflattenKnownApmEventFields } from '@kbn/apm-data-access-plugin/server/utils'; import { FlattenedApmEvent } from '@kbn/apm-data-access-plugin/server/utils/unflatten_known_fields'; +import { getAgentName } from '@kbn/elastic-agent-utils'; import { environmentQuery } from '../../../common/utils/environment_query'; import { CLOUD_AVAILABILITY_ZONE, @@ -21,6 +22,11 @@ import { SERVICE_VERSION, FAAS_ID, FAAS_TRIGGER_TYPE, + AGENT_NAME, + TELEMETRY_SDK_LANGUAGE, + TELEMETRY_SDK_NAME, + AGENT_VERSION, + TELEMETRY_SDK_VERSION, } from '../../../common/es_fields/apm'; import { ContainerType } from '../../../common/service_metadata'; import { APMEventClient } from '../../lib/helpers/create_es_client/create_apm_event_client'; @@ -158,7 +164,25 @@ export async function getServiceMetadataDetails({ }, }; - const response = await apmEventClient.search('get_service_metadata_details', params); + const data = await apmEventClient.search('get_service_metadata_details', params); + + if (data.hits.total.value === 0) { + return { + service: undefined, + container: undefined, + 
cloud: undefined, + }; + } + + const response = structuredClone(data); + response.hits.hits[0].fields[AGENT_NAME] = getAgentName( + data.hits.hits[0]?.fields?.[AGENT_NAME] as unknown as string | null, + data.hits.hits[0]?.fields?.[TELEMETRY_SDK_LANGUAGE] as unknown as string | null, + data.hits.hits[0]?.fields?.[TELEMETRY_SDK_NAME] as unknown as string | null + ) as unknown as unknown[]; + response.hits.hits[0].fields[AGENT_VERSION] = + response.hits.hits[0].fields[AGENT_VERSION] ?? + data.hits.hits[0]?.fields?.[TELEMETRY_SDK_VERSION]; const event = unflattenKnownApmEventFields( maybe(response.hits.hits[0])?.fields as undefined | FlattenedApmEvent diff --git a/x-pack/plugins/observability_solution/apm/server/routes/services/get_service_metadata_icons.ts b/x-pack/plugins/observability_solution/apm/server/routes/services/get_service_metadata_icons.ts index ee0a857c9b719..e2a3ffd22f703 100644 --- a/x-pack/plugins/observability_solution/apm/server/routes/services/get_service_metadata_icons.ts +++ b/x-pack/plugins/observability_solution/apm/server/routes/services/get_service_metadata_icons.ts @@ -9,6 +9,7 @@ import { rangeQuery } from '@kbn/observability-plugin/server'; import { ProcessorEvent } from '@kbn/observability-plugin/common'; import { unflattenKnownApmEventFields } from '@kbn/apm-data-access-plugin/server/utils'; import type { FlattenedApmEvent } from '@kbn/apm-data-access-plugin/server/utils/unflatten_known_fields'; +import { getAgentName } from '@kbn/elastic-agent-utils'; import { maybe } from '../../../common/utils/maybe'; import { asMutableArray } from '../../../common/utils/as_mutable_array'; import { @@ -22,6 +23,8 @@ import { LABEL_TELEMETRY_AUTO_VERSION, AGENT_VERSION, SERVICE_FRAMEWORK_NAME, + TELEMETRY_SDK_NAME, + TELEMETRY_SDK_LANGUAGE, } from '../../../common/es_fields/apm'; import { ContainerType, SERVICE_METADATA_KUBERNETES_KEYS } from '../../../common/service_metadata'; import { getProcessorEventForTransactions } from 
'../../lib/helpers/transactions'; @@ -66,6 +69,8 @@ export async function getServiceMetadataIcons({ CONTAINER_ID, AGENT_NAME, CLOUD_SERVICE_NAME, + TELEMETRY_SDK_NAME, + TELEMETRY_SDK_LANGUAGE, ...SERVICE_METADATA_KUBERNETES_KEYS, ] as const); @@ -85,9 +90,9 @@ export async function getServiceMetadataIcons({ }, }; - const response = await apmEventClient.search('get_service_metadata_icons', params); + const data = await apmEventClient.search('get_service_metadata_icons', params); - if (response.hits.total.value === 0) { + if (data.hits.total.value === 0) { return { agentName: undefined, containerType: undefined, @@ -96,6 +101,13 @@ export async function getServiceMetadataIcons({ }; } + const response = structuredClone(data); + response.hits.hits[0].fields[AGENT_NAME] = getAgentName( + data.hits.hits[0]?.fields?.[AGENT_NAME] as unknown as string | null, + data.hits.hits[0]?.fields?.[TELEMETRY_SDK_LANGUAGE] as unknown as string | null, + data.hits.hits[0]?.fields?.[TELEMETRY_SDK_NAME] as unknown as string | null + ) as unknown as unknown[]; + const event = unflattenKnownApmEventFields( maybe(response.hits.hits[0])?.fields as undefined | FlattenedApmEvent ); diff --git a/x-pack/plugins/observability_solution/apm/server/routes/services/get_services/get_service_transaction_stats.ts b/x-pack/plugins/observability_solution/apm/server/routes/services/get_services/get_service_transaction_stats.ts index 4d0d25edf634d..69f06ee74b535 100644 --- a/x-pack/plugins/observability_solution/apm/server/routes/services/get_services/get_service_transaction_stats.ts +++ b/x-pack/plugins/observability_solution/apm/server/routes/services/get_services/get_service_transaction_stats.ts @@ -6,6 +6,7 @@ */ import { kqlQuery, rangeQuery, wildcardQuery } from '@kbn/observability-plugin/server'; +import { getAgentName } from '@kbn/elastic-agent-utils'; import { ApmDocumentType } from '../../../../common/document_type'; import { AGENT_NAME, @@ -13,6 +14,8 @@ import { SERVICE_NAME, TRANSACTION_TYPE, 
SERVICE_OVERFLOW_COUNT, + TELEMETRY_SDK_NAME, + TELEMETRY_SDK_LANGUAGE, } from '../../../../common/es_fields/apm'; import { RollupInterval } from '../../../../common/rollup'; import { ServiceGroup } from '../../../../common/service_groups'; @@ -124,6 +127,16 @@ export async function getServiceTransactionStats({ size: maxNumServices, }, aggs: { + telemetryAgentName: { + terms: { + field: TELEMETRY_SDK_LANGUAGE, + }, + }, + telemetrySdkName: { + terms: { + field: TELEMETRY_SDK_NAME, + }, + }, transactionType: { terms: { field: TRANSACTION_TYPE, @@ -169,9 +182,11 @@ export async function getServiceTransactionStats({ topTransactionTypeBucket?.environments.buckets.map( (environmentBucket) => environmentBucket.key as string ) ?? [], - agentName: topTransactionTypeBucket?.sample.top[0].metrics[AGENT_NAME] as - | AgentName - | undefined, + agentName: getAgentName( + topTransactionTypeBucket?.sample.top[0].metrics[AGENT_NAME] as string | null, + bucket.telemetryAgentName.buckets[0]?.key as string | null, + bucket.telemetrySdkName.buckets[0]?.key as string | null + ) as AgentName, latency: topTransactionTypeBucket?.avg_duration.value, transactionErrorRate: topTransactionTypeBucket ? 
calculateFailedTransactionRate(topTransactionTypeBucket) diff --git a/x-pack/plugins/observability_solution/apm/server/routes/services/get_services/get_services_without_transactions.ts b/x-pack/plugins/observability_solution/apm/server/routes/services/get_services/get_services_without_transactions.ts index 7f694d1c99a16..a966a37a64220 100644 --- a/x-pack/plugins/observability_solution/apm/server/routes/services/get_services/get_services_without_transactions.ts +++ b/x-pack/plugins/observability_solution/apm/server/routes/services/get_services/get_services_without_transactions.ts @@ -7,8 +7,15 @@ import { kqlQuery, rangeQuery, wildcardQuery } from '@kbn/observability-plugin/server'; import { ProcessorEvent } from '@kbn/observability-plugin/common'; +import { getAgentName } from '@kbn/elastic-agent-utils'; import { AgentName } from '../../../../typings/es_schemas/ui/fields/agent'; -import { AGENT_NAME, SERVICE_ENVIRONMENT, SERVICE_NAME } from '../../../../common/es_fields/apm'; +import { + AGENT_NAME, + SERVICE_ENVIRONMENT, + SERVICE_NAME, + TELEMETRY_SDK_LANGUAGE, + TELEMETRY_SDK_NAME, +} from '../../../../common/es_fields/apm'; import { environmentQuery } from '../../../../common/utils/environment_query'; import { ServiceGroup } from '../../../../common/service_groups'; import { RandomSampler } from '../../../lib/helpers/get_random_sampler'; @@ -99,6 +106,16 @@ export async function getServicesWithoutTransactions({ field: SERVICE_ENVIRONMENT, }, }, + telemetryAgentName: { + terms: { + field: TELEMETRY_SDK_LANGUAGE, + }, + }, + telemetrySdkName: { + terms: { + field: TELEMETRY_SDK_NAME, + }, + }, latest: { top_metrics: { metrics: [{ field: AGENT_NAME } as const], @@ -122,7 +139,11 @@ export async function getServicesWithoutTransactions({ return { serviceName: bucket.key as string, environments: bucket.environments.buckets.map((envBucket) => envBucket.key as string), - agentName: bucket.latest.top[0].metrics[AGENT_NAME] as AgentName, + agentName: getAgentName( + 
bucket.latest.top[0].metrics[AGENT_NAME] as string | null, + bucket.telemetryAgentName.buckets[0]?.key as string | null, + bucket.telemetrySdkName.buckets[0]?.key as string | null + ) as AgentName, }; }) ?? [], maxCountExceeded,