diff --git a/CHANGELOG.md b/CHANGELOG.md index d460b325b2..3d9c58db47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ - (Splunk) Deprecate collectd/mysql monitor. Use the [mysql receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/mysqlreceiver) instead. ([#5538](https://github.com/signalfx/splunk-otel-collector/pull/5538)) - (Splunk) Deprecate the collectd/nginx monitor. Please use the [nginx receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/receiver/nginxreceiver/) instead. ([#5537](https://github.com/signalfx/splunk-otel-collector/pull/5537)) - (Splunk) Deprecate the collectd/chrony monitor. Please use the [chronyreceiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/chronyreceiver) instead. ([#5536](https://github.com/signalfx/splunk-otel-collector/pull/5536)) +- (Splunk) Deprecate the ecs-metadata monitor. Please use the [awsecscontainermetrics receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/awsecscontainermetricsreceiver) instead. ([#5541](https://github.com/signalfx/splunk-otel-collector/pull/5541)) ### 🚀 New components 🚀 diff --git a/cmd/otelcol/config/collector/ecs_ec2_config.yaml b/cmd/otelcol/config/collector/ecs_ec2_config.yaml index 9ed9b3a800..fdc7c30421 100644 --- a/cmd/otelcol/config/collector/ecs_ec2_config.yaml +++ b/cmd/otelcol/config/collector/ecs_ec2_config.yaml @@ -4,9 +4,6 @@ config_sources: env: defaults: METRICS_TO_EXCLUDE: [] - ECS_METADATA_EXCLUDED_IMAGES: [] - ECS_TASK_METADATA_ENDPOINT: "${ECS_CONTAINER_METADATA_URI_V4}/task" - ECS_TASK_STATS_ENDPOINT: "${ECS_CONTAINER_METADATA_URI_V4}/task/stats" extensions: health_check: @@ -28,7 +25,8 @@ receivers: scrapers: cpu: disk: - filesystem: + # Filesystem metrics are not working OOTB, see https://splunk.atlassian.net/browse/OTL-3075. 
+ # filesystem: memory: network: # System load average metrics https://en.wikipedia.org/wiki/Load_(computing) @@ -77,12 +75,7 @@ receivers: #access_token_passthrough: true zipkin: endpoint: 0.0.0.0:9411 - smartagent/ecs-metadata: - type: ecs-metadata - metadataEndpoint: "${env:ECS_TASK_METADATA_ENDPOINT}" - statsEndpoint: "${env:ECS_TASK_STATS_ENDPOINT}" - excludedImages: ${env:ECS_METADATA_EXCLUDED_IMAGES} - + awsecscontainermetrics: processors: batch: metadata_keys: @@ -94,17 +87,12 @@ processors: memory_limiter: check_interval: 2s limit_mib: ${SPLUNK_MEMORY_LIMIT_MIB} - # detect if the collector is running on a cloud system - # important for creating unique cloud provider dimensions resourcedetection: detectors: [ecs] override: false - # Same as above but overrides resource attributes set by receivers resourcedetection/internal: detectors: [ecs] override: true - # Defines the filter processor with example settings - # Full configuration here: https://github.com/open-telemetry/opentelemetry-collector/tree/main/processor/filterprocessor filter: metrics: exclude: @@ -154,7 +142,7 @@ service: #- resource/add_environment exporters: [sapm, signalfx] metrics: - receivers: [hostmetrics, otlp, signalfx, smartagent/ecs-metadata] + receivers: [hostmetrics, otlp, signalfx, awsecscontainermetrics] processors: [memory_limiter, batch, filter, resourcedetection] exporters: [signalfx] metrics/internal: diff --git a/cmd/otelcol/config/collector/fargate_config.yaml b/cmd/otelcol/config/collector/fargate_config.yaml index e796a70713..5c08ce6d74 100644 --- a/cmd/otelcol/config/collector/fargate_config.yaml +++ b/cmd/otelcol/config/collector/fargate_config.yaml @@ -4,7 +4,6 @@ config_sources: env: defaults: METRICS_TO_EXCLUDE: [] - ECS_METADATA_EXCLUDED_IMAGES: [] extensions: health_check: @@ -56,9 +55,7 @@ receivers: #access_token_passthrough: true zipkin: endpoint: 0.0.0.0:9411 - smartagent/ecs-metadata: - type: ecs-metadata - excludedImages: ${env:ECS_METADATA_EXCLUDED_IMAGES} + 
awsecscontainermetrics: processors: batch: @@ -128,7 +125,7 @@ service: #- resource/add_environment exporters: [sapm, signalfx] metrics: - receivers: [otlp, signalfx, smartagent/ecs-metadata, prometheus/internal] + receivers: [otlp, signalfx, awsecscontainermetrics, prometheus/internal] processors: [memory_limiter, batch, resourcedetection] exporters: [signalfx] logs: diff --git a/cmd/otelcol/fips/config/ecs_ec2_config.yaml b/cmd/otelcol/fips/config/ecs_ec2_config.yaml index 76101349d4..989f1fca61 100644 --- a/cmd/otelcol/fips/config/ecs_ec2_config.yaml +++ b/cmd/otelcol/fips/config/ecs_ec2_config.yaml @@ -4,9 +4,6 @@ config_sources: env: defaults: METRICS_TO_EXCLUDE: [] - ECS_METADATA_EXCLUDED_IMAGES: [] - ECS_TASK_METADATA_ENDPOINT: "${ECS_CONTAINER_METADATA_URI_V4}/task" - ECS_TASK_STATS_ENDPOINT: "${ECS_CONTAINER_METADATA_URI_V4}/task/stats" extensions: health_check: diff --git a/cmd/otelcol/fips/config/fargate_config.yaml b/cmd/otelcol/fips/config/fargate_config.yaml index 45207b9b8a..0608a63dce 100644 --- a/cmd/otelcol/fips/config/fargate_config.yaml +++ b/cmd/otelcol/fips/config/fargate_config.yaml @@ -4,7 +4,6 @@ config_sources: env: defaults: METRICS_TO_EXCLUDE: [] - ECS_METADATA_EXCLUDED_IMAGES: [] extensions: health_check: diff --git a/deployments/ecs/ec2/README.md b/deployments/ecs/ec2/README.md index 3149c2965f..21e400eca8 100644 --- a/deployments/ecs/ec2/README.md +++ b/deployments/ecs/ec2/README.md @@ -1,16 +1,20 @@ # Amazon ECS EC2 Deployment -Familiarity with Amazon ECS using launch type EC2 is assumed. Consult the + +Familiarity with Amazon ECS using launch type EC2 is assumed. Consult the [Getting started with the Amazon ECS console using Amazon EC2](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/getting-started-ecs-ec2.html) for further reading. The [Splunk OpenTelemetry Collector](https://github.com/signalfx/splunk-otel-collector) -(Collector) can be run as a Daemon service in an ECS cluster with EC2 launch type. 
+(Collector) can be run as a Daemon service, if using the `ecs_observer`, +or as a Sidecar. Requires Collector release v0.34.1 or newer which corresponds to image tag 0.34.1 and newer. See image repository [here](https://quay.io/repository/signalfx/splunk-otel-collector?tab=tags). ## Getting Started + ### Create Task Definition + Take the task definition JSON for the Collector [here](./splunk-otel-collector.json), replace `MY_SPLUNK_ACCESS_TOKEN` and `MY_SPLUNK_REALM` with valid values. We recommend pinning the [image version](https://github.com/signalfx/splunk-otel-collector/blob/main/deployments/ecs/ec2/splunk-otel-collector.json#L56) to a specific version instead of latest to avoid upgrade issues. Use the JSON to create a task definition of **EC2 launch type** following @@ -22,31 +26,13 @@ The Collector is configured to use the default configuration file `/etc/otel/col The Collector image Dockerfile is available [here](../../../cmd/otelcol/Dockerfile) and the contents of the default configuration file can be seen [here](../../../cmd/otelcol/config/collector/ecs_ec2_config.yaml). -The suggested configured network mode for the task is **host**. This means that **task metadata -endpoint version 2** used by receiver `smartagent/ecs-metadata` is not enabled by default. See +See [here](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint.html) to determine if **task metadata endpoint version 4** is enabled by default for your task. If so the default configuration for ECS with EC2 launch type already uses it to form the metadata and stats endpoints -for the receiver to query to generate metrics and no task definition change is required. If you're using an alternate -task stats or metadata endpoint, configure them via the `ECS_TASK_METADATA_ENDPOINT` and `ECS_TASK_STATS_ENDPOINT` -environment variables list in your container definition as necessary: - -```json -"environment": [ -... 
- { - "name": "ECS_TASK_METADATA_ENDPOINT", - "value": "/task" - }, - { - "name": "ECS_TASK_STATS_ENDPOINT", - "value": "/task/stats" - }, -... -] -``` +for the receiver to query to generate metrics and no task definition change is required. -**Note**: You do not need the `smartagent/ecs-metadata` metrics receiver in the default +**Note**: You do not need the `awsecscontainermetrics` metrics receiver in the default configuration file if all you want is tracing or logs. You can take the default configuration, remove the receiver, then use the configuration in a custom configuration following the direction in the [custom configuration](#custom-configuration) section. @@ -68,26 +54,8 @@ The default configuration includes a filter processor that allows you to specify You can set the memory limit for the memory limiter processor using environment variable `SPLUNK_MEMORY_LIMIT_MIB`. For more information about the memory limiter processor, see [its documentation](https://github.com/open-telemetry/opentelemetry-collector/blob/main/processor/memorylimiterprocessor/README.md). -### Launch the Collector -The Collector is designed to be run as a Daemon service in an EC2 ECS cluster. - -To create a Collector service from the Amazon ECS console: - -Go to your cluster in the console -1. Click on the "Services" tab. -2. Click "Create" at the top of the tab. -3. Select: - - Launch Type -> EC2 - - Task Definition (Family) -> splunk-otel-collector - - Task Definition (Revision) -> 1 (or whatever the latest is in your case) - - Service Name -> splunk-otel-collector - - Service type -> DAEMON -4. Leave everything else at default and click "Next step" -5. Leave everything on this next page at their defaults and click "Next step". -6. Leave everything on this next page at their defaults and click "Next step". -7. Click "Create Service" and the collector should be deployed onto each node in the ECS cluster. You should see infrastructure and docker metrics flowing soon. 
- ## Custom Configuration + To use a custom configuration file, replace the value of environment variable `SPLUNK_CONFIG` with the file path of the custom configuration file in Collector task definition. @@ -95,14 +63,16 @@ task definition. Alternatively, you can specify the custom configuration YAML directly using environment variable `SPLUNK_CONFIG_YAML` as describe [below](#direct-configuration). -### ecs_observer +### Using the ecs_observer + Use extension [Amazon Elastic Container Service Observer](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/extension/observer/ecsobserver#amazon-elastic-container-service-observer) (`ecs_observer`) in your custom configuration to discover metrics targets in running tasks, filtered by service names, task definitions and container labels. `ecs_observer` is currently limited to Prometheus targets and requires the read-only -permissions below. You can add the permissions to the task role by adding them to a +permissions below. You can add the permissions to the task role by adding them to a customer-managed policy that is attached to the task role. + ```text ecs:List* ecs:Describe* @@ -154,7 +124,29 @@ service: exporters: [signalfx] ``` +### Launch the Collector as a Daemon service + +When running the `ecs_observer`, the Collector needs to run as a Daemon service in an EC2 ECS cluster. + +To create a Daemon service from the Amazon ECS console: + +Go to your cluster in the console: + +1. Click on the "Services" tab. +2. Click "Create" at the top of the tab. +3. Select: + - Launch Type -> EC2 + - Task Definition (Family) -> splunk-otel-collector + - Task Definition (Revision) -> 1 (or whatever the latest is in your case) + - Service Name -> splunk-otel-collector + - Service type -> DAEMON +4. Leave everything else at default and click "Next step". +5. Leave everything on this next page at their defaults and click "Next step". +6. Leave everything on this next page at their defaults and click "Next step". 
+7. Click "Create Service" and the Collector should be deployed onto each node in the ECS cluster. You should see infrastructure and container metrics flowing soon. + ### Direct Configuration + The Collector provides environment variable `SPLUNK_CONFIG_YAML` for specifying the configuration YAML directly which can be used instead of `SPLUNK_CONFIG`. diff --git a/deployments/ecs/ec2/splunk-otel-collector.json b/deployments/ecs/ec2/splunk-otel-collector.json index f87049389c..7abd42ceee 100644 --- a/deployments/ecs/ec2/splunk-otel-collector.json +++ b/deployments/ecs/ec2/splunk-otel-collector.json @@ -15,10 +15,6 @@ "name": "SPLUNK_CONFIG", "value": "/etc/otel/collector/ecs_ec2_config.yaml" }, - { - "name": "ECS_METADATA_EXCLUDED_IMAGES", - "value": "[\"quay.io/signalfx/splunk-otel-collector\"]" - }, { "name": "HOST_PROC", "value": "/hostfs/proc" @@ -49,6 +45,11 @@ "readOnly": true, "containerPath": "/hostfs", "sourceVolume": "hostfs" + }, + { + "readOnly": true, + "containerPath": "/rootfs", + "sourceVolume": "hostfs" } ], "memory": 512, diff --git a/deployments/fargate/README.md b/deployments/fargate/README.md index 176c3e89ec..4dd9f548c3 100644 --- a/deployments/fargate/README.md +++ b/deployments/fargate/README.md @@ -1,4 +1,5 @@ # AWS Fargate Deployment + Familiarity with AWS Fargate (Fargate) is assumed. Consult the [User Guide for AWS Fargate](https://docs.aws.amazon.com/AmazonECS/latest/userguide/what-is-fargate.html) for further reading. @@ -11,10 +12,12 @@ Requires Collector release v0.33.0 or newer which corresponds to image tag 0.33. See image repository [here](https://quay.io/repository/signalfx/splunk-otel-collector?tab=tags). ## Getting Started + Copy the default Collector container definition JSON below. Replace `MY_SPLUNK_ACCESS_TOKEN` and `MY_SPLUNK_REALM` with valid values. Update the image tag to the newest version then add the JSON to the `containerDefinitions` section of your task definition JSON. 
+ ```json { "environment": [ @@ -29,10 +32,6 @@ JSON. { "name": "SPLUNK_CONFIG", "value": "/etc/otel/collector/fargate_config.yaml" - }, - { - "name": "ECS_METADATA_EXCLUDED_IMAGES", - "value": "[\"quay.io/signalfx/splunk-otel-collector:*\"]" } ], "image": "quay.io/signalfx/splunk-otel-collector:0.33.0", @@ -40,18 +39,19 @@ JSON. "name": "splunk_otel_collector" } ``` + In the above container definition the Collector is configured to use the default configuration file `/etc/otel/collector/fargate_config.yaml`. The Collector image Dockerfile is available [here](../../cmd/otelcol/Dockerfile) and the contents of the default configuration file can be seen [here](../../cmd/otelcol/config/collector/fargate_config.yaml). -Note that the receiver `smartagent/ecs-metadata` is enabled by default. +Note that the receiver `awsecscontainermetrics` is enabled by default. In summary, the default Collector container definition does the following: + - Specifies the Collector image. - Sets the access token using environment variable `SPLUNK_ACCESS_TOKEN`. - Sets the realm using environment variable `SPLUNK_REALM`. - Sets the default configuration file path using environment variable `SPLUNK_CONFIG`. -- Excludes `ecs-metadata` metrics from the Collector image using environment variable `ECS_METADATA_EXCLUDED_IMAGES`. Assign a stringified array of metrics you want excluded to environment variable `METRICS_TO_EXCLUDE`. You can set the memory limit for the memory limiter processor using @@ -60,11 +60,13 @@ more information about the memory limiter processor, see [here](https://github.com/open-telemetry/opentelemetry-collector/blob/main/processor/memorylimiterprocessor/README.md) ## Custom Configuration + The example below shows an excerpt of the container definition JSON for the Collector configured to use custom configuration file `/path/to/custom/config/file`. 
`/path/to/custom/config/file` is a placeholder value for the actual custom configuration file path and `0.33.0` is the latest image tag at present. The custom configuration file should be present in a volume attached to the task. + ```json { "environment": [ @@ -78,7 +80,9 @@ should be present in a volume attached to the task. "name": "splunk_otel_collector" } ``` + The custom Collector container definition essentially: + - Specifies the Collector image. - Sets environment variable `SPLUNK_CONFIG` with the custom configuration file path. @@ -86,6 +90,7 @@ Alternatively, you can specify the custom configuration YAML directly using envi variable `SPLUNK_CONFIG_YAML` as describe [below](#direct-configuration). ### ecs_observer + Use extension [Amazon Elastic Container Service Observer](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/extension/observer/ecsobserver#amazon-elastic-container-service-observer) (`ecs_observer`) in your custom configuration to discover metrics targets @@ -93,6 +98,7 @@ in running tasks, filtered by service names, task definitions and container labe `ecs_observer` is currently limited to Prometheus targets and requires the read-only permissions below. You can add the permissions to the task role by adding them to a customer-managed policy that is attached to the task role. + ```text ecs:List* ecs:Describe* @@ -144,6 +150,7 @@ service: processors: [batch, resourcedetection] exporters: [signalfx] ``` + **Note:** The task ARN pattern in the configuration example above will cause `ecs_observer` to discover targets in running revisions of task `lorem-ipsum-task`. This means that when multiple revisions of task `lorem-ipsum-task` are running, the @@ -160,6 +167,7 @@ task ARN pattern must be updated to keep pace with task revisions. ``` ### Direct Configuration + In Fargate the filesystem is not readily available. This makes specifying the configuration YAML directly instead of using a file more convenient. 
The Collector provides environment variable `SPLUNK_CONFIG_YAML` for specifying the configuration YAML directly which can be @@ -200,6 +208,7 @@ placeholder values and image tag `0.33.0` is the latest at present. the task to have read access to the Parameter Store. ### Standalone Task + Extension `ecs_observer` is capable of scanning for targets in the entire cluster. This allows you to collect telemetry data by deploying the Collector in a task that is separate from tasks containing monitored applications. This is in contrast to the sidecar deployment @@ -210,4 +219,5 @@ processor for the standalone task since it would detect resources in the standal task itself as opposed to resources in the tasks containing the monitored applications. ### AWS Graviton2 + We support [AWS Graviton2](https://aws.amazon.com/ec2/graviton/) with the default Fargate configuration. Splunk OpenTelemetry Collector docker image can run on both amd64 and arm64 architectures. diff --git a/internal/signalfx-agent/pkg/monitors/ecs/ecs.go b/internal/signalfx-agent/pkg/monitors/ecs/ecs.go index 70e6355fe3..1f731a4247 100644 --- a/internal/signalfx-agent/pkg/monitors/ecs/ecs.go +++ b/internal/signalfx-agent/pkg/monitors/ecs/ecs.go @@ -86,6 +86,7 @@ type Monitor struct { // Configure the monitor and kick off volume metric syncing func (m *Monitor) Configure(conf *Config) error { m.logger = logger.WithField("monitorID", conf.MonitorID) + m.logger.Warn("[NOTICE] The ecs-metadata monitor is deprecated and will be removed in a future release. 
Use the awsecscontainermetrics receiver instead.") var err error m.imageFilter, err = filter.NewOverridableStringFilter(conf.ExcludedImages) if err != nil { diff --git a/internal/signalfx-agent/pkg/monitors/ecs/metadata.yaml b/internal/signalfx-agent/pkg/monitors/ecs/metadata.yaml index e76dfb34bc..6e6356d279 100644 --- a/internal/signalfx-agent/pkg/monitors/ecs/metadata.yaml +++ b/internal/signalfx-agent/pkg/monitors/ecs/metadata.yaml @@ -1,6 +1,8 @@ monitors: - dimensions: doc: | + The ecs-metadata monitor is deprecated and will be removed in a future release. Use the [awsecscontainermetrics receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/awsecscontainermetricsreceiver) instead. + This monitor reads container stats from a [ECS Task Metadata Endpoint version 2](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint-v2.html).