From b939ff3013a230f964fe14b56e9c5e3204ab3909 Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Tue, 18 Jul 2023 16:07:48 -0700 Subject: [PATCH] Add S3 sink documentation (#4340) (#4584) * Add S3 sink documentation * Update _data-prepper/pipelines/configuration/sinks/s3.md * Update _data-prepper/pipelines/configuration/sinks/s3.md * Update _data-prepper/pipelines/configuration/sinks/s3.md * Apply suggestions from code review * Apply suggestions from code review --------- (cherry picked from commit 0fcfc4862e3de511f8cb19b2037917cddbeadafe) Signed-off-by: Naarcha-AWS Signed-off-by: Naarcha-AWS <97990722+Naarcha-AWS@users.noreply.github.com> Signed-off-by: github-actions[bot] Co-authored-by: github-actions[bot] Co-authored-by: Chris Moore <107723039+cwillum@users.noreply.github.com> Co-authored-by: Nathan Bower --- .../pipelines/configuration/sinks/file.md | 22 +++--- .../configuration/sinks/opensearch.md | 6 +- .../pipelines/configuration/sinks/pipeline.md | 25 ++++--- .../pipelines/configuration/sinks/s3.md | 70 +++++++++++++++++++ .../pipelines/configuration/sinks/stdout.md | 10 +-- 5 files changed, 103 insertions(+), 30 deletions(-) create mode 100644 _data-prepper/pipelines/configuration/sinks/s3.md diff --git a/_data-prepper/pipelines/configuration/sinks/file.md b/_data-prepper/pipelines/configuration/sinks/file.md index 05b2dd6ff1..74af5a1803 100644 --- a/_data-prepper/pipelines/configuration/sinks/file.md +++ b/_data-prepper/pipelines/configuration/sinks/file.md @@ -1,25 +1,31 @@ --- layout: default -title: file sink +title: file parent: Sinks grand_parent: Pipelines nav_order: 45 --- -# file sink +# file -## Overview +Use the `file` sink to create a flat file output, usually a `.log` file. -You can use the `file` sink to create a flat file output. The following table describes options you can configure for the `file` sink. 
+## Configuration options + +The following table describes options you can configure for the `file` sink. Option | Required | Type | Description :--- | :--- | :--- | :--- path | Yes | String | Path for the output file (e.g. `logs/my-transformed-log.log`). - \ No newline at end of file diff --git a/_data-prepper/pipelines/configuration/sinks/opensearch.md b/_data-prepper/pipelines/configuration/sinks/opensearch.md index 81ebe0dbc4..472169a221 100644 --- a/_data-prepper/pipelines/configuration/sinks/opensearch.md +++ b/_data-prepper/pipelines/configuration/sinks/opensearch.md @@ -1,12 +1,12 @@ --- layout: default -title: OpenSearch sink +title: opensearch parent: Sinks grand_parent: Pipelines -nav_order: 45 +nav_order: 50 --- -# OpenSearch sink +# opensearch You can use the `opensearch` sink plugin to send data to an OpenSearch cluster, a legacy Elasticsearch cluster, or an Amazon OpenSearch Service domain. diff --git a/_data-prepper/pipelines/configuration/sinks/pipeline.md b/_data-prepper/pipelines/configuration/sinks/pipeline.md index 614a9c4efb..3cba75a220 100644 --- a/_data-prepper/pipelines/configuration/sinks/pipeline.md +++ b/_data-prepper/pipelines/configuration/sinks/pipeline.md @@ -1,25 +1,30 @@ --- layout: default -title: Pipeline sink +title: pipeline parent: Sinks grand_parent: Pipelines -nav_order: 45 +nav_order: 55 --- -# Pipeline sink +# pipeline -## Overview +Use the `pipeline` sink to write to another pipeline. -You can use the `pipeline` sink to write to another pipeline. +## Configuration options + +The `pipeline` sink supports the following configuration options. Option | Required | Type | Description :--- | :--- | :--- | :--- name | Yes | String | Name of the pipeline to write to. 
- \ No newline at end of file +``` +sample-pipeline: + sink: + - pipeline: + name: movies +``` diff --git a/_data-prepper/pipelines/configuration/sinks/s3.md b/_data-prepper/pipelines/configuration/sinks/s3.md new file mode 100644 index 0000000000..8114f76d50 --- /dev/null +++ b/_data-prepper/pipelines/configuration/sinks/s3.md @@ -0,0 +1,70 @@ +--- +layout: default +title: s3 +parent: Sinks +grand_parent: Pipelines +nav_order: 55 +--- + +# s3 + +The `s3` sink sends records to an Amazon Simple Storage Service (Amazon S3) bucket using the S3 client. + +## Usage + +The following example creates a pipeline configured with an `s3` sink. It contains additional options for customizing the event and size thresholds at which the pipeline sends record events and sets the codec type to `ndjson`: + +``` +pipeline: + ... + sink: + - s3: + aws: + region: us-east-1 + sts_role_arn: arn:aws:iam::123456789012:role/Data-Prepper + sts_header_overrides: + max_retries: 5 + bucket: + name: bucket_name + object_key: + path_prefix: my-elb/%{yyyy}/%{MM}/%{dd}/ + threshold: + event_count: 2000 + maximum_size: 50mb + event_collect_timeout: 15s + codec: + ndjson: + buffer_type: in_memory +``` + +## Configuration + +Use the following options when customizing the `s3` sink. + +Option | Required | Type | Description +:--- | :--- | :--- | :--- +`bucket` | Yes | String | The object from which the data is retrieved and then stored. The `name` must match the name of your object store. +`region` | No | String | The AWS Region to use when connecting to S3. Defaults to the [standard SDK behavior to determine the Region](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/region-selection.html). +`sts_role_arn` | No | String | The [AWS Security Token Service](https://docs.aws.amazon.com/STS/latest/APIReference/welcome.html) (AWS STS) role that the `s3` sink assumes when sending a request to S3. 
Defaults to the [standard SDK behavior for credentials](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/credentials.html). +`sts_external_id` | No | String | The external ID to attach to AssumeRole requests from AWS STS. +`max_retries` | No | Integer | The maximum number of times a single request should retry when ingesting data to S3. Defaults to `5`. +`object_key` | No | Object | Sets the `path_prefix` and the `file_pattern` of the object store. Defaults to the S3 object `events-%{yyyy-MM-dd'T'hh-mm-ss}` found inside the root directory of the bucket. + +## Threshold configuration options + +Use the following options to set ingestion thresholds for the `s3` sink. + +Option | Required | Type | Description +:--- | :--- | :--- | :--- +`event_count` | Yes | Integer | The maximum number of events the S3 bucket can ingest. +`maximum_size` | Yes | String | The maximum number of bytes that the S3 bucket can ingest. Defaults to `50mb`. +`event_collect_timeout` | Yes | String | Sets the time period during which events are collected before ingestion. All values are strings that represent duration, either an ISO 8601 notation string, such as `PT20.345S`, or a simple notation, such as `60s` or `1500ms`. + +## buffer_type + +`buffer_type` is an optional configuration that determines how events are stored temporarily before they are flushed into an S3 bucket. Use one of the following options: + +- `local_file`: Flushes the record into a file on your machine. +- `in_memory`: Stores the record in memory. + + \ No newline at end of file diff --git a/_data-prepper/pipelines/configuration/sinks/stdout.md b/_data-prepper/pipelines/configuration/sinks/stdout.md index 7b55cb0a10..35b1b08126 100644 --- a/_data-prepper/pipelines/configuration/sinks/stdout.md +++ b/_data-prepper/pipelines/configuration/sinks/stdout.md @@ -8,12 +8,4 @@ nav_order: 45 # stdout sink -## Overview - -You can use the `stdout` sink for console output and testing. It has no configurable options. 
- - \ No newline at end of file +Use the `stdout` sink for console output and testing. It has no configurable options.