From 203c48bae361f2509de1e800ee9da0bd604769e5 Mon Sep 17 00:00:00 2001 From: Andrew Tulloch Date: Tue, 1 Oct 2024 19:48:52 +0100 Subject: [PATCH 1/3] Bedrock Agent custom transformation support --- .../service/bedrockagent/bedrockagent_test.go | 15 +- internal/service/bedrockagent/data_source.go | 131 +++++++++++++++++- .../service/bedrockagent/data_source_test.go | 116 ++++++++++++++++ .../r/bedrockagent_data_source.html.markdown | 33 +++++ 4 files changed, 286 insertions(+), 9 deletions(-) diff --git a/internal/service/bedrockagent/bedrockagent_test.go b/internal/service/bedrockagent/bedrockagent_test.go index bf838634fad5..f9a5413a39b4 100644 --- a/internal/service/bedrockagent/bedrockagent_test.go +++ b/internal/service/bedrockagent/bedrockagent_test.go @@ -21,13 +21,14 @@ func TestAccBedrockAgent_serial(t *testing.T) { "updateOpenSearch": testAccKnowledgeBase_updateOpenSearch, }, "DataSource": { - acctest.CtBasic: testAccDataSource_basic, - acctest.CtDisappears: testAccDataSource_disappears, - "full": testAccDataSource_full, - "update": testAccDataSource_update, - "semantic": testAccDataSource_fullSemantic, - "hierarchical": testAccDataSource_fullHierarchical, - "parsing": testAccDataSource_parsing, + acctest.CtBasic: testAccDataSource_basic, + acctest.CtDisappears: testAccDataSource_disappears, + "full": testAccDataSource_full, + "update": testAccDataSource_update, + "semantic": testAccDataSource_fullSemantic, + "hierarchical": testAccDataSource_fullHierarchical, + "parsing": testAccDataSource_parsing, + "customtransformation": testAccDataSource_fullCustomTranformation, }, } diff --git a/internal/service/bedrockagent/data_source.go b/internal/service/bedrockagent/data_source.go index c98ce0a65ee7..c2b4204dafc8 100644 --- a/internal/service/bedrockagent/data_source.go +++ b/internal/service/bedrockagent/data_source.go @@ -307,6 +307,106 @@ func (r *dataSourceResource) Schema(ctx context.Context, request resource.Schema }, }, }, + 
"custom_transformation_configuration": schema.ListNestedBlock{ + CustomType: fwtypes.NewListNestedObjectTypeOf[customTransformationConfigurationModel](ctx), + PlanModifiers: []planmodifier.List{ + listplanmodifier.RequiresReplace(), + }, + Validators: []validator.List{ + listvalidator.SizeAtMost(1), + }, + NestedObject: schema.NestedBlockObject{ + Blocks: map[string]schema.Block{ + "intermediate_storage": schema.ListNestedBlock{ + CustomType: fwtypes.NewListNestedObjectTypeOf[intermediaStorageModel](ctx), + PlanModifiers: []planmodifier.List{ + listplanmodifier.RequiresReplace(), + }, + Validators: []validator.List{ + listvalidator.SizeAtMost(1), + }, + NestedObject: schema.NestedBlockObject{ + Blocks: map[string]schema.Block{ + "s3_location": schema.ListNestedBlock{ + CustomType: fwtypes.NewListNestedObjectTypeOf[s3LocationModel](ctx), + PlanModifiers: []planmodifier.List{ + listplanmodifier.RequiresReplace(), + }, + Validators: []validator.List{ + listvalidator.SizeAtMost(1), + }, + NestedObject: schema.NestedBlockObject{ + Attributes: map[string]schema.Attribute{ + "uri": schema.StringAttribute{ + Required: true, + PlanModifiers: []planmodifier.String{ + stringplanmodifier.RequiresReplace(), + }, + }, + }, + }, + }, + }, + }, + }, + "transformation": schema.ListNestedBlock{ + CustomType: fwtypes.NewListNestedObjectTypeOf[transformationModel](ctx), + PlanModifiers: []planmodifier.List{ + listplanmodifier.RequiresReplace(), + }, + Validators: []validator.List{ + listvalidator.SizeAtMost(1), + }, + NestedObject: schema.NestedBlockObject{ + Attributes: map[string]schema.Attribute{ + "step_to_apply": schema.StringAttribute{ + CustomType: fwtypes.StringEnumType[awstypes.StepType](), + Required: true, + PlanModifiers: []planmodifier.String{ + stringplanmodifier.RequiresReplace(), + }, + }, + }, + Blocks: map[string]schema.Block{ + "transformation_function": schema.ListNestedBlock{ + CustomType: fwtypes.NewListNestedObjectTypeOf[transformationFunctionModel](ctx), + 
PlanModifiers: []planmodifier.List{ + listplanmodifier.RequiresReplace(), + }, + Validators: []validator.List{ + listvalidator.SizeAtMost(1), + }, + NestedObject: schema.NestedBlockObject{ + Blocks: map[string]schema.Block{ + "transformation_lambda_configuration": schema.ListNestedBlock{ + CustomType: fwtypes.NewListNestedObjectTypeOf[transformationLambdaConfigurationModel](ctx), + PlanModifiers: []planmodifier.List{ + listplanmodifier.RequiresReplace(), + }, + Validators: []validator.List{ + listvalidator.SizeAtMost(1), + }, + NestedObject: schema.NestedBlockObject{ + Attributes: map[string]schema.Attribute{ + "lambda_arn": schema.StringAttribute{ + CustomType: fwtypes.ARNType, + Required: true, + PlanModifiers: []planmodifier.String{ + stringplanmodifier.RequiresReplace(), + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, "parsing_configuration": schema.ListNestedBlock{ CustomType: fwtypes.NewListNestedObjectTypeOf[parsingConfigurationModel](ctx), PlanModifiers: []planmodifier.List{ @@ -657,8 +757,9 @@ type serverSideEncryptionConfigurationModel struct { } type vectorIngestionConfigurationModel struct { - ChunkingConfiguration fwtypes.ListNestedObjectValueOf[chunkingConfigurationModel] `tfsdk:"chunking_configuration"` - ParsingConfiguration fwtypes.ListNestedObjectValueOf[parsingConfigurationModel] `tfsdk:"parsing_configuration"` + ChunkingConfiguration fwtypes.ListNestedObjectValueOf[chunkingConfigurationModel] `tfsdk:"chunking_configuration"` + CustomTransformationConfiguration fwtypes.ListNestedObjectValueOf[customTransformationConfigurationModel] `tfsdk:"custom_transformation_configuration"` + ParsingConfiguration fwtypes.ListNestedObjectValueOf[parsingConfigurationModel] `tfsdk:"parsing_configuration"` } type parsingConfigurationModel struct { @@ -666,6 +767,32 @@ type parsingConfigurationModel struct { BedrockFoundationModelConfiguration fwtypes.ListNestedObjectValueOf[bedrockFoundationModelConfigurationModel] 
`tfsdk:"bedrock_foundation_model_configuration"` } +type customTransformationConfigurationModel struct { + IntermediateStorage fwtypes.ListNestedObjectValueOf[intermediaStorageModel] `tfsdk:"intermediate_storage"` + Transformation fwtypes.ListNestedObjectValueOf[transformationModel] `tfsdk:"transformation"` +} + +type intermediaStorageModel struct { + S3Location fwtypes.ListNestedObjectValueOf[s3LocationModel] `tfsdk:"s3_location"` +} + +type s3LocationModel struct { + Uri types.String `tfsdk:"uri"` +} + +type transformationModel struct { + StepToApply fwtypes.StringEnum[awstypes.StepType] `tfsdk:"step_to_apply"` + TransformationFunction fwtypes.ListNestedObjectValueOf[transformationFunctionModel] `tfsdk:"transformation_function"` +} + +type transformationFunctionModel struct { + TransformationLambdaConfiguration fwtypes.ListNestedObjectValueOf[transformationLambdaConfigurationModel] `tfsdk:"transformation_lambda_configuration"` +} + +type transformationLambdaConfigurationModel struct { + LambdaArn fwtypes.ARN `tfsdk:"lambda_arn"` +} + type bedrockFoundationModelConfigurationModel struct { ModelArn fwtypes.ARN `tfsdk:"model_arn"` ParsingPrompt fwtypes.ListNestedObjectValueOf[parsingPromptModel] `tfsdk:"parsing_prompt"` diff --git a/internal/service/bedrockagent/data_source_test.go b/internal/service/bedrockagent/data_source_test.go index eb5a603bbf9f..f3fd86ea6020 100644 --- a/internal/service/bedrockagent/data_source_test.go +++ b/internal/service/bedrockagent/data_source_test.go @@ -258,6 +258,71 @@ func testAccDataSource_fullHierarchical(t *testing.T) { }) } +// Prerequisites: +// * psql run via null_resource/provisioner "local-exec" +// * jq for parsing output from aws cli to retrieve postgres password +func testAccDataSource_fullCustomTranformation(t *testing.T) { + acctest.SkipIfExeNotOnPath(t, "psql") + acctest.SkipIfExeNotOnPath(t, "jq") + acctest.SkipIfExeNotOnPath(t, "aws") + + ctx := acctest.Context(t) + if testing.Short() { + t.Skip("skipping 
long-running test in short mode") + } + + var dataSource types.DataSource + rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_bedrockagent_data_source.test" + foundationModel := "amazon.titan-embed-text-v1" + + resource.Test(t, resource.TestCase{ + PreCheck: func() { + acctest.PreCheck(ctx, t) + }, + ErrorCheck: acctest.ErrorCheck(t, names.BedrockAgentServiceID), + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories, + ExternalProviders: map[string]resource.ExternalProvider{ + "null": { + Source: "hashicorp/null", + VersionConstraint: "3.2.2", + }, + }, + CheckDestroy: testAccCheckDataSourceDestroy(ctx), + Steps: []resource.TestStep{ + { + Config: testAccDataSourceConfig_fullCustomTransformation(rName, foundationModel), + Check: resource.ComposeAggregateTestCheckFunc( + testAccCheckDataSourceExists(ctx, resourceName, &dataSource), + resource.TestCheckResourceAttr(resourceName, "data_deletion_policy", "RETAIN"), + resource.TestCheckResourceAttr(resourceName, "data_source_configuration.#", acctest.Ct1), + resource.TestCheckResourceAttr(resourceName, "data_source_configuration.0.s3_configuration.#", acctest.Ct1), + resource.TestCheckResourceAttrSet(resourceName, "data_source_configuration.0.s3_configuration.0.bucket_arn"), + resource.TestCheckNoResourceAttr(resourceName, "data_source_configuration.0.s3_configuration.0.bucket_owner_account_id"), + resource.TestCheckResourceAttr(resourceName, "data_source_configuration.0.s3_configuration.0.inclusion_prefixes.#", acctest.Ct1), + resource.TestCheckTypeSetElemAttr(resourceName, "data_source_configuration.0.s3_configuration.0.inclusion_prefixes.*", "Europe/France/Nouvelle-Aquitaine/Bordeaux"), + resource.TestCheckResourceAttr(resourceName, "data_source_configuration.0.type", "S3"), + resource.TestCheckResourceAttrSet(resourceName, "data_source_id"), + resource.TestCheckResourceAttr(resourceName, names.AttrDescription, "testing"), + resource.TestCheckResourceAttr(resourceName, 
names.AttrName, rName), + resource.TestCheckResourceAttr(resourceName, "server_side_encryption_configuration.#", acctest.Ct0), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.#", acctest.Ct1), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.#", acctest.Ct1), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.chunking_strategy", "FIXED_SIZE"), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.fixed_size_chunking_configuration.#", acctest.Ct1), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.fixed_size_chunking_configuration.0.max_tokens", acctest.Ct3), + resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.fixed_size_chunking_configuration.0.overlap_percentage", "80"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + // Prerequisites: // * psql run via null_resource/provisioner "local-exec" // * jq for parsing output from aws cli to retrieve postgres password @@ -659,6 +724,57 @@ resource "aws_bedrockagent_data_source" "test" { `, rName)) } +func testAccDataSourceConfig_fullCustomTransformation(rName, embeddingModel string) string { + return acctest.ConfigCompose(testAccDataSourceConfig_base(rName, embeddingModel), + testAccAgentActionGroupConfig_lambda(rName), fmt.Sprintf(` +resource "aws_bedrockagent_data_source" "test" { + name = %[1]q + knowledge_base_id = aws_bedrockagent_knowledge_base.test.id + data_deletion_policy = "RETAIN" + description = "testing" + + data_source_configuration { + type = "S3" + + s3_configuration { + bucket_arn = aws_s3_bucket.test.arn + inclusion_prefixes = ["Europe/France/Nouvelle-Aquitaine/Bordeaux"] + } + } + + vector_ingestion_configuration { + chunking_configuration { + 
chunking_strategy = "FIXED_SIZE" + + fixed_size_chunking_configuration { + max_tokens = 3 + overlap_percentage = 80 + } + } + custom_transformation_configuration { + intermediate_storage { + s3_location { + uri = "s3://${aws_s3_bucket.test_im.bucket}/customTransform" + } + } + transformation { + step_to_apply = "POST_CHUNKING" + transformation_function { + transformation_lambda_configuration { + lambda_arn = aws_lambda_function.test_lambda.arn + } + } + } + } + } +} +resource "aws_s3_bucket" "test_im" { + bucket = "%[1]s-im" +} + +`, rName)) +} + func testAccDataSourceConfig_updated(rName, embeddingModel string) string { return acctest.ConfigCompose(testAccDataSourceConfig_base(rName, embeddingModel), fmt.Sprintf(` resource "aws_bedrockagent_data_source" "test" { diff --git a/website/docs/r/bedrockagent_data_source.html.markdown b/website/docs/r/bedrockagent_data_source.html.markdown index da048b39fd8d..3b14891ccebd 100644 --- a/website/docs/r/bedrockagent_data_source.html.markdown +++ b/website/docs/r/bedrockagent_data_source.html.markdown @@ -68,6 +68,7 @@ The `server_side_encryption_configuration` configuration block supports the foll The `vector_ingestion_configuration` configuration block supports the following arguments: * `chunking_configuration` - (Optional, Forces new resource) Details about how to chunk the documents in the data source. A chunk refers to an excerpt from a data source that is returned when the knowledge base that it belongs to is queried. See [`chunking_configuration` block](#chunking_configuration-block) for details. +* `custom_transformation_configuration` - (Optional, Forces new resource) Configuration for custom transformation of data source documents. See [`custom_transformation_configuration` block](#custom_transformation_configuration-block) for details. * `parsing_configuration` - (Optional, Forces new resource) Configuration for custom parsing of data source documents. See [`parsing_configuration` block](#parsing_configuration-block) for details. 
### `chunking_configuration` block @@ -107,6 +108,38 @@ The `semantic_chunking_configuration` block supports the following arguments: * `buffer_size` - (Required, Forces new resource) The buffer size. * `max_tokens` - (Required, Forces new resource) The maximum number of tokens a chunk can contain. +### `custom_transformation_configuration` block + +The `custom_transformation_configuration` block supports the following arguments: + +* `intermediate_storage` - (Required, Forces new resource) The intermediate storage for custom transformation. See [`intermediate_storage` block](#intermediate_storage-block) for details. +* `transformation` - (Required, Forces new resource) The transformation to apply to the data source documents. See [`transformation` block](#transformation-block) for details. + +### `intermediate_storage` block + +The `intermediate_storage` block supports the following arguments: + +* `s3_location` - (Required, Forces new resource) Configuration block for intermediate S3 storage. + +### `s3_location` block + +The `s3_location` block supports the following arguments: + +* `uri` - (Required, Forces new resource) S3 URI for intermediate storage. + +### `transformation` block + +The `transformation` block supports the following arguments: + +* `step_to_apply` - (Required, Forces new resource) The step at which to apply the transformation. Currently only `POST_CHUNKING` is supported. +* `transformation_function` - (Required, Forces new resource) The configuration of the transformation function. Contains a `transformation_lambda_configuration` block with the lambda configuration for custom transformation; see [`transformation_lambda_configuration` block](#transformation_lambda_configuration-block) for details. + +### `transformation_lambda_configuration` block + +The `transformation_lambda_configuration` block supports the following arguments: + +* `lambda_arn` - (Required, Forces new resource) The ARN of the lambda to use for custom transformation. 
+ ### `parsing_configuration` block The `parsing_configuration` configuration block supports the following arguments: From 91d4a85f320e977a8faea9ac9a7e702a8f6bd5fb Mon Sep 17 00:00:00 2001 From: Andrew Tulloch Date: Tue, 1 Oct 2024 19:58:39 +0100 Subject: [PATCH 2/3] Changelog --- .changelog/39556.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/39556.txt diff --git a/.changelog/39556.txt b/.changelog/39556.txt new file mode 100644 index 000000000000..0e7ea1cdd280 --- /dev/null +++ b/.changelog/39556.txt @@ -0,0 +1,3 @@ +```release-note:enhancement +resource/aws_bedrockagent_data_source: Add `vector_ingestion_configuration.custom_transformation_configuration` argument +``` \ No newline at end of file From 191ed4ee88b96618591c6b213ea90ad138e2b5ce Mon Sep 17 00:00:00 2001 From: Andrew Tulloch Date: Tue, 1 Oct 2024 21:55:04 +0100 Subject: [PATCH 3/3] Use constant --- internal/service/bedrockagent/data_source.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/service/bedrockagent/data_source.go b/internal/service/bedrockagent/data_source.go index c2b4204dafc8..e03ba47c3329 100644 --- a/internal/service/bedrockagent/data_source.go +++ b/internal/service/bedrockagent/data_source.go @@ -337,7 +337,7 @@ func (r *dataSourceResource) Schema(ctx context.Context, request resource.Schema }, NestedObject: schema.NestedBlockObject{ Attributes: map[string]schema.Attribute{ - "uri": schema.StringAttribute{ + names.AttrURI: schema.StringAttribute{ Required: true, PlanModifiers: []planmodifier.String{ stringplanmodifier.RequiresReplace(),