From 8cc610482d61041e10d2904042a8e877727ef128 Mon Sep 17 00:00:00 2001
From: awstools <awstools@amazon.com>
Date: Fri, 20 Dec 2024 19:13:29 +0000
Subject: [PATCH] feat(client-bedrock-agent-runtime): bedrock agents now
 supports long term memory and performance configs. Invokeflow supports
 performance configs. RetrieveAndGenerate performance configs

---
 .../src/commands/DeleteAgentMemoryCommand.ts  |   1 +
 .../src/commands/InvokeAgentCommand.ts        |  47 ++++-
 .../src/commands/InvokeFlowCommand.ts         |   5 +
 .../src/commands/InvokeInlineAgentCommand.ts  |  17 +-
 .../commands/RetrieveAndGenerateCommand.ts    |   9 +
 .../RetrieveAndGenerateStreamCommand.ts       |   9 +
 .../src/models/models_0.ts                    | 190 +++++++++++++++++-
 .../src/protocols/Aws_restJson1.ts            |  58 ++++++
 .../aws-models/bedrock-agent-runtime.json     | 159 ++++++++++++++-
 9 files changed, 466 insertions(+), 29 deletions(-)
diff --git a/clients/client-bedrock-agent-runtime/src/commands/DeleteAgentMemoryCommand.ts b/clients/client-bedrock-agent-runtime/src/commands/DeleteAgentMemoryCommand.ts
index 37ec07bbcc6c3..ff78684580bf6 100644
--- a/clients/client-bedrock-agent-runtime/src/commands/DeleteAgentMemoryCommand.ts
+++ b/clients/client-bedrock-agent-runtime/src/commands/DeleteAgentMemoryCommand.ts
@@ -43,6 +43,7 @@ export interface DeleteAgentMemoryCommandOutput extends DeleteAgentMemoryRespons
  *   agentId: "STRING_VALUE", // required
  *   agentAliasId: "STRING_VALUE", // required
  *   memoryId: "STRING_VALUE",
+ *   sessionId: "STRING_VALUE",
  * };
  * const command = new DeleteAgentMemoryCommand(input);
  * const response = await client.send(command);
diff --git a/clients/client-bedrock-agent-runtime/src/commands/InvokeAgentCommand.ts b/clients/client-bedrock-agent-runtime/src/commands/InvokeAgentCommand.ts
index 547b38a2ef100..1d768092e95d8 100644
--- a/clients/client-bedrock-agent-runtime/src/commands/InvokeAgentCommand.ts
+++ b/clients/client-bedrock-agent-runtime/src/commands/InvokeAgentCommand.ts
@@ -49,6 +49,25 @@ export interface InvokeAgentCommandOutput extends InvokeAgentResponse, __Metadat
  *                <p>To activate trace enablement, turn <code>enableTrace</code> to <code>true</code>. Trace enablement helps you follow the agent's reasoning process that led it to the information it processed, the actions it took, and the final result it yielded. For more information, see <a href="https://docs.aws.amazon.com/bedrock/latest/userguide/agents-test.html#trace-events">Trace enablement</a>.</p>
  *             </li>
  *             <li>
+ *                <p>To stream agent responses, make sure that only the orchestration prompt is enabled. Agent streaming is not supported for the following steps:
+ *         </p>
+ *                <ul>
+ *                   <li>
+ *                      <p>
+ *                         <code>Pre-processing</code>
+ *                      </p>
+ *                   </li>
+ *                   <li>
+ *                      <p>
+ *                         <code>Post-processing</code>
+ *                      </p>
+ *                   </li>
+ *                   <li>
+ *                      <p>An agent with one knowledge base and <code>User Input</code> not enabled</p>
+ *                   </li>
+ *                </ul>
+ *             </li>
+ *             <li>
  *                <p>End a conversation by setting <code>endSession</code> to <code>true</code>.</p>
  *             </li>
  *             <li>
@@ -91,13 +110,13 @@ export interface InvokeAgentCommandOutput extends InvokeAgentResponse, __Metadat
  *           httpMethod: "STRING_VALUE",
  *           apiPath: "STRING_VALUE",
  *           confirmationState: "CONFIRM" || "DENY",
+ *           responseState: "FAILURE" || "REPROMPT",
+ *           httpStatusCode: Number("int"),
  *           responseBody: { // ResponseBody
  *             "<keys>": { // ContentBody
  *               body: "STRING_VALUE",
  *             },
  *           },
- *           httpStatusCode: Number("int"),
- *           responseState: "FAILURE" || "REPROMPT",
  *           agentId: "STRING_VALUE",
  *         },
  *         functionResult: { // FunctionResult
@@ -251,6 +270,11 @@ export interface InvokeAgentCommandOutput extends InvokeAgentResponse, __Metadat
  *   enableTrace: true || false,
  *   inputText: "STRING_VALUE",
  *   memoryId: "STRING_VALUE",
+ *   bedrockModelConfigurations: { // BedrockModelConfigurations
+ *     performanceConfig: { // PerformanceConfiguration
+ *       latency: "standard" || "optimized",
+ *     },
+ *   },
  *   streamingConfigurations: { // StreamingConfigurations
  *     streamFinalResponse: true || false,
  *     applyGuardrailInterval: Number("int"),
@@ -535,13 +559,13 @@ export interface InvokeAgentCommandOutput extends InvokeAgentResponse, __Metadat
  * //                         httpMethod: "STRING_VALUE",
  * //                         apiPath: "STRING_VALUE",
  * //                         confirmationState: "CONFIRM" || "DENY",
+ * //                         responseState: "FAILURE" || "REPROMPT",
+ * //                         httpStatusCode: Number("int"),
  * //                         responseBody: { // ResponseBody
  * //                           "<keys>": { // ContentBody
  * //                             body: "STRING_VALUE",
  * //                           },
  * //                         },
- * //                         httpStatusCode: Number("int"),
- * //                         responseState: "FAILURE" || "REPROMPT",
  * //                         agentId: "STRING_VALUE",
  * //                       },
  * //                       functionResult: { // FunctionResult
@@ -798,13 +822,13 @@ export interface InvokeAgentCommandOutput extends InvokeAgentResponse, __Metadat
  * //                         httpMethod: "STRING_VALUE",
  * //                         apiPath: "STRING_VALUE",
  * //                         confirmationState: "CONFIRM" || "DENY",
+ * //                         responseState: "FAILURE" || "REPROMPT",
+ * //                         httpStatusCode: Number("int"),
  * //                         responseBody: {
  * //                           "<keys>": {
  * //                             body: "STRING_VALUE",
  * //                           },
  * //                         },
- * //                         httpStatusCode: Number("int"),
- * //                         responseState: "FAILURE" || "REPROMPT",
  * //                         agentId: "STRING_VALUE",
  * //                       },
  * //                       functionResult: {
@@ -1074,6 +1098,9 @@ export interface InvokeAgentCommandOutput extends InvokeAgentResponse, __Metadat
  * //       message: "STRING_VALUE",
  * //       resourceName: "STRING_VALUE",
  * //     },
+ * //     modelNotReadyException: { // ModelNotReadyException
+ * //       message: "STRING_VALUE",
+ * //     },
  * //     files: { // FilePart
  * //       files: [ // OutputFiles
  * //         { // OutputFile
@@ -1112,6 +1139,14 @@ export interface InvokeAgentCommandOutput extends InvokeAgentResponse, __Metadat
  * @throws {@link InternalServerException} (server fault)
  *  <p>An internal server error occurred. Retry your request.</p>
  *
+ * @throws {@link ModelNotReadyException} (client fault)
+ *  <p>
+ *       The model specified in the request is not ready to serve inference requests. The AWS SDK
+ *       will automatically retry the operation up to 5 times. For information about configuring
+ *       automatic retries, see <a href="https://docs.aws.amazon.com/sdkref/latest/guide/feature-retry-behavior.html">Retry behavior</a> in the <i>AWS SDKs and Tools</i>
+ *       reference guide.
+ *     </p>
+ *
  * @throws {@link ResourceNotFoundException} (client fault)
  *  <p>The specified resource Amazon Resource Name (ARN) was not found. Check the Amazon Resource Name (ARN) and try your request again.</p>
  *
diff --git a/clients/client-bedrock-agent-runtime/src/commands/InvokeFlowCommand.ts b/clients/client-bedrock-agent-runtime/src/commands/InvokeFlowCommand.ts
index 899a1755b8876..c82b331631062 100644
--- a/clients/client-bedrock-agent-runtime/src/commands/InvokeFlowCommand.ts
+++ b/clients/client-bedrock-agent-runtime/src/commands/InvokeFlowCommand.ts
@@ -60,6 +60,11 @@ export interface InvokeFlowCommandOutput extends InvokeFlowResponse, __MetadataB
  *     },
  *   ],
  *   enableTrace: true || false,
+ *   modelPerformanceConfiguration: { // ModelPerformanceConfiguration
+ *     performanceConfig: { // PerformanceConfiguration
+ *       latency: "standard" || "optimized",
+ *     },
+ *   },
  * };
  * const command = new InvokeFlowCommand(input);
  * const response = await client.send(command);
diff --git a/clients/client-bedrock-agent-runtime/src/commands/InvokeInlineAgentCommand.ts b/clients/client-bedrock-agent-runtime/src/commands/InvokeInlineAgentCommand.ts
index f75823cdf90ae..2b3aad8283fc3 100644
--- a/clients/client-bedrock-agent-runtime/src/commands/InvokeInlineAgentCommand.ts
+++ b/clients/client-bedrock-agent-runtime/src/commands/InvokeInlineAgentCommand.ts
@@ -89,13 +89,13 @@ export interface InvokeInlineAgentCommandOutput extends InvokeInlineAgentRespons
  *           httpMethod: "STRING_VALUE",
  *           apiPath: "STRING_VALUE",
  *           confirmationState: "CONFIRM" || "DENY",
+ *           responseState: "FAILURE" || "REPROMPT",
+ *           httpStatusCode: Number("int"),
  *           responseBody: { // ResponseBody
  *             "<keys>": { // ContentBody
  *               body: "STRING_VALUE",
  *             },
  *           },
- *           httpStatusCode: Number("int"),
- *           responseState: "FAILURE" || "REPROMPT",
  *           agentId: "STRING_VALUE",
  *         },
  *         functionResult: { // FunctionResult
@@ -293,6 +293,11 @@ export interface InvokeInlineAgentCommandOutput extends InvokeInlineAgentRespons
  *     ],
  *     overrideLambda: "STRING_VALUE",
  *   },
+ *   bedrockModelConfigurations: { // InlineBedrockModelConfigurations
+ *     performanceConfig: { // PerformanceConfiguration
+ *       latency: "standard" || "optimized",
+ *     },
+ *   },
  * };
  * const command = new InvokeInlineAgentCommand(input);
  * const response = await client.send(command);
@@ -572,13 +577,13 @@ export interface InvokeInlineAgentCommandOutput extends InvokeInlineAgentRespons
  * //                         httpMethod: "STRING_VALUE",
  * //                         apiPath: "STRING_VALUE",
  * //                         confirmationState: "CONFIRM" || "DENY",
+ * //                         responseState: "FAILURE" || "REPROMPT",
+ * //                         httpStatusCode: Number("int"),
  * //                         responseBody: { // ResponseBody
  * //                           "<keys>": { // ContentBody
  * //                             body: "STRING_VALUE",
  * //                           },
  * //                         },
- * //                         httpStatusCode: Number("int"),
- * //                         responseState: "FAILURE" || "REPROMPT",
  * //                         agentId: "STRING_VALUE",
  * //                       },
  * //                       functionResult: { // FunctionResult
@@ -835,13 +840,13 @@ export interface InvokeInlineAgentCommandOutput extends InvokeInlineAgentRespons
  * //                         httpMethod: "STRING_VALUE",
  * //                         apiPath: "STRING_VALUE",
  * //                         confirmationState: "CONFIRM" || "DENY",
+ * //                         responseState: "FAILURE" || "REPROMPT",
+ * //                         httpStatusCode: Number("int"),
  * //                         responseBody: {
  * //                           "<keys>": {
  * //                             body: "STRING_VALUE",
  * //                           },
  * //                         },
- * //                         httpStatusCode: Number("int"),
- * //                         responseState: "FAILURE" || "REPROMPT",
  * //                         agentId: "STRING_VALUE",
  * //                       },
  * //                       functionResult: {
diff --git a/clients/client-bedrock-agent-runtime/src/commands/RetrieveAndGenerateCommand.ts b/clients/client-bedrock-agent-runtime/src/commands/RetrieveAndGenerateCommand.ts
index 59151eae69354..1a083f1dda7da 100644
--- a/clients/client-bedrock-agent-runtime/src/commands/RetrieveAndGenerateCommand.ts
+++ b/clients/client-bedrock-agent-runtime/src/commands/RetrieveAndGenerateCommand.ts
@@ -170,6 +170,9 @@ export interface RetrieveAndGenerateCommandOutput extends RetrieveAndGenerateRes
  *         additionalModelRequestFields: {
  *           "<keys>": "DOCUMENT_VALUE",
  *         },
+ *         performanceConfig: { // PerformanceConfiguration
+ *           latency: "standard" || "optimized",
+ *         },
  *       },
  *       orchestrationConfiguration: { // OrchestrationConfiguration
  *         promptTemplate: {
@@ -191,6 +194,9 @@ export interface RetrieveAndGenerateCommandOutput extends RetrieveAndGenerateRes
  *         queryTransformationConfiguration: { // QueryTransformationConfiguration
  *           type: "QUERY_DECOMPOSITION", // required
  *         },
+ *         performanceConfig: {
+ *           latency: "standard" || "optimized",
+ *         },
  *       },
  *     },
  *     externalSourcesConfiguration: { // ExternalSourcesRetrieveAndGenerateConfiguration
@@ -229,6 +235,9 @@ export interface RetrieveAndGenerateCommandOutput extends RetrieveAndGenerateRes
  *         additionalModelRequestFields: {
  *           "<keys>": "DOCUMENT_VALUE",
  *         },
+ *         performanceConfig: {
+ *           latency: "standard" || "optimized",
+ *         },
  *       },
  *     },
  *   },
diff --git a/clients/client-bedrock-agent-runtime/src/commands/RetrieveAndGenerateStreamCommand.ts b/clients/client-bedrock-agent-runtime/src/commands/RetrieveAndGenerateStreamCommand.ts
index fb12fde16d15a..a48f5914b20b4 100644
--- a/clients/client-bedrock-agent-runtime/src/commands/RetrieveAndGenerateStreamCommand.ts
+++ b/clients/client-bedrock-agent-runtime/src/commands/RetrieveAndGenerateStreamCommand.ts
@@ -173,6 +173,9 @@ export interface RetrieveAndGenerateStreamCommandOutput extends RetrieveAndGener
  *         additionalModelRequestFields: {
  *           "<keys>": "DOCUMENT_VALUE",
  *         },
+ *         performanceConfig: { // PerformanceConfiguration
+ *           latency: "standard" || "optimized",
+ *         },
  *       },
  *       orchestrationConfiguration: { // OrchestrationConfiguration
  *         promptTemplate: {
@@ -194,6 +197,9 @@ export interface RetrieveAndGenerateStreamCommandOutput extends RetrieveAndGener
  *         queryTransformationConfiguration: { // QueryTransformationConfiguration
  *           type: "QUERY_DECOMPOSITION", // required
  *         },
+ *         performanceConfig: {
+ *           latency: "standard" || "optimized",
+ *         },
  *       },
  *     },
  *     externalSourcesConfiguration: { // ExternalSourcesRetrieveAndGenerateConfiguration
@@ -232,6 +238,9 @@ export interface RetrieveAndGenerateStreamCommandOutput extends RetrieveAndGener
  *         additionalModelRequestFields: {
  *           "<keys>": "DOCUMENT_VALUE",
  *         },
+ *         performanceConfig: {
+ *           latency: "standard" || "optimized",
+ *         },
  *       },
  *     },
  *   },
diff --git a/clients/client-bedrock-agent-runtime/src/models/models_0.ts b/clients/client-bedrock-agent-runtime/src/models/models_0.ts
index 14a30a9bb6a27..b1373e1fe64a2 100644
--- a/clients/client-bedrock-agent-runtime/src/models/models_0.ts
+++ b/clients/client-bedrock-agent-runtime/src/models/models_0.ts
@@ -504,7 +504,7 @@ export interface AgentActionGroup {
    *          <p>To allow your agent to generate, run, and troubleshoot code when trying to complete a task, set this field to <code>AMAZON.CodeInterpreter</code>. You must
    *             leave the <code>description</code>, <code>apiSchema</code>, and <code>actionGroupExecutor</code> fields blank for this action group.</p>
    *          <p>During orchestration, if your agent determines that it needs to invoke an API in an action group, but doesn't have enough information to complete the API request,
-   *             it will invoke this action group instead and return an <a href="https://docs.aws.amazon.com/https:/docs.aws.amazon.com/bedrock/latest/APIReference/API_agent-runtime_Observation.html">Observation</a> reprompting the user for more information.</p>
+   *             it will invoke this action group instead and return an <a href="https://docs.aws.amazon.com/bedrock/latest/APIReference/API_agent-runtime_Observation.html">Observation</a> reprompting the user for more information.</p>
    * @public
    */
   parentActionGroupSignature?: ActionGroupSignature | undefined;
@@ -620,10 +620,10 @@ export interface ApiResult {
   confirmationState?: ConfirmationState | undefined;
 
   /**
-   * <p>The response body from the API operation. The key of the object is the content type (currently, only <code>TEXT</code> is supported). The response may be returned directly or from the Lambda function.</p>
+   * <p>Controls the final response state returned to the end user when API/function execution fails. When this state is FAILURE, the request fails with a dependency failure exception. When this state is REPROMPT, the API/function response is sent to the model for a re-prompt.</p>
    * @public
    */
-  responseBody?: Record<string, ContentBody> | undefined;
+  responseState?: ResponseState | undefined;
 
   /**
    * <p>http status code from API execution response (for example: 200, 400, 500).</p>
@@ -632,10 +632,10 @@ export interface ApiResult {
   httpStatusCode?: number | undefined;
 
   /**
-   * <p>Controls the final response state returned to end user when API/Function execution failed. When this state is FAILURE, the request would fail with dependency failure exception. When this state is REPROMPT, the API/function response will be sent to model for re-prompt</p>
+   * <p>The response body from the API operation. The key of the object is the content type (currently, only <code>TEXT</code> is supported). The response may be returned directly or from the Lambda function.</p>
    * @public
    */
-  responseState?: ResponseState | undefined;
+  responseBody?: Record<string, ContentBody> | undefined;
 
   /**
    * <p>The agent's ID.</p>
@@ -1334,6 +1334,44 @@ export interface FlowInput {
   content: FlowInputContent | undefined;
 }
 
+/**
+ * @public
+ * @enum
+ */
+export const PerformanceConfigLatency = {
+  OPTIMIZED: "optimized",
+  STANDARD: "standard",
+} as const;
+
+/**
+ * @public
+ */
+export type PerformanceConfigLatency = (typeof PerformanceConfigLatency)[keyof typeof PerformanceConfigLatency];
+
+/**
+ * <p>Performance settings for a model.</p>
+ * @public
+ */
+export interface PerformanceConfiguration {
+  /**
+   * <p>To use a latency-optimized version of the model, set to <code>optimized</code>.</p>
+   * @public
+   */
+  latency?: PerformanceConfigLatency | undefined;
+}
+
+/**
+ * <p>The performance configuration for a model called with <a>InvokeFlow</a>.</p>
+ * @public
+ */
+export interface ModelPerformanceConfiguration {
+  /**
+   * <p>The latency configuration for the model.</p>
+   * @public
+   */
+  performanceConfig?: PerformanceConfiguration | undefined;
+}
+
 /**
  * @public
  */
@@ -1361,6 +1399,12 @@ export interface InvokeFlowRequest {
    * @public
    */
   enableTrace?: boolean | undefined;
+
+  /**
+   * <p>Model performance settings for the request.</p>
+   * @public
+   */
+  modelPerformanceConfiguration?: ModelPerformanceConfiguration | undefined;
 }
 
 /**
@@ -2328,6 +2372,18 @@ export interface GenerateQueryResponse {
   queries?: GeneratedQuery[] | undefined;
 }
 
+/**
+ * <p>Settings for a model called with <a>InvokeAgent</a>.</p>
+ * @public
+ */
+export interface BedrockModelConfigurations {
+  /**
+   * <p>The performance configuration for the model.</p>
+   * @public
+   */
+  performanceConfig?: PerformanceConfiguration | undefined;
+}
+
 /**
  * <p>A content block.</p>
  * @public
@@ -2790,8 +2846,7 @@ export interface VectorSearchRerankingConfiguration {
 
 /**
  * <p>
- *             Configurations for streaming.
- *         </p>
+ *             Configurations for streaming.</p>
  * @public
  */
 export interface StreamingConfigurations {
@@ -3333,6 +3388,31 @@ export interface FilePart {
   files?: OutputFile[] | undefined;
 }
 
+/**
+ * <p>
+ *       The model specified in the request is not ready to serve inference requests. The AWS SDK
+ *       will automatically retry the operation up to 5 times. For information about configuring
+ *       automatic retries, see <a href="https://docs.aws.amazon.com/sdkref/latest/guide/feature-retry-behavior.html">Retry behavior</a> in the <i>AWS SDKs and Tools</i>
+ *       reference guide.
+ *     </p>
+ * @public
+ */
+export class ModelNotReadyException extends __BaseException {
+  readonly name: "ModelNotReadyException" = "ModelNotReadyException";
+  readonly $fault: "client" = "client";
+  /**
+   * @internal
+   */
+  constructor(opts: __ExceptionOptionType<ModelNotReadyException, __BaseException>) {
+    super({
+      name: "ModelNotReadyException",
+      $fault: "client",
+      ...opts,
+    });
+    Object.setPrototypeOf(this, ModelNotReadyException.prototype);
+  }
+}
+
 /**
  * <p>Details about a caller.</p>
  * @public
@@ -5049,6 +5129,7 @@ export type ResponseStream =
   | ResponseStream.DependencyFailedExceptionMember
   | ResponseStream.FilesMember
   | ResponseStream.InternalServerExceptionMember
+  | ResponseStream.ModelNotReadyExceptionMember
   | ResponseStream.ResourceNotFoundExceptionMember
   | ResponseStream.ReturnControlMember
   | ResponseStream.ServiceQuotaExceededExceptionMember
@@ -5078,6 +5159,7 @@ export namespace ResponseStream {
     conflictException?: never;
     dependencyFailedException?: never;
     badGatewayException?: never;
+    modelNotReadyException?: never;
     files?: never;
     $unknown?: never;
   }
@@ -5099,6 +5181,7 @@ export namespace ResponseStream {
     conflictException?: never;
     dependencyFailedException?: never;
     badGatewayException?: never;
+    modelNotReadyException?: never;
     files?: never;
     $unknown?: never;
   }
@@ -5120,6 +5203,7 @@ export namespace ResponseStream {
     conflictException?: never;
     dependencyFailedException?: never;
     badGatewayException?: never;
+    modelNotReadyException?: never;
     files?: never;
     $unknown?: never;
   }
@@ -5141,6 +5225,7 @@ export namespace ResponseStream {
     conflictException?: never;
     dependencyFailedException?: never;
     badGatewayException?: never;
+    modelNotReadyException?: never;
     files?: never;
     $unknown?: never;
   }
@@ -5162,6 +5247,7 @@ export namespace ResponseStream {
     conflictException?: never;
     dependencyFailedException?: never;
     badGatewayException?: never;
+    modelNotReadyException?: never;
     files?: never;
     $unknown?: never;
   }
@@ -5183,6 +5269,7 @@ export namespace ResponseStream {
     conflictException?: never;
     dependencyFailedException?: never;
     badGatewayException?: never;
+    modelNotReadyException?: never;
     files?: never;
     $unknown?: never;
   }
@@ -5204,6 +5291,7 @@ export namespace ResponseStream {
     conflictException?: never;
     dependencyFailedException?: never;
     badGatewayException?: never;
+    modelNotReadyException?: never;
     files?: never;
     $unknown?: never;
   }
@@ -5225,6 +5313,7 @@ export namespace ResponseStream {
     conflictException?: never;
     dependencyFailedException?: never;
     badGatewayException?: never;
+    modelNotReadyException?: never;
     files?: never;
     $unknown?: never;
   }
@@ -5246,6 +5335,7 @@ export namespace ResponseStream {
     conflictException?: never;
     dependencyFailedException?: never;
     badGatewayException?: never;
+    modelNotReadyException?: never;
     files?: never;
     $unknown?: never;
   }
@@ -5267,6 +5357,7 @@ export namespace ResponseStream {
     conflictException: ConflictException;
     dependencyFailedException?: never;
     badGatewayException?: never;
+    modelNotReadyException?: never;
     files?: never;
     $unknown?: never;
   }
@@ -5288,6 +5379,7 @@ export namespace ResponseStream {
     conflictException?: never;
     dependencyFailedException: DependencyFailedException;
     badGatewayException?: never;
+    modelNotReadyException?: never;
     files?: never;
     $unknown?: never;
   }
@@ -5309,6 +5401,34 @@ export namespace ResponseStream {
     conflictException?: never;
     dependencyFailedException?: never;
     badGatewayException: BadGatewayException;
+    modelNotReadyException?: never;
+    files?: never;
+    $unknown?: never;
+  }
+
+  /**
+   * <p>
+   *       The model specified in the request is not ready to serve inference requests. The AWS SDK
+   *       will automatically retry the operation up to 5 times. For information about configuring
+   *       automatic retries, see <a href="https://docs.aws.amazon.com/sdkref/latest/guide/feature-retry-behavior.html">Retry behavior</a> in the <i>AWS SDKs and Tools</i>
+   *       reference guide.
+   *     </p>
+   * @public
+   */
+  export interface ModelNotReadyExceptionMember {
+    chunk?: never;
+    trace?: never;
+    returnControl?: never;
+    internalServerException?: never;
+    validationException?: never;
+    resourceNotFoundException?: never;
+    serviceQuotaExceededException?: never;
+    throttlingException?: never;
+    accessDeniedException?: never;
+    conflictException?: never;
+    dependencyFailedException?: never;
+    badGatewayException?: never;
+    modelNotReadyException: ModelNotReadyException;
     files?: never;
     $unknown?: never;
   }
@@ -5330,6 +5450,7 @@ export namespace ResponseStream {
     conflictException?: never;
     dependencyFailedException?: never;
     badGatewayException?: never;
+    modelNotReadyException?: never;
     files: FilePart;
     $unknown?: never;
   }
@@ -5350,6 +5471,7 @@ export namespace ResponseStream {
     conflictException?: never;
     dependencyFailedException?: never;
     badGatewayException?: never;
+    modelNotReadyException?: never;
     files?: never;
     $unknown: [string, any];
   }
@@ -5367,6 +5489,7 @@ export namespace ResponseStream {
     conflictException: (value: ConflictException) => T;
     dependencyFailedException: (value: DependencyFailedException) => T;
     badGatewayException: (value: BadGatewayException) => T;
+    modelNotReadyException: (value: ModelNotReadyException) => T;
     files: (value: FilePart) => T;
     _: (name: string, value: any) => T;
   }
@@ -5388,6 +5511,7 @@ export namespace ResponseStream {
     if (value.dependencyFailedException !== undefined)
       return visitor.dependencyFailedException(value.dependencyFailedException);
     if (value.badGatewayException !== undefined) return visitor.badGatewayException(value.badGatewayException);
+    if (value.modelNotReadyException !== undefined) return visitor.modelNotReadyException(value.modelNotReadyException);
     if (value.files !== undefined) return visitor.files(value.files);
     return visitor._(value.$unknown[0], value.$unknown[1]);
   };
@@ -5422,6 +5546,18 @@ export interface InvokeAgentResponse {
   memoryId?: string | undefined;
 }
 
+/**
+ * <p>Settings for a model called with <a>InvokeInlineAgent</a>.</p>
+ * @public
+ */
+export interface InlineBedrockModelConfigurations {
+  /**
+   * <p>The latency configuration for the model.</p>
+   * @public
+   */
+  performanceConfig?: PerformanceConfiguration | undefined;
+}
+
 /**
  * <p>
  *             The configuration details for the guardrail.
@@ -6086,6 +6222,12 @@ export interface DeleteAgentMemoryRequest {
    * @public
    */
   memoryId?: string | undefined;
+
+  /**
+   * <p>The unique session identifier of the memory.</p>
+   * @public
+   */
+  sessionId?: string | undefined;
 }
 
 /**
@@ -6972,6 +7114,12 @@ export interface ExternalSourcesGenerationConfiguration {
    * @public
    */
   additionalModelRequestFields?: Record<string, __DocumentType> | undefined;
+
+  /**
+   * <p>The latency configuration for the model.</p>
+   * @public
+   */
+  performanceConfig?: PerformanceConfiguration | undefined;
 }
 
 /**
@@ -7108,6 +7256,12 @@ export interface GenerationConfiguration {
    * @public
    */
   additionalModelRequestFields?: Record<string, __DocumentType> | undefined;
+
+  /**
+   * <p>The latency configuration for the model.</p>
+   * @public
+   */
+  performanceConfig?: PerformanceConfiguration | undefined;
 }
 
 /**
@@ -7165,6 +7319,12 @@ export interface OrchestrationConfiguration {
    * @public
    */
   queryTransformationConfiguration?: QueryTransformationConfiguration | undefined;
+
+  /**
+   * <p>The latency configuration for the model.</p>
+   * @public
+   */
+  performanceConfig?: PerformanceConfiguration | undefined;
 }
 
 /**
@@ -8484,6 +8644,12 @@ export interface InvokeInlineAgentRequest {
    * @public
    */
   promptOverrideConfiguration?: PromptOverrideConfiguration | undefined;
+
+  /**
+   * <p>Model settings for the request.</p>
+   * @public
+   */
+  bedrockModelConfigurations?: InlineBedrockModelConfigurations | undefined;
 }
 
 /**
@@ -8653,10 +8819,19 @@ export interface InvokeAgentRequest {
    */
   memoryId?: string | undefined;
 
+  /**
+   * <p>Model performance settings for the request.</p>
+   * @public
+   */
+  bedrockModelConfigurations?: BedrockModelConfigurations | undefined;
+
   /**
    * <p>
    *            Specifies the configurations for streaming.
    *         </p>
+   *          <note>
+   *             <p>To use agent streaming, you need permissions to perform the <code>bedrock:InvokeModelWithResponseStream</code> action.</p>
+   *          </note>
    * @public
    */
   streamingConfigurations?: StreamingConfigurations | undefined;
@@ -9573,6 +9748,7 @@ export const ResponseStreamFilterSensitiveLog = (obj: ResponseStream): any => {
   if (obj.conflictException !== undefined) return { conflictException: obj.conflictException };
   if (obj.dependencyFailedException !== undefined) return { dependencyFailedException: obj.dependencyFailedException };
   if (obj.badGatewayException !== undefined) return { badGatewayException: obj.badGatewayException };
+  if (obj.modelNotReadyException !== undefined) return { modelNotReadyException: obj.modelNotReadyException };
   if (obj.files !== undefined) return { files: FilePartFilterSensitiveLog(obj.files) };
   if (obj.$unknown !== undefined) return { [obj.$unknown[0]]: "UNKNOWN" };
 };
diff --git a/clients/client-bedrock-agent-runtime/src/protocols/Aws_restJson1.ts b/clients/client-bedrock-agent-runtime/src/protocols/Aws_restJson1.ts
index 411551974423b..72c1b4996faec 100644
--- a/clients/client-bedrock-agent-runtime/src/protocols/Aws_restJson1.ts
+++ b/clients/client-bedrock-agent-runtime/src/protocols/Aws_restJson1.ts
@@ -62,6 +62,7 @@ import {
   APISchema,
   Attribution,
   BadGatewayException,
+  BedrockModelConfigurations,
   BedrockRerankingConfiguration,
   BedrockRerankingModelConfiguration,
   ByteContentDoc,
@@ -110,6 +111,7 @@ import {
   InlineAgentResponseStream,
   InlineAgentReturnControlPayload,
   InlineAgentTracePart,
+  InlineBedrockModelConfigurations,
   InlineSessionState,
   InputFile,
   InputPrompt,
@@ -129,6 +131,8 @@ import {
   MetadataAttributeSchema,
   MetadataConfigurationForReranking,
   ModelInvocationInput,
+  ModelNotReadyException,
+  ModelPerformanceConfiguration,
   Observation,
   OptimizedPromptEvent,
   OptimizedPromptStream,
@@ -137,6 +141,7 @@ import {
   OutputFile,
   ParameterDetail,
   PayloadPart,
+  PerformanceConfiguration,
   PostProcessingTrace,
   PreProcessingTrace,
   PromptConfiguration,
@@ -196,6 +201,7 @@ export const se_DeleteAgentMemoryCommand = async (
   b.p("agentAliasId", () => input.agentAliasId!, "{agentAliasId}", false);
   const query: any = map({
     [_mI]: [, input[_mI]!],
+    [_sI]: [, input[_sI]!],
   });
   let body: any;
   b.m("DELETE").h(headers).q(query).b(body);
@@ -267,6 +273,7 @@ export const se_InvokeAgentCommand = async (
   let body: any;
   body = JSON.stringify(
     take(input, {
+      bedrockModelConfigurations: (_) => _json(_),
       enableTrace: [],
       endSession: [],
       inputText: [],
@@ -298,6 +305,7 @@ export const se_InvokeFlowCommand = async (
     take(input, {
       enableTrace: [],
       inputs: (_) => se_FlowInputs(_, context),
+      modelPerformanceConfiguration: (_) => _json(_),
     })
   );
   b.m("POST").h(headers).b(body);
@@ -321,6 +329,7 @@ export const se_InvokeInlineAgentCommand = async (
   body = JSON.stringify(
     take(input, {
       actionGroups: (_) => _json(_),
+      bedrockModelConfigurations: (_) => _json(_),
       customerEncryptionKeyArn: [],
       enableTrace: [],
       endSession: [],
@@ -721,6 +730,9 @@ const de_CommandError = async (output: __HttpResponse, context: __SerdeContext):
     case "ValidationException":
     case "com.amazonaws.bedrockagentruntime#ValidationException":
       throw await de_ValidationExceptionRes(parsedOutput, context);
+    case "ModelNotReadyException":
+    case "com.amazonaws.bedrockagentruntime#ModelNotReadyException":
+      throw await de_ModelNotReadyExceptionRes(parsedOutput, context);
     default:
       const parsedBody = parsedOutput.body;
       return throwDefaultError({
@@ -828,6 +840,26 @@ const de_InternalServerExceptionRes = async (
   return __decorateServiceException(exception, parsedOutput.body);
 };
 
+/**
+ * deserializeAws_restJson1ModelNotReadyExceptionRes
+ */
+const de_ModelNotReadyExceptionRes = async (
+  parsedOutput: any,
+  context: __SerdeContext
+): Promise<ModelNotReadyException> => {
+  const contents: any = map({});
+  const data: any = parsedOutput.body;
+  const doc = take(data, {
+    message: __expectString,
+  });
+  Object.assign(contents, doc);
+  const exception = new ModelNotReadyException({
+    $metadata: deserializeMetadata(parsedOutput),
+    ...contents,
+  });
+  return __decorateServiceException(exception, parsedOutput.body);
+};
+
 /**
  * deserializeAws_restJson1ResourceNotFoundExceptionRes
  */
@@ -1198,6 +1230,11 @@ const de_ResponseStream = (
         badGatewayException: await de_BadGatewayException_event(event["badGatewayException"], context),
       };
     }
+    if (event["modelNotReadyException"] != null) {
+      return {
+        modelNotReadyException: await de_ModelNotReadyException_event(event["modelNotReadyException"], context),
+      };
+    }
     if (event["files"] != null) {
       return {
         files: await de_FilePart_event(event["files"], context),
@@ -1399,6 +1436,16 @@ const de_InternalServerException_event = async (
   };
   return de_InternalServerExceptionRes(parsedOutput, context);
 };
+const de_ModelNotReadyException_event = async (
+  output: any,
+  context: __SerdeContext
+): Promise<ModelNotReadyException> => {
+  const parsedOutput: any = {
+    ...output,
+    body: await parseBody(output.body, context),
+  };
+  return de_ModelNotReadyExceptionRes(parsedOutput, context);
+};
 const de_OptimizedPromptEvent_event = async (output: any, context: __SerdeContext): Promise<OptimizedPromptEvent> => {
   const contents: OptimizedPromptEvent = {} as any;
   const data: any = await parseBody(output.body, context);
@@ -1496,6 +1543,8 @@ const se_AdditionalModelRequestFieldsValue = (input: __DocumentType, context: __
 
 // se_APISchema omitted.
 
+// se_BedrockModelConfigurations omitted.
+
 /**
  * serializeAws_restJson1BedrockRerankingConfiguration
  */
@@ -1581,6 +1630,7 @@ const se_ExternalSourcesGenerationConfiguration = (
     additionalModelRequestFields: (_) => se_AdditionalModelRequestFields(_, context),
     guardrailConfiguration: _json,
     inferenceConfig: (_) => se_InferenceConfig(_, context),
+    performanceConfig: _json,
     promptTemplate: _json,
   });
 };
@@ -1679,6 +1729,7 @@ const se_GenerationConfiguration = (input: GenerationConfiguration, context: __S
     additionalModelRequestFields: (_) => se_AdditionalModelRequestFields(_, context),
     guardrailConfiguration: _json,
     inferenceConfig: (_) => se_InferenceConfig(_, context),
+    performanceConfig: _json,
     promptTemplate: _json,
   });
 };
@@ -1711,6 +1762,8 @@ const se_InferenceConfiguration = (input: InferenceConfiguration, context: __Ser
   });
 };
 
+// se_InlineBedrockModelConfigurations omitted.
+
 /**
  * serializeAws_restJson1InlineSessionState
  */
@@ -1849,6 +1902,8 @@ const se_KnowledgeBaseVectorSearchConfiguration = (
 
 // se_MetadataConfigurationForReranking omitted.
 
+// se_ModelPerformanceConfiguration omitted.
+
 /**
  * serializeAws_restJson1OrchestrationConfiguration
  */
@@ -1856,6 +1911,7 @@ const se_OrchestrationConfiguration = (input: OrchestrationConfiguration, contex
   return take(input, {
     additionalModelRequestFields: (_) => se_AdditionalModelRequestFields(_, context),
     inferenceConfig: (_) => se_InferenceConfig(_, context),
+    performanceConfig: _json,
     promptTemplate: _json,
     queryTransformationConfiguration: _json,
   });
@@ -1865,6 +1921,8 @@ const se_OrchestrationConfiguration = (input: OrchestrationConfiguration, contex
 
 // se_ParameterMap omitted.
 
+// se_PerformanceConfiguration omitted.
+
 /**
  * serializeAws_restJson1PromptConfiguration
  */
diff --git a/codegen/sdk-codegen/aws-models/bedrock-agent-runtime.json b/codegen/sdk-codegen/aws-models/bedrock-agent-runtime.json
index e494c400d6407..22636b56d4147 100644
--- a/codegen/sdk-codegen/aws-models/bedrock-agent-runtime.json
+++ b/codegen/sdk-codegen/aws-models/bedrock-agent-runtime.json
@@ -225,7 +225,7 @@
         "parentActionGroupSignature": {
           "target": "com.amazonaws.bedrockagentruntime#ActionGroupSignature",
           "traits": {
-            "smithy.api#documentation": "<p>\n            To allow your agent to request the user for additional information when trying to complete a task, set this field to <code>AMAZON.UserInput</code>. \n            You must leave the <code>description</code>, <code>apiSchema</code>, and <code>actionGroupExecutor</code> fields blank for this action group.\n        </p>\n         <p>To allow your agent to generate, run, and troubleshoot code when trying to complete a task, set this field to <code>AMAZON.CodeInterpreter</code>. You must \n            leave the <code>description</code>, <code>apiSchema</code>, and <code>actionGroupExecutor</code> fields blank for this action group.</p>\n         <p>During orchestration, if your agent determines that it needs to invoke an API in an action group, but doesn't have enough information to complete the API request,\n            it will invoke this action group instead and return an <a href=\"https://docs.aws.amazon.com/https:/docs.aws.amazon.com/bedrock/latest/APIReference/API_agent-runtime_Observation.html\">Observation</a> reprompting the user for more information.</p>"
+            "smithy.api#documentation": "<p>\n            To allow your agent to request the user for additional information when trying to complete a task, set this field to <code>AMAZON.UserInput</code>. \n            You must leave the <code>description</code>, <code>apiSchema</code>, and <code>actionGroupExecutor</code> fields blank for this action group.\n        </p>\n         <p>To allow your agent to generate, run, and troubleshoot code when trying to complete a task, set this field to <code>AMAZON.CodeInterpreter</code>. You must \n            leave the <code>description</code>, <code>apiSchema</code>, and <code>actionGroupExecutor</code> fields blank for this action group.</p>\n         <p>During orchestration, if your agent determines that it needs to invoke an API in an action group, but doesn't have enough information to complete the API request,\n            it will invoke this action group instead and return an <a href=\"https://docs.aws.amazon.com/bedrock/latest/APIReference/API_agent-runtime_Observation.html\">Observation</a> reprompting the user for more information.</p>"
           }
         },
         "actionGroupExecutor": {
@@ -1279,10 +1279,10 @@
             "smithy.api#documentation": "<p>Controls the API operations or functions to invoke based on the user confirmation.</p>"
           }
         },
-        "responseBody": {
-          "target": "com.amazonaws.bedrockagentruntime#ResponseBody",
+        "responseState": {
+          "target": "com.amazonaws.bedrockagentruntime#ResponseState",
           "traits": {
-            "smithy.api#documentation": "<p>The response body from the API operation. The key of the object is the content type (currently, only <code>TEXT</code> is supported). The response may be returned directly or from the Lambda function.</p>"
+            "smithy.api#documentation": "<p>Controls the final response state returned to the end user when API/function execution fails. When this state is FAILURE, the request fails with a dependency failure exception. When this state is REPROMPT, the API/function response is sent to the model for re-prompting.</p>"
           }
         },
         "httpStatusCode": {
@@ -1291,10 +1291,10 @@
             "smithy.api#documentation": "<p>http status code from API execution response (for example: 200, 400, 500).</p>"
           }
         },
-        "responseState": {
-          "target": "com.amazonaws.bedrockagentruntime#ResponseState",
+        "responseBody": {
+          "target": "com.amazonaws.bedrockagentruntime#ResponseBody",
           "traits": {
-            "smithy.api#documentation": "<p>Controls the final response state returned to end user when API/Function execution failed. When this state is FAILURE, the request would fail with dependency failure exception. When this state is REPROMPT, the API/function response will be sent to model for re-prompt</p>"
+            "smithy.api#documentation": "<p>The response body from the API operation. The key of the object is the content type (currently, only <code>TEXT</code> is supported). The response may be returned directly or from the Lambda function.</p>"
           }
         },
         "agentId": {
@@ -1390,6 +1390,20 @@
         "smithy.api#pattern": "^(arn:aws(-[^:]+)?:(bedrock|sagemaker):[a-z0-9-]{1,20}:([0-9]{12})?:([a-z-]+/)?)?([a-z0-9.-]{1,63}){0,2}(([:][a-z0-9-]{1,63}){0,2})?(/[a-z0-9]{1,12})?$"
       }
     },
+    "com.amazonaws.bedrockagentruntime#BedrockModelConfigurations": {
+      "type": "structure",
+      "members": {
+        "performanceConfig": {
+          "target": "com.amazonaws.bedrockagentruntime#PerformanceConfiguration",
+          "traits": {
+            "smithy.api#documentation": "<p>The performance configuration for the model.</p>"
+          }
+        }
+      },
+      "traits": {
+        "smithy.api#documentation": "<p>Settings for a model called with <a>InvokeAgent</a>.</p>"
+      }
+    },
     "com.amazonaws.bedrockagentruntime#BedrockRerankingConfiguration": {
       "type": "structure",
       "members": {
@@ -1873,6 +1887,13 @@
             "smithy.api#documentation": "<p>The unique identifier of the memory.</p>",
             "smithy.api#httpQuery": "memoryId"
           }
+        },
+        "sessionId": {
+          "target": "com.amazonaws.bedrockagentruntime#SessionId",
+          "traits": {
+            "smithy.api#documentation": "<p>The unique session identifier of the memory.</p>",
+            "smithy.api#httpQuery": "sessionId"
+          }
         }
       },
       "traits": {
@@ -2004,6 +2025,12 @@
           "traits": {
             "smithy.api#documentation": "<p> Additional model parameters and their corresponding values not included in the textInferenceConfig structure for an external source. Takes in custom model parameters specific to the language model being used. </p>"
           }
+        },
+        "performanceConfig": {
+          "target": "com.amazonaws.bedrockagentruntime#PerformanceConfiguration",
+          "traits": {
+            "smithy.api#documentation": "<p>The latency configuration for the model.</p>"
+          }
         }
       },
       "traits": {
@@ -3098,6 +3125,12 @@
           "traits": {
             "smithy.api#documentation": "<p> Additional model parameters and corresponding values not included in the textInferenceConfig structure for a knowledge base. This allows users to provide custom model parameters specific to the language model being used. </p>"
           }
+        },
+        "performanceConfig": {
+          "target": "com.amazonaws.bedrockagentruntime#PerformanceConfiguration",
+          "traits": {
+            "smithy.api#documentation": "<p>The latency configuration for the model.</p>"
+          }
         }
       },
       "traits": {
@@ -4308,6 +4341,20 @@
         "smithy.api#sensitive": {}
       }
     },
+    "com.amazonaws.bedrockagentruntime#InlineBedrockModelConfigurations": {
+      "type": "structure",
+      "members": {
+        "performanceConfig": {
+          "target": "com.amazonaws.bedrockagentruntime#PerformanceConfiguration",
+          "traits": {
+            "smithy.api#documentation": "<p>The latency configuration for the model.</p>"
+          }
+        }
+      },
+      "traits": {
+        "smithy.api#documentation": "<p>Settings for a model called with <a>InvokeInlineAgent</a>.</p>"
+      }
+    },
     "com.amazonaws.bedrockagentruntime#InlineSessionState": {
       "type": "structure",
       "members": {
@@ -4593,6 +4640,9 @@
         {
           "target": "com.amazonaws.bedrockagentruntime#InternalServerException"
         },
+        {
+          "target": "com.amazonaws.bedrockagentruntime#ModelNotReadyException"
+        },
         {
           "target": "com.amazonaws.bedrockagentruntime#ResourceNotFoundException"
         },
@@ -4607,7 +4657,7 @@
         }
       ],
       "traits": {
-        "smithy.api#documentation": "<note>\n            <p>The CLI doesn't support streaming operations in Amazon Bedrock, including <code>InvokeAgent</code>.</p>\n         </note>\n         <p>Sends a prompt for the agent to process and respond to. Note the following fields for the request:</p>\n         <ul>\n            <li>\n               <p>To continue the same conversation with an agent, use the same <code>sessionId</code> value in the request.</p>\n            </li>\n            <li>\n               <p>To activate trace enablement, turn <code>enableTrace</code> to <code>true</code>. Trace enablement helps you follow the agent's reasoning process that led it to the information it processed, the actions it took, and the final result it yielded. For more information, see <a href=\"https://docs.aws.amazon.com/bedrock/latest/userguide/agents-test.html#trace-events\">Trace enablement</a>.</p>\n            </li>\n            <li>\n               <p>End a conversation by setting <code>endSession</code> to <code>true</code>.</p>\n            </li>\n            <li>\n               <p>In the <code>sessionState</code> object, you can include attributes for the session or prompt or, if you configured an action group to return control, results from invocation of the action group.</p>\n            </li>\n         </ul>\n         <p>The response is returned in the <code>bytes</code> field of the <code>chunk</code> object.</p>\n         <ul>\n            <li>\n               <p>The <code>attribution</code> object contains citations for parts of the response.</p>\n            </li>\n            <li>\n               <p>If you set <code>enableTrace</code> to <code>true</code> in the request, you can trace the agent's steps and reasoning process that led it to the response.</p>\n            </li>\n            <li>\n               <p>If the action predicted was configured to return control, the response returns parameters for the action, elicited from the user, in the 
<code>returnControl</code> field.</p>\n            </li>\n            <li>\n               <p>Errors are also surfaced in the response.</p>\n            </li>\n         </ul>",
+        "smithy.api#documentation": "<note>\n            <p>The CLI doesn't support streaming operations in Amazon Bedrock, including <code>InvokeAgent</code>.</p>\n         </note>\n         <p>Sends a prompt for the agent to process and respond to. Note the following fields for the request:</p>\n         <ul>\n            <li>\n               <p>To continue the same conversation with an agent, use the same <code>sessionId</code> value in the request.</p>\n            </li>\n            <li>\n               <p>To activate trace enablement, turn <code>enableTrace</code> to <code>true</code>. Trace enablement helps you follow the agent's reasoning process that led it to the information it processed, the actions it took, and the final result it yielded. For more information, see <a href=\"https://docs.aws.amazon.com/bedrock/latest/userguide/agents-test.html#trace-events\">Trace enablement</a>.</p>\n            </li>\n            <li>\n               <p>To stream agent responses, make sure that only orchestration prompt is enabled. 
Agent streaming is not supported for the following steps:\n        </p>\n               <ul>\n                  <li>\n                     <p>\n                        <code>Pre-processing</code>\n                     </p>\n                  </li>\n                  <li>\n                     <p>\n                        <code>Post-processing</code>\n                     </p>\n                  </li>\n                  <li>\n                     <p>Agent with 1 Knowledge base and <code>User Input</code> not enabled</p>\n                  </li>\n               </ul>\n            </li>\n            <li>\n               <p>End a conversation by setting <code>endSession</code> to <code>true</code>.</p>\n            </li>\n            <li>\n               <p>In the <code>sessionState</code> object, you can include attributes for the session or prompt or, if you configured an action group to return control, results from invocation of the action group.</p>\n            </li>\n         </ul>\n         <p>The response is returned in the <code>bytes</code> field of the <code>chunk</code> object.</p>\n         <ul>\n            <li>\n               <p>The <code>attribution</code> object contains citations for parts of the response.</p>\n            </li>\n            <li>\n               <p>If you set <code>enableTrace</code> to <code>true</code> in the request, you can trace the agent's steps and reasoning process that led it to the response.</p>\n            </li>\n            <li>\n               <p>If the action predicted was configured to return control, the response returns parameters for the action, elicited from the user, in the <code>returnControl</code> field.</p>\n            </li>\n            <li>\n               <p>Errors are also surfaced in the response.</p>\n            </li>\n         </ul>",
         "smithy.api#http": {
           "code": 200,
           "method": "POST",
@@ -4672,10 +4722,16 @@
             "smithy.api#documentation": "<p>The unique identifier of the agent memory.</p>"
           }
         },
+        "bedrockModelConfigurations": {
+          "target": "com.amazonaws.bedrockagentruntime#BedrockModelConfigurations",
+          "traits": {
+            "smithy.api#documentation": "<p>Model performance settings for the request.</p>"
+          }
+        },
         "streamingConfigurations": {
           "target": "com.amazonaws.bedrockagentruntime#StreamingConfigurations",
           "traits": {
-            "smithy.api#documentation": "<p>\n           Specifies the configurations for streaming.\n        </p>"
+            "smithy.api#documentation": "<p>\n           Specifies the configurations for streaming.\n        </p>\n         <note>\n            <p>To use agent streaming, you need permissions to perform the <code>bedrock:InvokeModelWithResponseStream</code> action.</p>\n         </note>"
           }
         },
         "sourceArn": {
@@ -4806,6 +4862,12 @@
           "traits": {
             "smithy.api#documentation": "<p>Specifies whether to return the trace for the flow or not. Traces track inputs and outputs for nodes in the flow. For more information, see <a href=\"https://docs.aws.amazon.com/bedrock/latest/userguide/flows-trace.html\">Track each step in your prompt flow by viewing its trace in Amazon Bedrock</a>.</p>"
           }
+        },
+        "modelPerformanceConfiguration": {
+          "target": "com.amazonaws.bedrockagentruntime#ModelPerformanceConfiguration",
+          "traits": {
+            "smithy.api#documentation": "<p>Model performance settings for the request.</p>"
+          }
         }
       },
       "traits": {
@@ -4958,6 +5020,12 @@
           "traits": {
             "smithy.api#documentation": "<p>\n           Configurations for advanced prompts used to override the default prompts to enhance the accuracy of the inline agent.\n        </p>"
           }
+        },
+        "bedrockModelConfigurations": {
+          "target": "com.amazonaws.bedrockagentruntime#InlineBedrockModelConfigurations",
+          "traits": {
+            "smithy.api#documentation": "<p>Model settings for the request.</p>"
+          }
         }
       },
       "traits": {
@@ -5607,6 +5675,33 @@
         "smithy.api#sensitive": {}
       }
     },
+    "com.amazonaws.bedrockagentruntime#ModelNotReadyException": {
+      "type": "structure",
+      "members": {
+        "message": {
+          "target": "com.amazonaws.bedrockagentruntime#NonBlankString"
+        }
+      },
+      "traits": {
+        "smithy.api#documentation": "<p>\n      The model specified in the request is not ready to serve inference requests. The AWS SDK\n      will automatically retry the operation up to 5 times. For information about configuring\n      automatic retries, see <a href=\"https://docs.aws.amazon.com/sdkref/latest/guide/feature-retry-behavior.html\">Retry behavior</a> in the <i>AWS SDKs and Tools</i>\n      reference guide.\n    </p>",
+        "smithy.api#error": "client",
+        "smithy.api#httpError": 424
+      }
+    },
+    "com.amazonaws.bedrockagentruntime#ModelPerformanceConfiguration": {
+      "type": "structure",
+      "members": {
+        "performanceConfig": {
+          "target": "com.amazonaws.bedrockagentruntime#PerformanceConfiguration",
+          "traits": {
+            "smithy.api#documentation": "<p>The latency configuration for the model.</p>"
+          }
+        }
+      },
+      "traits": {
+        "smithy.api#documentation": "<p>The performance configuration for a model called with <a>InvokeFlow</a>.</p>"
+      }
+    },
     "com.amazonaws.bedrockagentruntime#Name": {
       "type": "string",
       "traits": {
@@ -5952,6 +6047,12 @@
           "traits": {
             "smithy.api#documentation": "<p>To split up the prompt and retrieve multiple sources, set the transformation type to\n    <code>QUERY_DECOMPOSITION</code>.</p>"
           }
+        },
+        "performanceConfig": {
+          "target": "com.amazonaws.bedrockagentruntime#PerformanceConfiguration",
+          "traits": {
+            "smithy.api#documentation": "<p>The latency configuration for the model.</p>"
+          }
         }
       },
       "traits": {
@@ -6246,6 +6347,38 @@
         }
       }
     },
+    "com.amazonaws.bedrockagentruntime#PerformanceConfigLatency": {
+      "type": "enum",
+      "members": {
+        "STANDARD": {
+          "target": "smithy.api#Unit",
+          "traits": {
+            "smithy.api#enumValue": "standard"
+          }
+        },
+        "OPTIMIZED": {
+          "target": "smithy.api#Unit",
+          "traits": {
+            "smithy.api#enumValue": "optimized"
+          }
+        }
+      }
+    },
+    "com.amazonaws.bedrockagentruntime#PerformanceConfiguration": {
+      "type": "structure",
+      "members": {
+        "latency": {
+          "target": "com.amazonaws.bedrockagentruntime#PerformanceConfigLatency",
+          "traits": {
+            "smithy.api#default": "standard",
+            "smithy.api#documentation": "<p>To use a latency-optimized version of the model, set to <code>optimized</code>.</p>"
+          }
+        }
+      },
+      "traits": {
+        "smithy.api#documentation": "<p>Performance settings for a model.</p>"
+      }
+    },
     "com.amazonaws.bedrockagentruntime#PostProcessingModelInvocationOutput": {
       "type": "structure",
       "members": {
@@ -7251,6 +7384,12 @@
             "smithy.api#documentation": "<p>There was an issue with a dependency due to a server issue. Retry your request.</p>"
           }
         },
+        "modelNotReadyException": {
+          "target": "com.amazonaws.bedrockagentruntime#ModelNotReadyException",
+          "traits": {
+            "smithy.api#documentation": "<p>\n      The model specified in the request is not ready to serve inference requests. The AWS SDK\n      will automatically retry the operation up to 5 times. For information about configuring\n      automatic retries, see <a href=\"https://docs.aws.amazon.com/sdkref/latest/guide/feature-retry-behavior.html\">Retry behavior</a> in the <i>AWS SDKs and Tools</i>\n      reference guide.\n    </p>"
+          }
+        },
         "files": {
           "target": "com.amazonaws.bedrockagentruntime#FilePart",
           "traits": {
@@ -8721,7 +8860,7 @@
         }
       },
       "traits": {
-        "smithy.api#documentation": "<p>\n            Configurations for streaming.\n        </p>"
+        "smithy.api#documentation": "<p>\n            Configurations for streaming.</p>"
       }
     },
     "com.amazonaws.bedrockagentruntime#SummaryText": {