feat(client-bedrock): With this release, Bedrock Evaluation will now support latency-optimized inference for foundation models.
aws · Jan 13, 2025 · ae86950 · ae86950
1 parent b3857a8
commit ae86950
Show file tree

Hide file tree

Showing 5 changed files with 80 additions and 0 deletions.
diff --git a/clients/client-bedrock/src/commands/CreateEvaluationJobCommand.ts b/clients/client-bedrock/src/commands/CreateEvaluationJobCommand.ts
@@ -110,6 +110,9 @@ export interface CreateEvaluationJobCommandOutput extends CreateEvaluationJobRes
  *         bedrockModel: { // EvaluationBedrockModel
  *           modelIdentifier: "STRING_VALUE", // required
  *           inferenceParams: "STRING_VALUE",
+ *           performanceConfig: { // PerformanceConfiguration
+ *             latency: "standard" || "optimized",
+ *           },
  *         },
  *       },
  *     ],

diff --git a/clients/client-bedrock/src/commands/GetEvaluationJobCommand.ts b/clients/client-bedrock/src/commands/GetEvaluationJobCommand.ts
@@ -112,6 +112,9 @@ export interface GetEvaluationJobCommandOutput extends GetEvaluationJobResponse,
  * //         bedrockModel: { // EvaluationBedrockModel
  * //           modelIdentifier: "STRING_VALUE", // required
  * //           inferenceParams: "STRING_VALUE",
+ * //           performanceConfig: { // PerformanceConfiguration
+ * //             latency: "standard" || "optimized",
+ * //           },
  * //         },
  * //       },
  * //     ],

diff --git a/clients/client-bedrock/src/models/models_0.ts b/clients/client-bedrock/src/models/models_0.ts
@@ -1033,6 +1033,32 @@ export namespace EvaluationConfig {
   };
 }
 
+/**
+ * <p>Named constants for the latency modes that can be assigned to the
+ * <code>latency</code> member of <code>PerformanceConfiguration</code>:
+ * <code>"optimized"</code> and <code>"standard"</code>.</p>
+ * @public
+ * @enum
+ */
+export const PerformanceConfigLatency = {
+  OPTIMIZED: "optimized",
+  STANDARD: "standard",
+} as const;
+
+/**
+ * <p>Union of the string values of the <code>PerformanceConfigLatency</code> constant,
+ * i.e. <code>"optimized" | "standard"</code>.</p>
+ * @public
+ */
+export type PerformanceConfigLatency = (typeof PerformanceConfigLatency)[keyof typeof PerformanceConfigLatency];
+
+/**
+ * <p>Contains performance settings for a model.</p>
+ * @public
+ */
+export interface PerformanceConfiguration {
+  /**
+   * <p>Specifies whether to use the latency-optimized or standard version of a model or inference profile.</p>
+   * <p>Optional. When set, the value is one of the <code>PerformanceConfigLatency</code> values
+   * (<code>"optimized"</code> or <code>"standard"</code>).</p>
+   * @public
+   */
+  latency?: PerformanceConfigLatency | undefined;
+}
+
 /**
  * <p>Contains the ARN of the Amazon Bedrock model or <a href="https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference.html">inference profile</a> specified in your evaluation job. Each Amazon Bedrock model supports different <code>inferenceParams</code>. To learn more about supported inference parameters for Amazon Bedrock models, see <a href="https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html">Inference parameters for foundation models</a>.</p>
  *          <p>The <code>inferenceParams</code> are specified using JSON. To successfully insert JSON as a string, make sure that all quotation marks are properly escaped. For example, the <code>"temperature":"0.25"</code> key-value pair would need to be formatted as <code>\"temperature\":\"0.25\"</code> to be successfully accepted in the request.</p>
@@ -1050,6 +1076,12 @@ export interface EvaluationBedrockModel {
    * @public
    */
   inferenceParams?: string | undefined;
+
+  /**
+   * <p>Specifies performance settings for the model or inference profile.</p>
+   * @public
+   */
+  performanceConfig?: PerformanceConfiguration | undefined;
 }
 
 /**

diff --git a/clients/client-bedrock/src/protocols/Aws_restJson1.ts b/clients/client-bedrock/src/protocols/Aws_restJson1.ts
@@ -269,6 +269,7 @@ import {
   ModelInvocationJobSummary,
   OrchestrationConfiguration,
   OutputDataConfig,
+  PerformanceConfiguration,
   PromptRouterSummary,
   PromptTemplate,
   ProvisionedModelSummary,
@@ -3364,6 +3365,8 @@ const se_KnowledgeBaseVectorSearchConfiguration = (
 
 // se_OutputDataConfig omitted.
 
+// se_PerformanceConfiguration omitted.
+
 // se_PromptTemplate omitted.
 
 // se_QueryTransformationConfiguration omitted.
@@ -4183,6 +4186,8 @@ const de_ModelInvocationJobSummary = (output: any, context: __SerdeContext): Mod
 
 // de_OutputDataConfig omitted.
 
+// de_PerformanceConfiguration omitted.
+
 /**
  * deserializeAws_restJson1PromptRouterSummaries
  */

diff --git a/codegen/sdk-codegen/aws-models/bedrock.json b/codegen/sdk-codegen/aws-models/bedrock.json
@@ -3027,6 +3027,12 @@
             "smithy.api#default": "{}",
             "smithy.api#documentation": "<p>Each Amazon Bedrock model supports different inference parameters that change how the model behaves during inference.</p>"
           }
+        },
+        "performanceConfig": {
+          "target": "com.amazonaws.bedrock#PerformanceConfiguration",
+          "traits": {
+            "smithy.api#documentation": "<p>Specifies performance settings for the model or inference profile.</p>"
+          }
         }
       },
       "traits": {
@@ -10536,6 +10542,37 @@
         "smithy.api#pattern": "^\\S*$"
       }
     },
+    "com.amazonaws.bedrock#PerformanceConfigLatency": {
+      "type": "enum",
+      "members": {
+        "STANDARD": {
+          "target": "smithy.api#Unit",
+          "traits": {
+            "smithy.api#enumValue": "standard"
+          }
+        },
+        "OPTIMIZED": {
+          "target": "smithy.api#Unit",
+          "traits": {
+            "smithy.api#enumValue": "optimized"
+          }
+        }
+      }
+    },
+    "com.amazonaws.bedrock#PerformanceConfiguration": {
+      "type": "structure",
+      "members": {
+        "latency": {
+          "target": "com.amazonaws.bedrock#PerformanceConfigLatency",
+          "traits": {
+            "smithy.api#documentation": "<p>Specifies whether to use the latency-optimized or standard version of a model or inference profile.</p>"
+          }
+        }
+      },
+      "traits": {
+        "smithy.api#documentation": "<p>Contains performance settings for a model.</p>"
+      }
+    },
     "com.amazonaws.bedrock#PositiveInteger": {
       "type": "integer",
       "traits": {