From ae86950ecb25e79503c4cde6993e50327b9d951c Mon Sep 17 00:00:00 2001 From: awstools Date: Mon, 13 Jan 2025 19:12:24 +0000 Subject: [PATCH] feat(client-bedrock): With this release, Bedrock Evaluation will now support latency-optimized inference for foundation models. --- .../commands/CreateEvaluationJobCommand.ts | 3 ++ .../src/commands/GetEvaluationJobCommand.ts | 3 ++ clients/client-bedrock/src/models/models_0.ts | 32 ++++++++++++++++ .../src/protocols/Aws_restJson1.ts | 5 +++ codegen/sdk-codegen/aws-models/bedrock.json | 37 +++++++++++++++++++ 5 files changed, 80 insertions(+) diff --git a/clients/client-bedrock/src/commands/CreateEvaluationJobCommand.ts b/clients/client-bedrock/src/commands/CreateEvaluationJobCommand.ts index c8273a334a20..64d6ff2c1883 100644 --- a/clients/client-bedrock/src/commands/CreateEvaluationJobCommand.ts +++ b/clients/client-bedrock/src/commands/CreateEvaluationJobCommand.ts @@ -110,6 +110,9 @@ export interface CreateEvaluationJobCommandOutput extends CreateEvaluationJobRes * bedrockModel: { // EvaluationBedrockModel * modelIdentifier: "STRING_VALUE", // required * inferenceParams: "STRING_VALUE", + * performanceConfig: { // PerformanceConfiguration + * latency: "standard" || "optimized", + * }, * }, * }, * ], diff --git a/clients/client-bedrock/src/commands/GetEvaluationJobCommand.ts b/clients/client-bedrock/src/commands/GetEvaluationJobCommand.ts index 94bbba10ccbf..d65d1ce4ff23 100644 --- a/clients/client-bedrock/src/commands/GetEvaluationJobCommand.ts +++ b/clients/client-bedrock/src/commands/GetEvaluationJobCommand.ts @@ -112,6 +112,9 @@ export interface GetEvaluationJobCommandOutput extends GetEvaluationJobResponse, * // bedrockModel: { // EvaluationBedrockModel * // modelIdentifier: "STRING_VALUE", // required * // inferenceParams: "STRING_VALUE", + * // performanceConfig: { // PerformanceConfiguration + * // latency: "standard" || "optimized", + * // }, * // }, * // }, * // ], diff --git 
a/clients/client-bedrock/src/models/models_0.ts b/clients/client-bedrock/src/models/models_0.ts index e5efc6c289de..77ebd94d9f3b 100644 --- a/clients/client-bedrock/src/models/models_0.ts +++ b/clients/client-bedrock/src/models/models_0.ts @@ -1033,6 +1033,32 @@ export namespace EvaluationConfig { }; } +/** + * @public + * @enum + */ +export const PerformanceConfigLatency = { + OPTIMIZED: "optimized", + STANDARD: "standard", +} as const; + +/** + * @public + */ +export type PerformanceConfigLatency = (typeof PerformanceConfigLatency)[keyof typeof PerformanceConfigLatency]; + +/** + *

Contains performance settings for a model.

+ * @public + */ +export interface PerformanceConfiguration { + /** + *

Specifies whether to use the latency-optimized or standard version of a model or inference profile.

+ * @public + */ + latency?: PerformanceConfigLatency | undefined; +} + /** *

Contains the ARN of the Amazon Bedrock model or inference profile specified in your evaluation job. Each Amazon Bedrock model supports different inferenceParams. To learn more about supported inference parameters for Amazon Bedrock models, see Inference parameters for foundation models.

*

The inferenceParams are specified using JSON. To successfully insert JSON as a string, make sure that all quotation marks are properly escaped. For example, the "temperature":"0.25" key-value pair would need to be formatted as \"temperature\":\"0.25\" to be successfully accepted in the request.

@@ -1050,6 +1076,12 @@ export interface EvaluationBedrockModel { * @public */ inferenceParams?: string | undefined; + + /** + *

Specifies performance settings for the model or inference profile.

+ * @public + */ + performanceConfig?: PerformanceConfiguration | undefined; } /** diff --git a/clients/client-bedrock/src/protocols/Aws_restJson1.ts b/clients/client-bedrock/src/protocols/Aws_restJson1.ts index f411da908368..77ad9a5743ac 100644 --- a/clients/client-bedrock/src/protocols/Aws_restJson1.ts +++ b/clients/client-bedrock/src/protocols/Aws_restJson1.ts @@ -269,6 +269,7 @@ import { ModelInvocationJobSummary, OrchestrationConfiguration, OutputDataConfig, + PerformanceConfiguration, PromptRouterSummary, PromptTemplate, ProvisionedModelSummary, @@ -3364,6 +3365,8 @@ const se_KnowledgeBaseVectorSearchConfiguration = ( // se_OutputDataConfig omitted. +// se_PerformanceConfiguration omitted. + // se_PromptTemplate omitted. // se_QueryTransformationConfiguration omitted. @@ -4183,6 +4186,8 @@ const de_ModelInvocationJobSummary = (output: any, context: __SerdeContext): Mod // de_OutputDataConfig omitted. +// de_PerformanceConfiguration omitted. + /** * deserializeAws_restJson1PromptRouterSummaries */ diff --git a/codegen/sdk-codegen/aws-models/bedrock.json b/codegen/sdk-codegen/aws-models/bedrock.json index e712ce30b4cb..c9a8096c5c1e 100644 --- a/codegen/sdk-codegen/aws-models/bedrock.json +++ b/codegen/sdk-codegen/aws-models/bedrock.json @@ -3027,6 +3027,12 @@ "smithy.api#default": "{}", "smithy.api#documentation": "

Each Amazon Bedrock model supports different inference parameters that change how the model behaves during inference.

" } + }, + "performanceConfig": { + "target": "com.amazonaws.bedrock#PerformanceConfiguration", + "traits": { + "smithy.api#documentation": "

Specifies performance settings for the model or inference profile.

" + } } }, "traits": { @@ -10536,6 +10542,37 @@ "smithy.api#pattern": "^\\S*$" } }, + "com.amazonaws.bedrock#PerformanceConfigLatency": { + "type": "enum", + "members": { + "STANDARD": { + "target": "smithy.api#Unit", + "traits": { + "smithy.api#enumValue": "standard" + } + }, + "OPTIMIZED": { + "target": "smithy.api#Unit", + "traits": { + "smithy.api#enumValue": "optimized" + } + } + } + }, + "com.amazonaws.bedrock#PerformanceConfiguration": { + "type": "structure", + "members": { + "latency": { + "target": "com.amazonaws.bedrock#PerformanceConfigLatency", + "traits": { + "smithy.api#documentation": "

Specifies whether to use the latency-optimized or standard version of a model or inference profile.

" + } + } + }, + "traits": { + "smithy.api#documentation": "

Contains performance settings for a model.

" + } + }, "com.amazonaws.bedrock#PositiveInteger": { "type": "integer", "traits": {