e2e: add agent test using tool (#426)
**Commit Message**

* Add an e2e test making sure that we can use a tool and return the
proper response from the LLM

This PR also contains a few fixes that allow this test to work:
* Added validateToolCallID, since the tool-call ID is required on the
Bedrock message and the request fails without it (see the validation
sketch after this list)
* Validate and cast the OpenAI content value into a Bedrock content block.
Previously we assumed it was always a string, but it can also be an array
* Merge the content blocks if one has text and the other has a tool call
but no text. bedrockResp.Output.Message.Content is not 1:1 with the OpenAI
choice.Message.Content: the result arrives in chunks that should be
merged. For example, the text and its tool config were in separate
elements, but OpenAI expects them in the same one (see the merge sketch
after this list)
* In openAIMessageToBedrockMessageRoleAssistant we assumed that there is
either a refusal or content text, but we don't always pass in text. This
was causing an error: the array length was hard-coded to 1, so the first
element was empty, and each content element must have a key specified
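
For reference, a minimal sketch of what the tool-call ID validation needs to enforce. This is not the repo's validateToolCallID; the `^[a-zA-Z0-9_-]+$` pattern and the 64-character cap are assumptions based on how Bedrock documents its toolUseId constraint:

```go
package main

import (
	"fmt"
	"regexp"
)

// Assumed constraint: Bedrock rejects tool results whose toolUseId is
// missing or malformed, so the translator must check the OpenAI
// tool_call_id before building the Bedrock message. The pattern and
// length limit here are assumptions, not the repo's exact rule.
var toolUseIDPattern = regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)

func validateToolCallID(id string) error {
	if id == "" {
		return fmt.Errorf("tool_call_id is required for bedrock messages")
	}
	if len(id) > 64 || !toolUseIDPattern.MatchString(id) {
		return fmt.Errorf("invalid tool_call_id %q", id)
	}
	return nil
}

func main() {
	fmt.Println(validateToolCallID("call_6g7a")) // <nil>
	fmt.Println(validateToolCallID(""))          // error
}
```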
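
And a minimal sketch of the merge idea, using simplified stand-in types rather than the real awsbedrock/openai ones: Bedrock returns text and tool-use as sibling content elements, while OpenAI hangs both off a single choice message, so the translator folds them together.

```go
package main

import "fmt"

// Simplified stand-ins for the real types in internal/apischema.
type contentBlock struct {
	Text      *string
	ToolUseID *string // tool-use ID only, simplified for illustration
}

type choiceMessage struct {
	Content   *string
	ToolCalls []string
}

// mergeBlocks folds Bedrock's chunked content into one OpenAI-style
// message: the first text block becomes Content, and every tool-use
// block is appended to ToolCalls.
func mergeBlocks(blocks []contentBlock) choiceMessage {
	var msg choiceMessage
	for _, b := range blocks {
		if b.Text != nil && msg.Content == nil {
			msg.Content = b.Text
		}
		if b.ToolUseID != nil {
			msg.ToolCalls = append(msg.ToolCalls, *b.ToolUseID)
		}
	}
	return msg
}

func main() {
	text, id := "response", "call_6g7a"
	merged := mergeBlocks([]contentBlock{{Text: &text}, {ToolUseID: &id}})
	fmt.Println(*merged.Content, merged.ToolCalls) // response [call_6g7a]
}
```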

Note: I think there is another bug here that I'm looking into and
writing a unit test for, but since there is already a lot in this PR,
it's probably best to follow up in the next one. Basically, the
assistant OpenAI param message's content doesn't seem to be passed
through to this method, so we end up with no content. This doesn't
prevent anything else from working; we are just missing a text field
like
`{"content":[{"text":"Certainly! I can help you get the weather
information for New York City. To do that, I'll use the available
weather tool. Let me fetch that information for you right away."}]}`

The rest of the changes are tests.

---------

Signed-off-by: Alexa Griffith <agriffith96@gmail.com>
Signed-off-by: Dan Sun <dsun20@bloomberg.net>
Co-authored-by: Dan Sun <dsun20@bloomberg.net>
alexagriffith and yuzisun authored Feb 28, 2025
1 parent 6978f83 commit 74159c2
Showing 4 changed files with 192 additions and 55 deletions.
4 changes: 2 additions & 2 deletions internal/apischema/awsbedrock/awsbedrock.go
@@ -277,10 +277,10 @@ type ToolResultContentBlock struct {
Image *ImageBlock `json:"image,omitempty"`

// A tool result that is text.
Text *string `json:"text" type:"string,omitempty"`
Text *string `json:"text,omitempty"`

// A tool result that is JSON format data.
JSON *string `json:"json" type:"string,omitempty"`
JSON *string `json:"json,omitempty"`
}

// ToolResultBlock A tool result block that contains the results for a tool request that the
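A note on the tag change above: in the old form, `omitempty` lived in a separate `type` struct tag, which `encoding/json` never reads, so a nil pointer was still serialized as `"text":null`. A self-contained before/after check of that behavior:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// With omitempty stranded in the unrelated `type` tag, encoding/json
// still emits "text":null for a nil pointer; once omitempty moves into
// the json tag, the field is dropped entirely.
type before struct {
	Text *string `json:"text" type:"string,omitempty"`
}

type after struct {
	Text *string `json:"text,omitempty"`
}

func main() {
	b, _ := json.Marshal(before{})
	a, _ := json.Marshal(after{})
	fmt.Println(string(b)) // {"text":null}
	fmt.Println(string(a)) // {}
}
```

The same fix applies to the `json` field in that struct.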
39 changes: 29 additions & 10 deletions internal/extproc/translator/openai_awsbedrock.go
@@ -252,11 +252,12 @@ func (o *openAIToAWSBedrockTranslatorV1ChatCompletion) openAIMessageToBedrockMes
openAiMessage *openai.ChatCompletionAssistantMessageParam, role string,
) (*awsbedrock.Message, error) {
var bedrockMessage *awsbedrock.Message
contentBlocks := make([]*awsbedrock.ContentBlock, 1)
contentBlocks := make([]*awsbedrock.ContentBlock, 0)
if openAiMessage.Content.Type == openai.ChatCompletionAssistantMessageParamContentTypeRefusal {
contentBlocks[0] = &awsbedrock.ContentBlock{Text: openAiMessage.Content.Refusal}
} else {
contentBlocks[0] = &awsbedrock.ContentBlock{Text: openAiMessage.Content.Text}
contentBlocks = append(contentBlocks, &awsbedrock.ContentBlock{Text: openAiMessage.Content.Refusal})
} else if openAiMessage.Content.Text != nil {
// TODO: we are sometimes missing the content (should fix)
contentBlocks = append(contentBlocks, &awsbedrock.ContentBlock{Text: openAiMessage.Content.Text})
}
bedrockMessage = &awsbedrock.Message{
Role: role,
@@ -306,16 +307,34 @@ func (o *openAIToAWSBedrockTranslatorV1ChatCompletion) openAIMessageToBedrockMes
func (o *openAIToAWSBedrockTranslatorV1ChatCompletion) openAIMessageToBedrockMessageRoleTool(
openAiMessage *openai.ChatCompletionToolMessageParam, role string,
) (*awsbedrock.Message, error) {
// Validate and cast the openai content value into bedrock content block
content := make([]*awsbedrock.ToolResultContentBlock, 0)

switch v := openAiMessage.Content.Value.(type) {
case string:
content = []*awsbedrock.ToolResultContentBlock{
{
Text: &v,
},
}
case []openai.ChatCompletionContentPartTextParam:
for _, part := range v {
content = append(content, &awsbedrock.ToolResultContentBlock{
Text: &part.Text,
})
}

default:
return nil, fmt.Errorf("unexpected content type for tool message: %T", openAiMessage.Content.Value)
}

return &awsbedrock.Message{
Role: role,
Content: []*awsbedrock.ContentBlock{
{
ToolResult: &awsbedrock.ToolResultBlock{
Content: []*awsbedrock.ToolResultContentBlock{
{
Text: openAiMessage.Content.Value.(*string),
},
},
Content: content,
ToolUseID: &openAiMessage.ToolCallID,
},
},
},
@@ -554,7 +573,6 @@ func (o *openAIToAWSBedrockTranslatorV1ChatCompletion) ResponseBody(respHeaders
if err = json.NewDecoder(body).Decode(&bedrockResp); err != nil {
return nil, nil, tokenUsage, fmt.Errorf("failed to unmarshal body: %w", err)
}

openAIResp := openai.ChatCompletionResponse{
Object: "chat.completion",
Choices: make([]openai.ChatCompletionResponseChoice, 0),
@@ -572,6 +590,7 @@ func (o *openAIToAWSBedrockTranslatorV1ChatCompletion) ResponseBody(respHeaders
CompletionTokens: bedrockResp.Usage.OutputTokens,
}
}

// AWS Bedrock does not support N(multiple choices) > 0, so there could be only one choice.
choice := openai.ChatCompletionResponseChoice{
Index: (int64)(0),
80 changes: 77 additions & 3 deletions internal/extproc/translator/openai_awsbedrock_test.go
@@ -73,6 +73,14 @@ func TestOpenAIToAWSBedrockTranslatorV1ChatCompletion_RequestBody(t *testing.T)
},
}, Type: openai.ChatMessageRoleUser,
},
{
Value: openai.ChatCompletionToolMessageParam{
Content: openai.StringOrArray{
Value: "Weather in Queens, NY is 70F and clear skies.",
},
ToolCallID: "call_6g7a",
}, Type: openai.ChatMessageRoleTool,
},
{
Value: openai.ChatCompletionAssistantMessageParam{
Content: openai.ChatCompletionAssistantMessageParamContent{
@@ -127,6 +135,21 @@ func TestOpenAIToAWSBedrockTranslatorV1ChatCompletion_RequestBody(t *testing.T)
},
},
},
{
Role: openai.ChatMessageRoleUser,
Content: []*awsbedrock.ContentBlock{
{
ToolResult: &awsbedrock.ToolResultBlock{
ToolUseID: ptr.To("call_6g7a"),
Content: []*awsbedrock.ToolResultContentBlock{
{
Text: ptr.To("Weather in Queens, NY is 70F and clear skies."),
},
},
},
},
},
},
{
Role: openai.ChatMessageRoleAssistant,
Content: []*awsbedrock.ContentBlock{
@@ -902,7 +925,7 @@ func TestOpenAIToAWSBedrockTranslatorV1ChatCompletion_ResponseBody(t *testing.T)
Index: 0,
Message: openai.ChatCompletionResponseChoiceMessage{
Content: ptr.To("response"),
Role: "assistant",
Role: awsbedrock.ConversationRoleAssistant,
},
FinishReason: openai.ChatCompletionChoicesFinishReasonStop,
},
@@ -993,6 +1016,57 @@ func TestOpenAIToAWSBedrockTranslatorV1ChatCompletion_ResponseBody(t *testing.T)
},
},
},
{
name: "merge content",
input: awsbedrock.ConverseResponse{
Usage: &awsbedrock.TokenUsage{
InputTokens: 10,
OutputTokens: 20,
TotalTokens: 30,
},
Output: &awsbedrock.ConverseOutput{
Message: awsbedrock.Message{
Role: awsbedrock.ConversationRoleAssistant,
Content: []*awsbedrock.ContentBlock{
{Text: ptr.To("response")},
{ToolUse: &awsbedrock.ToolUseBlock{
Name: "exec_python_code",
ToolUseID: "call_6g7a",
Input: map[string]interface{}{"code_block": "from playwright.sync_api import sync_playwright\n"},
}},
},
},
},
},
output: openai.ChatCompletionResponse{
Object: "chat.completion",
Usage: openai.ChatCompletionResponseUsage{
TotalTokens: 30,
PromptTokens: 10,
CompletionTokens: 20,
},
Choices: []openai.ChatCompletionResponseChoice{
{
Index: 0,
Message: openai.ChatCompletionResponseChoiceMessage{
Content: ptr.To("response"),
Role: awsbedrock.ConversationRoleAssistant,
ToolCalls: []openai.ChatCompletionMessageToolCallParam{
{
ID: "call_6g7a",
Function: openai.ChatCompletionMessageToolCallFunctionParam{
Name: "exec_python_code",
Arguments: "{\"code_block\":\"from playwright.sync_api import sync_playwright\\n\"}",
},
Type: openai.ChatCompletionMessageToolCallTypeFunction,
},
},
},
FinishReason: openai.ChatCompletionChoicesFinishReasonStop,
},
},
},
},
}

for _, tt := range tests {
@@ -1173,14 +1247,14 @@ func TestOpenAIToAWSBedrockTranslator_convertEvent(t *testing.T) {
{
name: "role",
in: awsbedrock.ConverseStreamEvent{
Role: ptrOf("assistant"),
Role: ptrOf(awsbedrock.ConversationRoleAssistant),
},
out: &openai.ChatCompletionResponseChunk{
Object: "chat.completion.chunk",
Choices: []openai.ChatCompletionResponseChunkChoice{
{
Delta: &openai.ChatCompletionResponseChunkChoiceDelta{
Role: "assistant",
Role: awsbedrock.ConversationRoleAssistant,
Content: &emptyString,
},
},
124 changes: 84 additions & 40 deletions tests/extproc/real_providers_test.go
@@ -11,9 +11,11 @@ import (
"bufio"
"bytes"
"cmp"
"context"
"encoding/json"
"fmt"
"os"
"strings"
"testing"
"time"

@@ -187,49 +189,91 @@ func TestWithRealProviders(t *testing.T) {
}
})

t.Run("Bedrock calls tool get_weather function", func(t *testing.T) {
cc.maybeSkip(t, requiredCredentialAWS)
t.Run("Bedrock uses tool in response", func(t *testing.T) {
client := openai.NewClient(option.WithBaseURL(listenerAddress + "/v1/"))
require.Eventually(t, func() bool {
chatCompletion, err := client.Chat.Completions.New(t.Context(), openai.ChatCompletionNewParams{
Messages: openai.F([]openai.ChatCompletionMessageParamUnion{
openai.UserMessage("What is the weather like in Paris today?"),
}),
Tools: openai.F([]openai.ChatCompletionToolParam{
{
Type: openai.F(openai.ChatCompletionToolTypeFunction),
Function: openai.F(openai.FunctionDefinitionParam{
Name: openai.String("get_weather"),
Description: openai.String("Get weather at the given location"),
Parameters: openai.F(openai.FunctionParameters{
"type": "object",
"properties": map[string]interface{}{
"location": map[string]string{
"type": "string",
},
},
"required": []string{"location"},
}),
for _, tc := range []realProvidersTestCase{
{name: "aws-bedrock", modelName: "us.anthropic.claude-3-5-sonnet-20240620-v1:0", required: requiredCredentialAWS}, // This will go to "aws-bedrock" using credentials file.
} {
t.Run(tc.modelName, func(t *testing.T) {
cc.maybeSkip(t, tc.required)
require.Eventually(t, func() bool {
// Step 1: Initial tool call request
question := "What is the weather in New York City?"
params := openai.ChatCompletionNewParams{
Messages: openai.F([]openai.ChatCompletionMessageParamUnion{
openai.UserMessage(question),
}),
Tools: openai.F([]openai.ChatCompletionToolParam{
{
Type: openai.F(openai.ChatCompletionToolTypeFunction),
Function: openai.F(openai.FunctionDefinitionParam{
Name: openai.String("get_weather"),
Description: openai.String("Get weather at the given location"),
Parameters: openai.F(openai.FunctionParameters{
"type": "object",
"properties": map[string]interface{}{
"location": map[string]string{
"type": "string",
},
},
"required": []string{"location"},
}),
}),
},
}),
},
}),
Model: openai.F("us.anthropic.claude-3-5-sonnet-20240620-v1:0"),
Seed: openai.Int(0),
Model: openai.F(tc.modelName),
}
completion, err := client.Chat.Completions.New(context.Background(), params)
if err != nil {
t.Logf("error: %v", err)
return false
}
// Step 2: Verify tool call
toolCalls := completion.Choices[0].Message.ToolCalls
if len(toolCalls) == 0 {
t.Logf("Expected tool call from completion result but got none")
return false
}
// Step 3: Simulate the tool returning a response, add the tool response to the params, and check the second response
params.Messages.Value = append(params.Messages.Value, completion.Choices[0].Message)
getWeatherCalled := false
for _, toolCall := range toolCalls {
if toolCall.Function.Name == "get_weather" {
getWeatherCalled = true
// Extract the location from the function call arguments
var args map[string]interface{}
if argErr := json.Unmarshal([]byte(toolCall.Function.Arguments), &args); argErr != nil {
t.Logf("Error unmarshalling the function arguments: %v", argErr)
}
location := args["location"].(string)
if location != "New York City" {
t.Logf("Expected location to be New York City but got %s", location)
}
// Simulate getting weather data
weatherData := "Sunny, 25°C"
params.Messages.Value = append(params.Messages.Value, openai.ToolMessage(toolCall.ID, weatherData))
t.Logf("Appended tool message: %v", openai.ToolMessage(toolCall.ID, weatherData)) // Debug log
}
}
if getWeatherCalled == false {
t.Logf("get_weather tool not specified in chat completion response")
return false
}

secondChatCompletion, err := client.Chat.Completions.New(context.Background(), params)
if err != nil {
t.Logf("error during second response: %v", err)
return false
}

// Step 4: Verify that the second response is correct
completionResult := secondChatCompletion.Choices[0].Message.Content
t.Logf("content of completion response using tool: %s", secondChatCompletion.Choices[0].Message.Content)
return strings.Contains(completionResult, "New York City") && strings.Contains(completionResult, "sunny") && strings.Contains(completionResult, "25°C")
}, 60*time.Second, 4*time.Second)
})
if err != nil {
t.Logf("error: %v", err)
return false
}
returnsToolCall := false
for _, choice := range chatCompletion.Choices {
t.Logf("choice content: %s", choice.Message.Content)
t.Logf("finish reason: %s", choice.FinishReason)
t.Logf("choice toolcall: %v", choice.Message.ToolCalls)
if choice.FinishReason == openai.ChatCompletionChoicesFinishReasonToolCalls {
returnsToolCall = true
}
}
return returnsToolCall
}, 30*time.Second, 2*time.Second)
}
})

// Models are served by the extproc filter as a direct response so this can run even if the
