GoogleCloudPlatform · Yuan325 · Aug 19, 2024 · Jul 11, 2024 · Jul 15, 2024 · Jul 17, 2024
@@ -0,0 +1,49 @@
+# Copyright 2024 Google LLC
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Cloud Build pipeline with two steps, both run from the llm_demo directory:
+# install the Python dependencies, then execute run_evaluation.py.
+steps:
+  # Installs the runtime and test requirements with pip's --user scheme.
+  # NOTE(review): --user presumably keeps the packages visible to the next
+  # step, which runs in a fresh python:3.11 container - confirm.
+  - id: Install dependencies
+    name: python:3.11
+    dir: llm_demo 
+    script: pip install -r requirements.txt -r requirements-test.txt --user
+
+  # Runs the evaluation script with configuration passed through env vars
+  # (plain values from substitutions, sensitive values from Secret Manager).
+  - id: "Run evaluation service"
+    name: python:3.11
+    dir: llm_demo
+    env: # Set env vars expected by tests (values come from the substitutions below)
+      - "ORCHESTRATION_TYPE=${_ORCHESTRATION_TYPE}"
+      - "RETRIEVAL_EXPERIMENT_NAME=${_RETRIEVAL_EXPERIMENT_NAME}"
+      - "RESPONSE_EXPERIMENT_NAME=${_RESPONSE_EXPERIMENT_NAME}"
+    # Secrets exposed to this step; each name is mapped in availableSecrets below.
+    secretEnv:
+      - CLIENT_ID
+      - BASE_URL
+    script: |
+        #!/usr/bin/env bash
+        python run_evaluation.py
+
+serviceAccount: "projects/$PROJECT_ID/serviceAccounts/evaluation-testing@retrieval-app-testing.iam.gserviceaccount.com" # Necessary for ID token creation
+options:
+  logging: CLOUD_LOGGING_ONLY # Necessary for custom service account
+  # Lets the substitutions below reference another substitution (${_PR_NUMBER}).
+  dynamic_substitutions: true
+
+# Default values; the experiment names embed ${_PR_NUMBER}, which is not
+# defined in this file.
+# NOTE(review): _PR_NUMBER is presumably supplied by a pull-request trigger -
+# confirm it is set for every way this build is started.
+substitutions:
+  _ORCHESTRATION_TYPE: "langchain-tools"
+  _RETRIEVAL_EXPERIMENT_NAME: "retrieval-phase-eval-${_PR_NUMBER}"
+  _RESPONSE_EXPERIMENT_NAME:  "response-phase-eval-${_PR_NUMBER}"
+
+# Secret Manager versions (always "latest") and the env var each one populates.
+availableSecrets:
+  secretManager:
+    - versionName: projects/$PROJECT_ID/secrets/client_id/versions/latest
+      env: CLIENT_ID
+    - versionName: projects/$PROJECT_ID/secrets/retrieval_url/versions/latest
+      env: BASE_URL
@@ -0,0 +1,27 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .eval_golden import goldens
+from .evaluation import (
+    evaluate_response_phase,
+    evaluate_retrieval_phase,
+    run_llm_for_eval,
+)
+
+__ALL__ = [
+    "run_llm_for_eval",
+    "goldens",
+    "evaluate_retrieval_phase",
+    "evaluate_response_phase",
+]
@@ -0,0 +1,315 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from datetime import datetime, timedelta
+from typing import Any, Dict, List, Optional
+
+from pydantic import BaseModel, Field
+from pytz import timezone
+
+
+class ToolCall(BaseModel):
+    """
+    Represents tool call by orchestration.
+    """
+
+    name: str
+    arguments: Dict[str, Any] = Field(
+        default={}, description="Query arguments for tool call"
+    )
+
+
+class EvalData(BaseModel):
+    """
+    Evaluation data model.
+    This model represents the information needed for running rapid evaluation with Vertex AI.
+    """
+
+    category: Optional[str] = Field(default=None, description="Evaluation category")
+    query: Optional[str] = Field(default=None, description="User query")
+    instruction: Optional[str] = Field(
+        default=None, description="Instruction to llm system"
+    )
+    content: Optional[str] = Field(
+        default=None,
+        description="Used in tool call evaluation. Content value is the text output from the model.",
+    )
+    tool_calls: List[ToolCall] = Field(
+        default=[], description="Golden tool call for evaluation"
+    )
+    context: Optional[List[Dict[str, Any] | List[Dict[str, Any]]]] = Field(
+        default=None, description="Context given to llm in order to answer user query"
+    )
+    output: Optional[str] = Field(
+        default=None, description="Golden output for evaluation"
+    )
+    prediction_tool_calls: List[ToolCall] = Field(
+        default=[], description="Tool call output from LLM"
+    )
+    prediction_output: str = Field(default="", description="Final output from LLM")
+    reset: bool = Field(
+        default=True, description="Determine to reset the chat after invoke"
+    )
+
+
+def get_date(day_delta: int):
+    DATE_FORMATTER = "%Y-%m-%d"
+    retrieved_date = datetime.now(timezone("US/Pacific")) + timedelta(days=day_delta)
+    return retrieved_date.strftime(DATE_FORMATTER)
+
+
+# Golden dataset for evaluation. Each EvalData entry pairs a user query with
+# the golden tool call(s) and/or the golden final output for that query.
+# Entries that call get_date() are computed once, when this module is
+# imported, so "tomorrow" is relative to import time (US/Pacific).
+goldens = [
+    # --- Search Airport tool ---
+    EvalData(
+        category="Search Airport Tool",
+        query="What is the airport located in San Francisco?",
+        tool_calls=[
+            ToolCall(
+                name="Search Airport",
+                arguments={"country": "United States", "city": "San Francisco"},
+            ),
+        ],
+    ),
+    EvalData(
+        category="Search Airport Tool",
+        query="Tell me more about Denver International Airport?",
+        tool_calls=[
+            ToolCall(
+                name="Search Airport",
+                arguments={
+                    "country": "United States",
+                    "city": "Denver",
+                    "name": "Denver International Airport",
+                },
+            ),
+        ],
+    ),
+    # --- Search Flights By Flight Number tool ---
+    EvalData(
+        category="Search Flights By Flight Number Tool",
+        query="What is the departure gate for flight CY 922?",
+        tool_calls=[
+            ToolCall(
+                name="Search Flights By Flight Number",
+                arguments={
+                    "airline": "CY",
+                    "flight_number": "922",
+                },
+            ),
+        ],
+    ),
+    EvalData(
+        category="Search Flights By Flight Number Tool",
+        query="What is flight CY 888 flying to?",
+        tool_calls=[
+            ToolCall(
+                name="Search Flights By Flight Number",
+                arguments={
+                    "airline": "CY",
+                    "flight_number": "888",
+                },
+            ),
+        ],
+    ),
+    # --- List Flights tool ---
+    EvalData(
+        category="List Flights Tool",
+        query="What flights are headed to JFK tomorrow?",
+        tool_calls=[
+            ToolCall(
+                name="List Flights",
+                arguments={
+                    "arrival_airport": "JFK",
+                    "date": f"{get_date(1)}",
+                },
+            ),
+        ],
+    ),
+    # The query gives no date: the golden is a text output and no golden
+    # tool call is specified.
+    EvalData(
+        category="List Flights Tool",
+        query="Is there any flight from SFO to DEN?",
+        output="I will need the date to retrieve relevant flights.",
+    ),
+    # --- Search Amenities tool ---
+    EvalData(
+        category="Search Amenities Tool",
+        query="Are there any luxury shops?",
+        tool_calls=[
+            ToolCall(
+                name="Search Amenities",
+                arguments={
+                    "query": "luxury shops",
+                },
+            ),
+        ],
+    ),
+    EvalData(
+        category="Search Amenities Tool",
+        query="Where can I get coffee near gate A6?",
+        tool_calls=[
+            ToolCall(
+                name="Search Amenities",
+                arguments={
+                    "query": "coffee near gate A6",
+                },
+            ),
+        ],
+    ),
+    # --- Search Policies tool ---
+    EvalData(
+        category="Search Policies Tool",
+        query="What is the flight cancellation policy?",
+        tool_calls=[
+            ToolCall(
+                name="Search Policies",
+                arguments={
+                    "query": "flight cancellation policy",
+                },
+            ),
+        ],
+    ),
+    EvalData(
+        category="Search Policies Tool",
+        query="How many checked bags can I bring?",
+        tool_calls=[
+            ToolCall(
+                name="Search Policies",
+                arguments={
+                    "query": "checked baggage allowance",
+                },
+            ),
+        ],
+    ),
+    # --- Insert Ticket tool ---
+    EvalData(
+        category="Insert Ticket",
+        query="I would like to book flight CY 922 departing from SFO on 2024-01-01 at 6:38am.",
+        tool_calls=[
+            ToolCall(
+                name="Insert Ticket",
+                arguments={
+                    "airline": "CY",
+                    "flight_number": "922",
+                    "departure_airport": "SFO",
+                    "departure_time": "2024-01-01 06:38:00",
+                },
+            ),
+        ],
+    ),
+    # This entry is the only one that sets reset=False.
+    # NOTE(review): presumably the chat is kept so that the following
+    # "book the first flight" query can refer to this flight listing -
+    # confirm against the evaluation runner.
+    EvalData(
+        category="Insert Ticket",
+        query="What flights are headed from SFO to DEN on January 1 2024?",
+        tool_calls=[
+            ToolCall(
+                name="List Flights",
+                arguments={
+                    "departure_airport": "SFO",
+                    "arrival_airport": "DEN",
+                    "date": "2024-01-01",
+                },
+            ),
+        ],
+        reset=False,
+    ),
+    # NOTE(review): the golden arguments (UA 1532, 05:50 -> 09:23) are
+    # presumably the first flight returned for the previous query - verify
+    # against the flights dataset.
+    EvalData(
+        category="Insert Ticket",
+        query="I would like to book the first flight.",
+        tool_calls=[
+            ToolCall(
+                name="Insert Ticket",
+                arguments={
+                    "airline": "UA",
+                    "flight_number": "1532",
+                    "departure_airport": "SFO",
+                    "arrival_airport": "DEN",
+                    "departure_time": "2024-01-01 05:50:00",
+                    "arrival_time": "2024-01-01 09:23:00",
+                },
+            ),
+        ],
+    ),
+    # --- List Tickets tool (called without arguments) ---
+    EvalData(
+        category="List Tickets",
+        query="Do I have any tickets?",
+        tool_calls=[ToolCall(name="List Tickets")],
+    ),
+    EvalData(
+        category="List Tickets",
+        query="When is my next flight?",
+        tool_calls=[ToolCall(name="List Tickets")],
+    ),
+    # --- Questions with a golden text output and no golden tool call ---
+    EvalData(
+        category="Airline Related Question",
+        query="What is Cymbal Air?",
+        output="Cymbal Air is a passenger airline offering convenient flights to many cities around the world from its hub in San Francisco.",
+    ),
+    EvalData(
+        category="Airline Related Question",
+        query="Where is the hub of cymbal air?",
+        output="The hub of Cymbal Air is in San Francisco.",
+    ),
+    EvalData(
+        category="Assistant Related Question",
+        query="What can you help me with?",
+        output="I can help to book flights and answer a wide range of questions pertaining to travel on Cymbal Air, as well as amenities of San Francisco Airport.",
+    ),
+    EvalData(
+        category="Assistant Related Question",
+        query="Can you help me book tickets?",
+        output="Yes, I can help with several tools such as search airports, list tickets, book tickets.",
+    ),
+    EvalData(
+        category="Out-Of-Context Question",
+        query="Can you help me solve math problems?",
+        output="Sorry, I am not given the tools for this.",
+    ),
+    EvalData(
+        category="Out-Of-Context Question",
+        query="Who is the CEO of Google?",
+        output="Sorry, I am not given the tools for this.",
+    ),
+    # --- Queries whose golden is a sequence of two tool calls ---
+    # NOTE(review): gate "A2" in the second call is presumably the gate
+    # returned for flight CY 352 - verify against the flights dataset.
+    EvalData(
+        category="Multitool Selections",
+        query="Where can I get a snack near the gate for flight CY 352?",
+        tool_calls=[
+            ToolCall(
+                name="Search Flights By Flight Number",
+                arguments={
+                    "airline": "CY",
+                    "flight_number": "352",
+                },
+            ),
+            ToolCall(
+                name="Search Amenities",
+                arguments={
+                    "query": "snack near gate A2.",
+                },
+            ),
+        ],
+    ),
+    # NOTE(review): "ORD" is presumably the airport code found by the
+    # preceding Search Airport call for Chicago - confirm.
+    EvalData(
+        category="Multitool Selections",
+        query="What are some flights from SFO to Chicago tomorrow?",
+        tool_calls=[
+            ToolCall(
+                name="Search Airport",
+                arguments={
+                    "city": "Chicago",
+                },
+            ),
+            ToolCall(
+                name="List Flights",
+                arguments={
+                    "departure_airport": "SFO",
+                    "arrival_airport": "ORD",
+                    "date": f"{get_date(1)}",
+                },
+            ),
+        ],
+    ),
+]