[OPIK-886] [SDK] Support for experiment multi-prompt version link (#1185)

* add helpers

* update build_metadata_and_prompt_versions helper func

* add new `prompts` argument to create_experiment()

* add new `prompts` argument to evaluate()

* fix linter warnings

* add/fix tests

* remove redundant code

* rename `check_prompt_args`

* print diff during assertion

* rename tests
japdubengsub authored Feb 4, 2025
1 parent 4b5e746 commit 82abdfd
Showing 11 changed files with 349 additions and 100 deletions.
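Taken together, the changes below add a plural `prompts` argument so that a single experiment can be linked to several prompt versions at once. A minimal usage sketch, assuming prompts are built with `opik.Prompt` (the exact constructor may differ across SDK versions) and that the dataset name, prompt names, and config values shown here are placeholders:

import opik

client = opik.Opik()

# Assumed construction of two Prompt objects; names and templates are placeholders.
system_prompt = opik.Prompt(name="qa-system", prompt="You are a concise QA assistant.")
user_prompt = opik.Prompt(name="qa-user", prompt="Answer the question: {{question}}")

# New in this commit: link both prompt versions to one experiment.
experiment = client.create_experiment(
    dataset_name="my-dataset",
    experiment_config={"model": "gpt-4o"},
    prompts=[system_prompt, user_prompt],
)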
8 changes: 6 additions & 2 deletions sdks/python/src/opik/api_objects/experiment/__init__.py
@@ -1,4 +1,8 @@
from .experiment import Experiment
from .helpers import build_metadata_and_prompt_version
from .helpers import build_metadata_and_prompt_versions, handle_prompt_args

__all__ = ["Experiment", "build_metadata_and_prompt_version"]
__all__ = [
"Experiment",
"build_metadata_and_prompt_versions",
"handle_prompt_args",
]
11 changes: 5 additions & 6 deletions sdks/python/src/opik/api_objects/experiment/experiment.py
@@ -1,13 +1,12 @@
import logging
import functools
import logging
from typing import List, Optional

from opik.message_processing.batching import sequence_splitter
from opik.rest_api import client as rest_api_client
from opik.rest_api.types import experiment_item as rest_experiment_item
from opik.message_processing.batching import sequence_splitter

from . import experiment_item
from .. import helpers, constants
from .. import constants, helpers
from ...api_objects.prompt import Prompt

LOGGER = logging.getLogger(__name__)
@@ -20,13 +19,13 @@ def __init__(
name: Optional[str],
dataset_name: str,
rest_client: rest_api_client.OpikApi,
prompt: Optional[Prompt] = None,
prompts: Optional[List[Prompt]] = None,
) -> None:
self._id = id
self._name = name
self._dataset_name = dataset_name
self._rest_client = rest_client
self._prompt = prompt
self._prompts = prompts

@property
def id(self) -> str:
54 changes: 38 additions & 16 deletions sdks/python/src/opik/api_objects/experiment/helpers.py
@@ -1,19 +1,21 @@
from typing import Optional, Dict, Mapping, Tuple, Any
from .. import prompt
import logging
from opik import jsonable_encoder
from typing import Any, Dict, List, Mapping, Optional, Tuple

from opik import exceptions, jsonable_encoder
from opik.rest_api import OpikApi
from opik.rest_api.types import experiment_public
from opik import exceptions
from .. import prompt

LOGGER = logging.getLogger(__name__)

PromptVersion = Dict[str, str]

def build_metadata_and_prompt_version(
experiment_config: Optional[Dict[str, Any]], prompt: Optional[prompt.Prompt]
) -> Tuple[Optional[Dict[str, Any]], Optional[Dict[str, str]]]:
metadata = None
prompt_version: Optional[Dict[str, str]] = None

def build_metadata_and_prompt_versions(
experiment_config: Optional[Dict[str, Any]],
prompts: Optional[List[prompt.Prompt]],
) -> Tuple[Optional[Dict[str, Any]], Optional[List[PromptVersion]]]:
prompt_versions: Optional[List[PromptVersion]] = None

if experiment_config is None:
experiment_config = {}
@@ -25,22 +27,26 @@ def build_metadata_and_prompt_version(
)
experiment_config = {}

if prompt is not None and "prompt" in experiment_config:
if prompts is not None and len(prompts) > 0 and "prompts" in experiment_config:
LOGGER.warning(
"The prompt parameter will not be added to experiment since there is already `prompt` specified in experiment_config"
"The `prompts` parameter will not be added to experiment since there is already `prompts` specified in experiment_config"
)
return (experiment_config, None)
return experiment_config, None

if prompts is not None and len(prompts) > 0:
prompt_versions = []
experiment_config["prompts"] = []

if prompt is not None:
prompt_version = {"id": prompt.__internal_api__version_id__}
experiment_config["prompt"] = prompt.prompt
for prompt in prompts:
prompt_versions.append({"id": prompt.__internal_api__version_id__})
experiment_config["prompts"].append(prompt.prompt)

if experiment_config == {}:
return None, None

metadata = jsonable_encoder.jsonable_encoder(experiment_config)

return metadata, prompt_version
return metadata, prompt_versions


def get_experiment_data_by_name(
@@ -59,3 +65,19 @@ def get_experiment_data_by_name(
for experiment in experiment_page_public.content:
if experiment.name == name:
return experiment


def handle_prompt_args(
prompt: Optional[prompt.Prompt] = None,
prompts: Optional[List[prompt.Prompt]] = None,
) -> Optional[List[prompt.Prompt]]:
if prompts is not None and len(prompts) > 0 and prompt is not None:
LOGGER.warning(
"Arguments `prompt` and `prompts` are mutually exclusive, `prompts` will be used`."
)
elif prompt is not None:
prompts = [prompt]
elif prompts is not None and len(prompts) == 0:
prompts = None

return prompts
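
A rough sketch of how the two helpers above behave, read directly off the code in this file; the `Prompt` constructor shown is an assumption and the prompt names are placeholders:

import opik
from opik.api_objects.experiment import (
    build_metadata_and_prompt_versions,
    handle_prompt_args,
)

# Placeholder prompts; the Prompt constructor shown here is an assumption.
p1 = opik.Prompt(name="p1", prompt="Summarize: {{text}}")
p2 = opik.Prompt(name="p2", prompt="Translate: {{text}}")

handle_prompt_args(prompt=p1, prompts=[p2])   # warns; `prompts` wins -> [p2]
handle_prompt_args(prompt=p1)                 # deprecated singular form -> [p1]
handle_prompt_args(prompts=[])                # empty list is normalized -> None

metadata, versions = build_metadata_and_prompt_versions(
    experiment_config={"model": "gpt-4o"},
    prompts=[p1],
)
# metadata -> {"model": "gpt-4o", "prompts": [p1.prompt]}
# versions -> [{"id": p1.__internal_api__version_id__}]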
18 changes: 13 additions & 5 deletions sdks/python/src/opik/api_objects/opik_client.py
@@ -497,6 +497,7 @@ def create_experiment(
name: Optional[str] = None,
experiment_config: Optional[Dict[str, Any]] = None,
prompt: Optional[Prompt] = None,
prompts: Optional[List[Prompt]] = None,
) -> experiment.Experiment:
"""
Creates a new experiment using the given dataset name and optional parameters.
@@ -505,31 +506,38 @@ def create_experiment(
dataset_name: The name of the dataset to associate with the experiment.
name: The optional name for the experiment. If None, a generated name will be used.
experiment_config: Optional experiment configuration parameters. Must be a dictionary if provided.
prompt: Prompt object to associate with the experiment.
prompt: Prompt object to associate with the experiment. Deprecated, use `prompts` argument instead.
prompts: List of Prompt objects to associate with the experiment.
Returns:
experiment.Experiment: The newly created experiment object.
"""
id = helpers.generate_id()

metadata, prompt_version = experiment.build_metadata_and_prompt_version(
experiment_config=experiment_config, prompt=prompt
checked_prompts = experiment_helpers.handle_prompt_args(
prompt=prompt,
prompts=prompts,
)

metadata, prompt_versions = experiment.build_metadata_and_prompt_versions(
experiment_config=experiment_config,
prompts=checked_prompts,
)

self._rest_client.experiments.create_experiment(
name=name,
dataset_name=dataset_name,
id=id,
metadata=metadata,
prompt_version=prompt_version,
prompt_versions=prompt_versions,
)

experiment_ = experiment.Experiment(
id=id,
name=name,
dataset_name=dataset_name,
rest_client=self._rest_client,
prompt=prompt,
prompts=checked_prompts,
)

return experiment_
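
For existing callers the singular argument keeps working: `handle_prompt_args()` folds it into a one-element list before the experiment is created. A sketch of the equivalence, reusing the placeholder `client` and `system_prompt` from the sketch above:

# Deprecated but still accepted:
experiment = client.create_experiment(dataset_name="my-dataset", prompt=system_prompt)

# Equivalent new-style call:
experiment = client.create_experiment(dataset_name="my-dataset", prompts=[system_prompt])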
38 changes: 28 additions & 10 deletions sdks/python/src/opik/evaluation/evaluator.py
@@ -1,14 +1,18 @@
from typing import List, Dict, Any, Optional, Union, Callable
import logging
import time
from typing import Any, Callable, Dict, List, Optional, Union

from .types import LLMTask
from . import asyncio_support, evaluation_result, report, scorer, scores_logger, utils
from .metrics import base_metric
from .models import base_model, models_factory
from .types import LLMTask
from .. import Prompt
from ..api_objects.prompt import prompt_template
from ..api_objects.dataset import dataset
from ..api_objects import opik_client
from . import scorer, scores_logger, report, evaluation_result, utils, asyncio_support
from ..api_objects.dataset import dataset
from ..api_objects.experiment.helpers import handle_prompt_args
from ..api_objects.prompt import prompt_template

LOGGER = logging.getLogger(__name__)


def evaluate(
@@ -22,6 +26,7 @@ def evaluate(
nb_samples: Optional[int] = None,
task_threads: int = 16,
prompt: Optional[Prompt] = None,
prompts: Optional[List[Prompt]] = None,
scoring_key_mapping: Optional[
Dict[str, Union[str, Callable[[Dict[str, Any]], Any]]]
] = None,
@@ -59,7 +64,9 @@ def evaluate(
are executed sequentially in the current thread.
Use more than 1 worker if your task object is compatible with sharing across threads.
prompt: Prompt object to link with experiment.
prompt: Prompt object to link with experiment. Deprecated, use `prompts` argument instead.
prompts: A list of Prompt objects to link with experiment.
scoring_key_mapping: A dictionary that allows you to rename keys present in either the dataset item or the task output
so that they match the keys expected by the scoring metrics. For example if you have a dataset item with the following content:
@@ -69,13 +76,18 @@ def evaluate(
if scoring_metrics is None:
scoring_metrics = []

checked_prompts = handle_prompt_args(
prompt=prompt,
prompts=prompts,
)

client = opik_client.get_client_cached()

experiment = client.create_experiment(
name=experiment_name,
dataset_name=dataset.name,
experiment_config=experiment_config,
prompt=prompt,
prompts=checked_prompts,
)

start_time = time.time()
@@ -246,13 +258,17 @@ def evaluate_prompt(
experiment_name: name of the experiment.
project_name: The name of the project to log data
experiment_config: configuration of the experiment.
scoring_threads: amount of thread workers to run scoring metrics.
verbose: an integer value that controls evaluation output logs such as summary and tqdm progress bar.
nb_samples: number of samples to evaluate.
verbose: an integer value that controls evaluation output logs such as summary and tqdm progress bar.
task_threads: amount of thread workers to run scoring metrics.
prompt: Prompt object to link with experiment.
"""
if isinstance(model, str):
model = models_factory.get(model_name=model)
@@ -273,11 +289,13 @@ def evaluate_prompt(

client = opik_client.get_client_cached()

prompts = [prompt] if prompt else None

experiment = client.create_experiment(
name=experiment_name,
dataset_name=dataset.name,
experiment_config=experiment_config,
prompt=prompt,
prompts=prompts,
)

start_time = time.time()
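
On the evaluation side the same pattern applies. A sketch of the updated `evaluate()` call, assuming the dataset, task, and metric come from your existing evaluation setup:

from opik.evaluation import evaluate

result = evaluate(
    dataset=dataset,                        # an existing opik Dataset
    task=llm_task,                          # your LLMTask callable
    scoring_metrics=[my_metric],            # metrics you already use
    experiment_name="multi-prompt-run",
    prompts=[system_prompt, user_prompt],   # new: link several prompt versions
)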
6 changes: 6 additions & 0 deletions sdks/python/tests/conftest.py
@@ -1,3 +1,5 @@
import random
import string
from typing import cast

import mock
@@ -109,3 +111,7 @@ def fake_backend_without_batching(patch_streamer_without_batching):
mock_construct_online_streamer,
):
yield fake_message_processor_


def random_chars(n: int = 6) -> str:
return "".join(random.choice(string.ascii_letters) for _ in range(n))
11 changes: 3 additions & 8 deletions sdks/python/tests/e2e/conftest.py
@@ -1,20 +1,15 @@
import random
import string
from typing import Final

import pytest

import opik
import opik.api_objects.opik_client
from .. import testlib
from ..conftest import random_chars

OPIK_E2E_TESTS_PROJECT_NAME: Final[str] = "e2e-tests"


def _random_chars(n: int = 6) -> str:
return "".join(random.choice(string.ascii_letters) for _ in range(n))


@pytest.fixture()
def configure_e2e_tests_env():
with testlib.patch_environ({"OPIK_PROJECT_NAME": OPIK_E2E_TESTS_PROJECT_NAME}):
@@ -32,11 +27,11 @@ def opik_client(configure_e2e_tests_env, shutdown_cached_client_after_test):

@pytest.fixture
def dataset_name(opik_client: opik.Opik):
name = f"e2e-tests-dataset-{_random_chars()}"
name = f"e2e-tests-dataset-{random_chars()}"
yield name


@pytest.fixture
def experiment_name(opik_client: opik.Opik):
name = f"e2e-tests-experiment-{_random_chars()}"
name = f"e2e-tests-experiment-{random_chars()}"
yield name