Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multiple request rates #2

Merged
merged 26 commits into from
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
652fd68
add ability to pass in multiple request rates
DaltheCow Jun 19, 2024
bfcc329
running tests command is added to the Makefile
parfeniukink Jun 26, 2024
44f8e41
rename to fixed rate profile generator
DaltheCow Jul 2, 2024
d8469cb
increment rate index
DaltheCow Jul 2, 2024
eb03706
combine constant and poisson Profile creation
DaltheCow Jul 2, 2024
9a0c582
throw error if user passes in rate in synchronous mode
DaltheCow Jul 2, 2024
fb1ebcb
refactor to better handle creating profile generator
DaltheCow Jul 2, 2024
bf72422
test base ProfileGenerator class
DaltheCow Jul 3, 2024
98f79c4
update fixed rate profile generator, add tests, set up initial execut…
DaltheCow Jul 9, 2024
c0284a3
test executor run method
DaltheCow Jul 10, 2024
d915515
test sweep profile generator
DaltheCow Jul 11, 2024
993610a
merge main
DaltheCow Jul 15, 2024
ffc0e7b
fix test indentation
DaltheCow Jul 15, 2024
b41b74a
fix some improper imports
DaltheCow Jul 17, 2024
c78d2ba
Merge branch 'main' into multiple-request-rates
DaltheCow Jul 18, 2024
98ff5e4
merge main, handle merge conflicts minus the tests
DaltheCow Jul 23, 2024
21774cd
wip
DaltheCow Jul 23, 2024
1d3a7cf
wip
DaltheCow Jul 23, 2024
79c03ce
fix all broken tests
DaltheCow Jul 23, 2024
8833ae3
run make style
DaltheCow Jul 23, 2024
2ec371b
fix linting issues
DaltheCow Jul 23, 2024
2361916
remove unused import
DaltheCow Jul 23, 2024
ca8e553
fix type issue
DaltheCow Jul 23, 2024
ce460de
pytest.init_options section is restored
Jul 24, 2024
3062fba
format pyproject.toml file
Jul 24, 2024
7524977
Merge branch 'main' into multiple-request-rates
markurtz Jul 24, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,3 @@ ignore_missing_imports=true
line-length = 88
exclude = ["build", "dist", "env", ".venv"]
lint.select = ["E", "F", "W"]


[tool.pytest.ini_options]
addopts = '-s -vvv --cache-clear --cov-report=term-missing --cov --cov-fail-under=75'
markers = [
"smoke: quick tests to check basic functionality",
"sanity: detailed tests to ensure major functions work correctly",
"regression: tests to ensure that new changes do not break existing functionality"
]
8 changes: 6 additions & 2 deletions src/guidellm/executor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
from .executor import Executor
from .profile_generator import (
FixedRateProfileGenerator,
Profile,
ProfileGenerationMode,
ProfileGenerator,
SingleProfileGenerator,
SweepProfileGenerator,
rate_type_to_load_gen_mode,
rate_type_to_profile_mode,
)

__all__ = [
"rate_type_to_load_gen_mode",
"rate_type_to_profile_mode",
"Executor",
"ProfileGenerationMode",
"Profile",
"ProfileGenerator",
"SingleProfileGenerator",
"FixedRateProfileGenerator",
"SweepProfileGenerator",
]
2 changes: 1 addition & 1 deletion src/guidellm/executor/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def __init__(
self,
backend: Backend,
request_generator: RequestGenerator,
profile_mode: ProfileGenerationMode = ProfileGenerationMode.SINGLE,
profile_mode: ProfileGenerationMode = ProfileGenerationMode.SWEEP,
profile_args: Optional[Dict[str, Any]] = None,
max_requests: Optional[int] = None,
max_duration: Optional[float] = None,
Expand Down
72 changes: 48 additions & 24 deletions src/guidellm/executor/profile_generator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum
from typing import Dict, Optional, Type, Union
from typing import Dict, List, Optional, Type, Union

import numpy

Expand All @@ -12,16 +12,30 @@
"ProfileGenerationMode",
"Profile",
"ProfileGenerator",
"SingleProfileGenerator",
"FixedRateProfileGenerator",
"SweepProfileGenerator",
]

rate_type_to_load_gen_mode = {
"synchronous": LoadGenerationMode.SYNCHRONOUS,
"constant": LoadGenerationMode.CONSTANT,
"poisson": LoadGenerationMode.POISSON,
}


class ProfileGenerationMode(Enum):
SINGLE = "single"
FIXED_RATE = "fixed_rate"
SWEEP = "sweep"


rate_type_to_profile_mode = {
"synchronous": ProfileGenerationMode.FIXED_RATE,
"constant": ProfileGenerationMode.FIXED_RATE,
"poisson": ProfileGenerationMode.FIXED_RATE,
"sweep": ProfileGenerationMode.SWEEP,
}


@dataclass
class Profile:
load_gen_mode: LoadGenerationMode
Expand Down Expand Up @@ -55,34 +69,44 @@ def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profil
pass


@ProfileGenerator.register(ProfileGenerationMode.SINGLE)
class SingleProfileGenerator(ProfileGenerator):
def __init__(self, rate: float, rate_type: LoadGenerationMode):
super().__init__(ProfileGenerationMode.SINGLE)
self._rate: float = rate
self._rate_type: LoadGenerationMode = rate_type
@ProfileGenerator.register(ProfileGenerationMode.FIXED_RATE)
class FixedRateProfileGenerator(ProfileGenerator):
def __init__(
self,
load_gen_mode: Optional[LoadGenerationMode],
rates: Optional[List[float]] = None,
**kwargs,
):
super().__init__(ProfileGenerationMode.FIXED_RATE)
if load_gen_mode == LoadGenerationMode.SYNCHRONOUS and rates and len(rates) > 0:
raise ValueError("custom rates are not supported in synchronous mode")
self._rates: Optional[List[float]] = rates
self._load_gen_mode = load_gen_mode
self._generated: bool = False
self._rate_index: int = 0

def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profile]:
if self._generated:
return None

self._generated = True

if self._rate_type == LoadGenerationMode.CONSTANT:
return Profile(
load_gen_mode=LoadGenerationMode.CONSTANT, load_gen_rate=self._rate
)
elif self._rate_type == LoadGenerationMode.SYNCHRONOUS:
if self._load_gen_mode == LoadGenerationMode.SYNCHRONOUS:
if self._generated:
return None
self._generated = True
return Profile(
load_gen_mode=LoadGenerationMode.SYNCHRONOUS, load_gen_rate=None
)
elif self._rate_type == LoadGenerationMode.POISSON:
return Profile(
load_gen_mode=LoadGenerationMode.POISSON, load_gen_rate=self._rate
)
elif self._load_gen_mode in {
LoadGenerationMode.CONSTANT,
LoadGenerationMode.POISSON,
}:
if self._rates:
if self._rate_index >= len(self._rates):
return None
current_rate = self._rates[self._rate_index]
self._rate_index += 1
return Profile(
load_gen_mode=self._load_gen_mode, load_gen_rate=current_rate
)

raise ValueError(f"Invalid rate type: {self._rate_type}")
raise ValueError(f"Invalid rate type: {self._load_gen_mode}")


@ProfileGenerator.register(ProfileGenerationMode.SWEEP)
Expand Down
17 changes: 13 additions & 4 deletions src/guidellm/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@

from guidellm.backend import Backend
from guidellm.core import TextGenerationBenchmarkReport
from guidellm.executor import Executor
from guidellm.executor import (
Executor,
rate_type_to_load_gen_mode,
rate_type_to_profile_mode,
)
from guidellm.request import (
EmulatedRequestGenerator,
FileRequestGenerator,
Expand Down Expand Up @@ -45,8 +49,9 @@
@click.option(
"--rate",
type=float,
default="1.0",
default=[1.0],
help="Rate to use for constant and poisson rate types",
multiple=True,
)
@click.option(
"--num-seconds",
Expand Down Expand Up @@ -106,12 +111,16 @@ def main(
else:
raise ValueError(f"Unknown data type: {data_type}")

profile_mode = rate_type_to_profile_mode.get(rate_type)
load_gen_mode = rate_type_to_load_gen_mode.get(rate_type, None)
if not profile_mode or not load_gen_mode:
raise ValueError("Invalid rate type")
# Create executor
executor = Executor(
request_generator=request_generator,
backend=backend,
profile_mode=rate_type,
profile_args={"rate_type": rate_type, "rate": rate},
profile_mode=profile_mode,
profile_args={"load_gen_mode": load_gen_mode, "rates": rate},
max_requests=num_requests,
max_duration=num_seconds,
)
Expand Down
2 changes: 1 addition & 1 deletion src/guidellm/scheduler/load_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class LoadGenerationMode(str, Enum):

"""

SYNCHRONOUS = "sync"
SYNCHRONOUS = "synchronous"
CONSTANT = "constant"
POISSON = "poisson"

Expand Down
29 changes: 14 additions & 15 deletions tests/integration/executor/test_report_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@ def test_executor_openai_single_report_generation_sync_mode(
request_genrator = dummy.services.TestRequestGenerator(
tokenizer="bert-base-uncased"
)
profile_generation_mode = ProfileGenerationMode.SINGLE
profile_generation_mode = ProfileGenerationMode.FIXED_RATE
profile_generator_kwargs = {
"rate_type": LoadGenerationMode.SYNCHRONOUS,
"rate": 1.0,
"load_gen_mode": LoadGenerationMode.SYNCHRONOUS,
}

executor = Executor(
Expand Down Expand Up @@ -55,10 +54,10 @@ def test_executor_openai_single_report_generation_constant_mode_infinite(
request_genrator = dummy.services.TestRequestGenerator(
tokenizer="bert-base-uncased"
)
profile_generation_mode = ProfileGenerationMode.SINGLE
profile_generation_mode = ProfileGenerationMode.FIXED_RATE
profile_generator_kwargs = {
"rate_type": LoadGenerationMode.CONSTANT,
"rate": 1.0,
"load_gen_mode": LoadGenerationMode.CONSTANT,
"rates": [1.0],
}

executor = Executor(
Expand Down Expand Up @@ -88,10 +87,10 @@ def test_executor_openai_single_report_generation_constant_mode_limited(
request_genrator = dummy.services.TestRequestGenerator(
tokenizer="bert-base-uncased"
)
profile_generation_mode = ProfileGenerationMode.SINGLE
profile_generation_mode = ProfileGenerationMode.FIXED_RATE
profile_generator_kwargs = {
"rate_type": LoadGenerationMode.CONSTANT,
"rate": 1.0,
"load_gen_mode": LoadGenerationMode.CONSTANT,
"rates": [1.0],
}

executor = Executor(
Expand Down Expand Up @@ -124,10 +123,10 @@ def test_executor_openai_single_report_generation_constant_mode_failed(
request_genrator = dummy.services.TestRequestGenerator(
tokenizer="bert-base-uncased"
)
profile_generation_mode = ProfileGenerationMode.SINGLE
profile_generation_mode = ProfileGenerationMode.FIXED_RATE
profile_generator_kwargs = {
"rate_type": LoadGenerationMode.CONSTANT,
"rate": 1.0,
"load_gen_mode": LoadGenerationMode.CONSTANT,
"rates": [1.0],
}

executor = Executor(
Expand All @@ -153,10 +152,10 @@ def test_executor_openai_single_report_generation_constant_mode_cancelled_report
request_genrator = dummy.services.TestRequestGenerator(
tokenizer="bert-base-uncased"
)
profile_generation_mode = ProfileGenerationMode.SINGLE
profile_generation_mode = ProfileGenerationMode.FIXED_RATE
profile_generator_kwargs = {
"rate_type": LoadGenerationMode.CONSTANT,
"rate": 1.0,
"load_gen_mode": LoadGenerationMode.CONSTANT,
"rates": [1.0],
}

executor = Executor(
Expand Down
87 changes: 87 additions & 0 deletions tests/unit/executor/test_executor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from unittest.mock import MagicMock, patch

import pytest

from guidellm.backend.base import Backend
from guidellm.executor import Executor, Profile, ProfileGenerator
from guidellm.executor.profile_generator import ProfileGenerationMode
from guidellm.request.base import RequestGenerator
from guidellm.scheduler import LoadGenerationMode


def test_executor_creation():
    """Constructing an Executor should store the backend, request generator,
    and run limits exactly as supplied."""
    request_generator_stub = MagicMock(spec=RequestGenerator)
    backend_stub = MagicMock(spec=Backend)

    executor = Executor(
        backend_stub,
        request_generator_stub,
        ProfileGenerationMode.SWEEP,
        None,  # profile_args
        None,  # max_requests
        None,  # max_duration
    )

    assert executor.backend == backend_stub
    assert executor.request_generator == request_generator_stub
    assert executor.max_requests is None
    assert executor.max_duration is None


@pytest.fixture
def mock_request_generator():
    """Provide a RequestGenerator stand-in so no tokenizer is loaded."""
    generator_stub = MagicMock(spec=RequestGenerator)
    return generator_stub


@pytest.fixture
def mock_backend():
    """Provide a Backend stand-in so no real model endpoint is contacted."""
    backend_stub = MagicMock(spec=Backend)
    return backend_stub


@pytest.fixture
def mock_scheduler():
    """Patch the Scheduler class used inside guidellm.executor.executor and
    yield the patched class so tests can inspect how it was called."""
    with patch("guidellm.executor.executor.Scheduler") as scheduler_cls:
        yield scheduler_cls


def test_executor_run(mock_request_generator, mock_backend, mock_scheduler):
    """Executor.run should create one Scheduler per profile produced by the
    profile generator, collect each benchmark into the report, and stop once
    the generator yields None."""
    expected_rates = [1.0, 2.0]

    profile_generator_stub = MagicMock(spec=ProfileGenerator)
    # Two constant-rate profiles followed by None, which ends the run loop.
    profile_generator_stub.next.side_effect = [
        Profile(load_gen_mode=LoadGenerationMode.CONSTANT, load_gen_rate=rate)
        for rate in expected_rates
    ] + [None]

    with patch(
        "guidellm.executor.executor.ProfileGenerator.create",
        return_value=profile_generator_stub,
    ):
        executor = Executor(
            request_generator=mock_request_generator,
            backend=mock_backend,
            profile_mode=ProfileGenerationMode.FIXED_RATE,
            profile_args={
                "load_gen_mode": LoadGenerationMode.CONSTANT,
                "rates": expected_rates,
            },
            max_requests=10,
            max_duration=100,
        )

    benchmark_stub = MagicMock()
    mock_scheduler.return_value.run.return_value = benchmark_stub

    report = executor.run()

    # One Scheduler per profile, one benchmark per Scheduler run.
    assert mock_scheduler.call_count == len(expected_rates)
    assert len(report.benchmarks) == len(expected_rates)
    for benchmark in report.benchmarks:
        assert benchmark == benchmark_stub

    # Each Scheduler was handed the mode and the rate of its profile, in order.
    for call, rate in zip(mock_scheduler.call_args_list, expected_rates):
        assert call[1]["load_gen_mode"] == LoadGenerationMode.CONSTANT
        assert call[1]["load_gen_rate"] == rate
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import List, Optional

import pytest

from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport
Expand All @@ -24,12 +26,15 @@ def test_executor_single_profile_generator_benchmark_report(
request_genrator = dummy.services.TestRequestGenerator(
tokenizer="bert-base-uncased"
)
profile_generator_kwargs = {"rate_type": load_gen_mode, "rate": 1.0}
rates: Optional[List[float]] = [1.0]
if load_gen_mode == LoadGenerationMode.SYNCHRONOUS:
rates = None
profile_generator_kwargs = {"load_gen_mode": load_gen_mode, "rates": rates}

executor = Executor(
backend=openai_backend_factory(),
request_generator=request_genrator,
profile_mode=ProfileGenerationMode.SINGLE,
profile_mode=ProfileGenerationMode.FIXED_RATE,
profile_args=profile_generator_kwargs,
max_requests=1,
max_duration=None,
Expand Down
Loading