diff --git a/openhands/core/config.py b/openhands/core/config.py
index 634eb9d20983..4578b74cc5ad 100644
--- a/openhands/core/config.py
+++ b/openhands/core/config.py
@@ -52,6 +52,7 @@ class LLMConfig:
         output_cost_per_token: The cost per output token. This will available in logs for the user to check.
         ollama_base_url: The base URL for the OLLAMA API.
         drop_params: Drop any unmapped (unsupported) params without causing an exception.
+        enable_cache: Whether to enable caching.
     """
 
     model: str = 'gpt-4o'
@@ -80,6 +81,7 @@ class LLMConfig:
     ollama_base_url: str | None = None
     message_summary_trunc_tokens_frac: float = 0.75
     drop_params: bool | None = None
+    enable_cache: bool = True
 
     def defaults_to_dict(self) -> dict:
         """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 60ce6157f247..20c5f0f42bdd 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -12,6 +12,7 @@
 import litellm
 from litellm import completion as litellm_completion
 from litellm import completion_cost as litellm_completion_cost
+from litellm.caching import Cache
 from litellm.exceptions import (
     APIConnectionError,
     ContentPolicyViolationError,
@@ -38,6 +39,7 @@
 from openhands.core.metrics import Metrics
 
 message_separator = '\n\n----------\n\n'
+litellm.cache = Cache()
 
 
 class LLM(CondenserMixin):
@@ -124,6 +126,7 @@ def __init__(
             timeout=self.config.timeout,
             temperature=self.config.temperature,
             top_p=self.config.top_p,
+            caching=self.config.enable_cache,
         )
 
         def attempt_on_error(retry_state):
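
For reference, the diff relies on litellm's built-in response cache: assigning `litellm.cache = Cache()` installs a cache (in-memory by default) process-wide, and the `caching` kwarg threaded through from `LLMConfig.enable_cache` toggles cache lookups per completion call. Below is a minimal standalone sketch of that pattern, not the PR's code itself; the model name, prompt, and the local `enable_cache` variable are placeholders, and a valid provider API key is assumed to be set in the environment.

```python
import litellm
from litellm import completion
from litellm.caching import Cache

# Installing a Cache instance on the litellm module enables response caching
# globally; with no arguments this is an in-memory cache, as in the diff.
litellm.cache = Cache()

# Stand-in for LLMConfig.enable_cache from the diff (hypothetical local flag).
enable_cache = True

# The `caching` kwarg toggles cache reads/writes for this specific call,
# analogous to how the PR passes it into the pre-bound completion function.
response = completion(
    model='gpt-4o',  # placeholder model; any litellm-supported model works
    messages=[{'role': 'user', 'content': 'Hello'}],
    caching=enable_cache,
)

# A second identical request should be served from the cache when enabled,
# avoiding another round trip to the provider.
cached_response = completion(
    model='gpt-4o',
    messages=[{'role': 'user', 'content': 'Hello'}],
    caching=enable_cache,
)
```

Because the cache is installed at module level, setting `enable_cache: bool = True` as the config default turns caching on for all `LLM` instances unless a user opts out per configuration.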