From ccf99b2d78803f001930ef11e7ebefb1ec30d173 Mon Sep 17 00:00:00 2001 From: George Sakkis Date: Tue, 11 Feb 2025 10:11:19 +0200 Subject: [PATCH] Add env_nested_max_split setting (#534) --- docs/index.md | 52 ++++++++++++++++++++++++++++++++++++ pydantic_settings/main.py | 13 +++++++++ pydantic_settings/sources.py | 15 ++++++++--- tests/conftest.py | 5 ++++ tests/test_settings.py | 36 ++++++++++++++++++++++++- 5 files changed, 117 insertions(+), 4 deletions(-) diff --git a/docs/index.md b/docs/index.md index dd2214a6..ca7903c9 100644 --- a/docs/index.md +++ b/docs/index.md @@ -324,6 +324,58 @@ print(Settings().model_dump()) `env_nested_delimiter` can be configured via the `model_config` as shown above, or via the `_env_nested_delimiter` keyword argument on instantiation. +By default, environment variables are split by `env_nested_delimiter` into arbitrarily deep nested fields. You can limit +the depth of the nested fields with the `env_nested_max_split` config setting. A common use case where this is particularly useful +is two-level deep settings, where the `env_nested_delimiter` (usually a single `_`) may be a substring of model +field names. For example: + +```bash +# your environment +export GENERATION_LLM_PROVIDER='anthropic' +export GENERATION_LLM_API_KEY='your-api-key' +export GENERATION_LLM_API_VERSION='2024-03-15' +``` + +You could load them into the following settings model: + +```py +from pydantic import BaseModel + +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class LLMConfig(BaseModel): + provider: str = 'openai' + api_key: str + api_type: str = 'azure' + api_version: str = '2023-03-15-preview' + + +class GenerationConfig(BaseSettings): + model_config = SettingsConfigDict( + env_nested_delimiter='_', env_nested_max_split=1, env_prefix='GENERATION_' + ) + + llm: LLMConfig + ... 
+ + +print(GenerationConfig().model_dump()) +""" +{ + 'llm': { + 'provider': 'anthropic', + 'api_key': 'your-api-key', + 'api_type': 'azure', + 'api_version': '2024-03-15', + } +} +""" +``` + +Without `env_nested_max_split=1` set, `GENERATION_LLM_API_KEY` would be parsed as `llm.api.key` instead of `llm.api_key` +and it would raise a `ValidationError`. + Nested environment variables take precedence over the top-level environment variable JSON (e.g. in the example above, `SUB_MODEL__V2` trumps `SUB_MODEL`). diff --git a/pydantic_settings/main.py b/pydantic_settings/main.py index f376361e..e9f119e4 100644 --- a/pydantic_settings/main.py +++ b/pydantic_settings/main.py @@ -38,6 +38,7 @@ class SettingsConfigDict(ConfigDict, total=False): env_file_encoding: str | None env_ignore_empty: bool env_nested_delimiter: str | None + env_nested_max_split: int | None env_parse_none_str: str | None env_parse_enums: bool | None cli_prog_name: str | None @@ -112,6 +113,7 @@ class BaseSettings(BaseModel): _env_file_encoding: The env file encoding, e.g. `'latin-1'`. Defaults to `None`. _env_ignore_empty: Ignore environment variables where the value is an empty string. Default to `False`. _env_nested_delimiter: The nested env values delimiter. Defaults to `None`. + _env_nested_max_split: The nested env values maximum nesting. Defaults to `None`, which means no limit. _env_parse_none_str: The env string value that should be parsed (e.g. "null", "void", "None", etc.) into `None` type(None). Defaults to `None` type(None), which means no parsing should occur. _env_parse_enums: Parse enum field names to values. Defaults to `None.`, which means no parsing should occur. 
@@ -148,6 +150,7 @@ def __init__( _env_file_encoding: str | None = None, _env_ignore_empty: bool | None = None, _env_nested_delimiter: str | None = None, + _env_nested_max_split: int | None = None, _env_parse_none_str: str | None = None, _env_parse_enums: bool | None = None, _cli_prog_name: str | None = None, @@ -178,6 +181,7 @@ def __init__( _env_file_encoding=_env_file_encoding, _env_ignore_empty=_env_ignore_empty, _env_nested_delimiter=_env_nested_delimiter, + _env_nested_max_split=_env_nested_max_split, _env_parse_none_str=_env_parse_none_str, _env_parse_enums=_env_parse_enums, _cli_prog_name=_cli_prog_name, @@ -232,6 +236,7 @@ def _settings_build_values( _env_file_encoding: str | None = None, _env_ignore_empty: bool | None = None, _env_nested_delimiter: str | None = None, + _env_nested_max_split: int | None = None, _env_parse_none_str: str | None = None, _env_parse_enums: bool | None = None, _cli_prog_name: str | None = None, @@ -270,6 +275,11 @@ def _settings_build_values( if _env_nested_delimiter is not None else self.model_config.get('env_nested_delimiter') ) + env_nested_max_split = ( + _env_nested_max_split + if _env_nested_max_split is not None + else self.model_config.get('env_nested_max_split') + ) env_parse_none_str = ( _env_parse_none_str if _env_parse_none_str is not None else self.model_config.get('env_parse_none_str') ) @@ -333,6 +343,7 @@ def _settings_build_values( case_sensitive=case_sensitive, env_prefix=env_prefix, env_nested_delimiter=env_nested_delimiter, + env_nested_max_split=env_nested_max_split, env_ignore_empty=env_ignore_empty, env_parse_none_str=env_parse_none_str, env_parse_enums=env_parse_enums, @@ -344,6 +355,7 @@ def _settings_build_values( case_sensitive=case_sensitive, env_prefix=env_prefix, env_nested_delimiter=env_nested_delimiter, + env_nested_max_split=env_nested_max_split, env_ignore_empty=env_ignore_empty, env_parse_none_str=env_parse_none_str, env_parse_enums=env_parse_enums, @@ -412,6 +424,7 @@ def 
_settings_build_values( env_file_encoding=None, env_ignore_empty=False, env_nested_delimiter=None, + env_nested_max_split=None, env_parse_none_str=None, env_parse_enums=None, cli_prog_name=None, diff --git a/pydantic_settings/sources.py b/pydantic_settings/sources.py index 584c7cdd..4d719cc3 100644 --- a/pydantic_settings/sources.py +++ b/pydantic_settings/sources.py @@ -735,6 +735,7 @@ def __init__( case_sensitive: bool | None = None, env_prefix: str | None = None, env_nested_delimiter: str | None = None, + env_nested_max_split: int | None = None, env_ignore_empty: bool | None = None, env_parse_none_str: str | None = None, env_parse_enums: bool | None = None, @@ -745,6 +746,10 @@ def __init__( self.env_nested_delimiter = ( env_nested_delimiter if env_nested_delimiter is not None else self.config.get('env_nested_delimiter') ) + self.env_nested_max_split = ( + env_nested_max_split if env_nested_max_split is not None else self.config.get('env_nested_max_split') + ) + self.maxsplit = (self.env_nested_max_split or 0) - 1 self.env_prefix_len = len(self.env_prefix) self.env_vars = self._load_env_vars() @@ -910,11 +915,13 @@ def explode_env_vars(self, field_name: str, field: FieldInfo, env_vars: Mapping[ ] result: dict[str, Any] = {} for env_name, env_val in env_vars.items(): - if not any(env_name.startswith(prefix) for prefix in prefixes): + try: + prefix = next(prefix for prefix in prefixes if env_name.startswith(prefix)) + except StopIteration: continue # we remove the prefix before splitting in case the prefix has characters in common with the delimiter - env_name_without_prefix = env_name[self.env_prefix_len :] - _, *keys, last_key = env_name_without_prefix.split(self.env_nested_delimiter) + env_name_without_prefix = env_name[len(prefix) :] + *keys, last_key = env_name_without_prefix.split(self.env_nested_delimiter, self.maxsplit) env_var = result target_field: FieldInfo | None = field for key in keys: @@ -964,6 +971,7 @@ def __init__( case_sensitive: bool | None = 
None, env_prefix: str | None = None, env_nested_delimiter: str | None = None, + env_nested_max_split: int | None = None, env_ignore_empty: bool | None = None, env_parse_none_str: str | None = None, env_parse_enums: bool | None = None, @@ -977,6 +985,7 @@ def __init__( case_sensitive, env_prefix, env_nested_delimiter, + env_nested_max_split, env_ignore_empty, env_parse_none_str, env_parse_enums, diff --git a/tests/conftest.py b/tests/conftest.py index 7a968c57..bb9137f2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -79,6 +79,11 @@ def docs_test_env(): setenv.set('SUB_MODEL__V3', '3') setenv.set('SUB_MODEL__DEEP__V4', 'v4') + # envs for parsing environment variable values example with env_nested_max_split=1 + setenv.set('GENERATION_LLM_PROVIDER', 'anthropic') + setenv.set('GENERATION_LLM_API_KEY', 'your-api-key') + setenv.set('GENERATION_LLM_API_VERSION', '2024-03-15') + yield setenv setenv.clear() diff --git a/tests/test_settings.py b/tests/test_settings.py index 2a6578ba..d63f2b78 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -4,7 +4,7 @@ import pathlib import sys import uuid -from datetime import datetime, timezone +from datetime import date, datetime, timezone from enum import IntEnum from pathlib import Path from typing import Any, Callable, Dict, Generic, Hashable, List, Optional, Set, Tuple, Type, TypeVar, Union @@ -398,6 +398,40 @@ class Cfg(BaseSettings): assert Cfg().model_dump() == {'sub_model': {'v1': '-1-', 'v2': '-2-'}} +@pytest.mark.parametrize('env_prefix', [None, 'prefix_', 'prefix__']) +def test_nested_env_max_split(env, env_prefix): + class Person(BaseModel): + sex: Literal['M', 'F'] + first_name: str + date_of_birth: date + + class Cfg(BaseSettings): + caregiver: Person + significant_other: Optional[Person] = None + next_of_kin: Optional[Person] = None + + model_config = SettingsConfigDict(env_nested_delimiter='_', env_nested_max_split=1) + if env_prefix is not None: + model_config['env_prefix'] = env_prefix + + 
env_prefix = env_prefix or '' + env.set(env_prefix + 'caregiver_sex', 'M') + env.set(env_prefix + 'caregiver_first_name', 'Joe') + env.set(env_prefix + 'caregiver_date_of_birth', '1975-09-12') + env.set(env_prefix + 'significant_other_sex', 'F') + env.set(env_prefix + 'significant_other_first_name', 'Jill') + env.set(env_prefix + 'significant_other_date_of_birth', '1998-04-19') + env.set(env_prefix + 'next_of_kin_sex', 'M') + env.set(env_prefix + 'next_of_kin_first_name', 'Jack') + env.set(env_prefix + 'next_of_kin_date_of_birth', '1999-04-19') + + assert Cfg().model_dump() == { + 'caregiver': {'sex': 'M', 'first_name': 'Joe', 'date_of_birth': date(1975, 9, 12)}, + 'significant_other': {'sex': 'F', 'first_name': 'Jill', 'date_of_birth': date(1998, 4, 19)}, + 'next_of_kin': {'sex': 'M', 'first_name': 'Jack', 'date_of_birth': date(1999, 4, 19)}, + } + + class DateModel(BaseModel): pips: bool = False