From 9956f66994c50527bc5ac0da8701e720f77eae49 Mon Sep 17 00:00:00 2001 From: y0z Date: Wed, 14 Feb 2024 11:14:47 +0900 Subject: [PATCH 01/21] Migrate LightGBM integration. --- docs/source/reference/integration.rst | 12 - .../integration/_lightgbm_tuner/__init__.py | 19 - optuna/integration/_lightgbm_tuner/_train.py | 138 -- optuna/integration/_lightgbm_tuner/alias.py | 153 --- .../integration/_lightgbm_tuner/optimize.py | 1063 ---------------- optuna/integration/_lightgbm_tuner/sklearn.py | 46 - optuna/integration/lightgbm.py | 172 +-- pyproject.toml | 1 - .../lightgbm_tuner_tests/__init__.py | 0 .../lightgbm_tuner_tests/test_alias.py | 130 -- .../lightgbm_tuner_tests/test_optimize.py | 1105 ----------------- tests/integration_tests/test_integration.py | 6 - tests/integration_tests/test_lightgbm.py | 192 --- .../003_efficient_optimization_algorithms.py | 11 +- tutorial/10_key_features/005_visualization.py | 20 +- tutorial/20_recipes/008_specify_params.py | 15 +- 16 files changed, 44 insertions(+), 3039 deletions(-) delete mode 100644 optuna/integration/_lightgbm_tuner/__init__.py delete mode 100644 optuna/integration/_lightgbm_tuner/_train.py delete mode 100644 optuna/integration/_lightgbm_tuner/alias.py delete mode 100644 optuna/integration/_lightgbm_tuner/optimize.py delete mode 100644 optuna/integration/_lightgbm_tuner/sklearn.py delete mode 100644 tests/integration_tests/lightgbm_tuner_tests/__init__.py delete mode 100644 tests/integration_tests/lightgbm_tuner_tests/test_alias.py delete mode 100644 tests/integration_tests/lightgbm_tuner_tests/test_optimize.py delete mode 100644 tests/integration_tests/test_lightgbm.py diff --git a/docs/source/reference/integration.rst b/docs/source/reference/integration.rst index 5f63654c3d..b559f641b5 100644 --- a/docs/source/reference/integration.rst +++ b/docs/source/reference/integration.rst @@ -18,18 +18,6 @@ For most of the ML frameworks supported by Optuna, the corresponding Optuna inte For scikit-learn, an integrated :class:`~optuna.integration.OptunaSearchCV` estimator is available that combines scikit-learn BaseEstimator functionality with access to a class-level ``Study`` object. -LightGBM --------- - -.. 
autosummary:: - :toctree: generated/ - :nosignatures: - - optuna.integration.LightGBMPruningCallback - optuna.integration.lightgbm.train - optuna.integration.lightgbm.LightGBMTuner - optuna.integration.lightgbm.LightGBMTunerCV - MLflow ------ diff --git a/optuna/integration/_lightgbm_tuner/__init__.py b/optuna/integration/_lightgbm_tuner/__init__.py deleted file mode 100644 index a3467101d6..0000000000 --- a/optuna/integration/_lightgbm_tuner/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from optuna.integration._lightgbm_tuner._train import train -from optuna.integration._lightgbm_tuner.optimize import _imports -from optuna.integration._lightgbm_tuner.optimize import LightGBMTuner -from optuna.integration._lightgbm_tuner.optimize import LightGBMTunerCV - - -if _imports.is_successful(): - from optuna.integration._lightgbm_tuner.sklearn import LGBMClassifier - from optuna.integration._lightgbm_tuner.sklearn import LGBMModel - from optuna.integration._lightgbm_tuner.sklearn import LGBMRegressor - -__all__ = [ - "LightGBMTuner", - "LightGBMTunerCV", - "LGBMClassifier", - "LGBMModel", - "LGBMRegressor", - "train", -] diff --git a/optuna/integration/_lightgbm_tuner/_train.py b/optuna/integration/_lightgbm_tuner/_train.py deleted file mode 100644 index 692f55b9ac..0000000000 --- a/optuna/integration/_lightgbm_tuner/_train.py +++ /dev/null @@ -1,138 +0,0 @@ -from __future__ import annotations - -from collections.abc import Callable -from typing import Any - -from optuna._imports import try_import -from optuna.integration._lightgbm_tuner.optimize import _imports -from optuna.integration._lightgbm_tuner.optimize import LightGBMTuner -from optuna.study import Study -from optuna.trial import FrozenTrial - - -with try_import(): - import lightgbm as lgb - - -def train( - params: dict[str, Any], - train_set: "lgb.Dataset", - num_boost_round: int = 1000, - valid_sets: list["lgb.Dataset"] | tuple["lgb.Dataset", ...] | "lgb.Dataset" | None = None, - valid_names: Any | None = None, - feval: Callable[..., Any] | None = None, - feature_name: str = "auto", - categorical_feature: str = "auto", - keep_training_booster: bool = False, - callbacks: list[Callable[..., Any]] | None = None, - time_budget: int | None = None, - sample_size: int | None = None, - study: Study | None = None, - optuna_callbacks: list[Callable[[Study, FrozenTrial], None]] | None = None, - model_dir: str | None = None, - verbosity: int | None = None, - show_progress_bar: bool = True, - *, - optuna_seed: int | None = None, -) -> "lgb.Booster": - """Wrapper of LightGBM Training API to tune hyperparameters. - - It optimizes the following hyperparameters in a stepwise manner: - ``lambda_l1``, ``lambda_l2``, ``num_leaves``, ``feature_fraction``, ``bagging_fraction``, - ``bagging_freq`` and ``min_child_samples``. - It is a drop-in replacement for `lightgbm.train()`_. See - `a simple example of LightGBM Tuner `_ which optimizes the validation log loss of cancer - detection. - - :func:`~optuna.integration.lightgbm.train` is a wrapper function of - :class:`~optuna.integration.lightgbm.LightGBMTuner`. To use feature in Optuna such as - suspended/resumed optimization and/or parallelization, refer to - :class:`~optuna.integration.lightgbm.LightGBMTuner` instead of this function. - - .. note:: - Arguments and keyword arguments for `lightgbm.train()`_ can be passed. - For ``params``, please check `the official documentation for LightGBM - `_. - - Args: - time_budget: - A time budget for parameter tuning in seconds. 
- - study: - A :class:`~optuna.study.Study` instance to store optimization results. The - :class:`~optuna.trial.Trial` instances in it has the following user attributes: - ``elapsed_secs`` is the elapsed time since the optimization starts. - ``average_iteration_time`` is the average time of iteration to train the booster - model in the trial. ``lgbm_params`` is a JSON-serialized dictionary of LightGBM - parameters used in the trial. - - optuna_callbacks: - List of Optuna callback functions that are invoked at the end of each trial. - Each function must accept two parameters with the following types in this order: - :class:`~optuna.study.Study` and :class:`~optuna.trial.FrozenTrial`. - Please note that this is not a ``callbacks`` argument of `lightgbm.train()`_ . - - model_dir: - A directory to save boosters. By default, it is set to :obj:`None` and no boosters are - saved. Please set shared directory (e.g., directories on NFS) if you want to access - :meth:`~optuna.integration.lightgbm.LightGBMTuner.get_best_booster` in distributed - environments. Otherwise, it may raise :obj:`ValueError`. If the directory does not - exist, it will be created. The filenames of the boosters will be - ``{model_dir}/{trial_number}.pkl`` (e.g., ``./boosters/0.pkl``). - - verbosity: - A verbosity level to change Optuna's logging level. The level is aligned to - `LightGBM's verbosity`_ . - - .. warning:: - Deprecated in v2.0.0. ``verbosity`` argument will be removed in the future. - The removal of this feature is currently scheduled for v4.0.0, - but this schedule is subject to change. - - Please use :func:`~optuna.logging.set_verbosity` instead. - - show_progress_bar: - Flag to show progress bars or not. To disable progress bar, set this :obj:`False`. - - .. note:: - Progress bars will be fragmented by logging messages of LightGBM and Optuna. - Please suppress such messages to show the progress bars properly. - - optuna_seed: - ``seed`` of :class:`~optuna.samplers.TPESampler` for random number generator - that affects sampling for ``num_leaves``, ``bagging_fraction``, ``bagging_freq``, - ``lambda_l1``, and ``lambda_l2``. - - .. note:: - The `deterministic`_ parameter of LightGBM makes training reproducible. - Please enable it when you use this argument. - - .. _lightgbm.train(): https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.train.html - .. _LightGBM's verbosity: https://lightgbm.readthedocs.io/en/latest/Parameters.html#verbosity - .. 
_deterministic: https://lightgbm.readthedocs.io/en/latest/Parameters.html#deterministic - """ - _imports.check() - - auto_booster = LightGBMTuner( - params=params, - train_set=train_set, - num_boost_round=num_boost_round, - valid_sets=valid_sets, - valid_names=valid_names, - feval=feval, - feature_name=feature_name, - categorical_feature=categorical_feature, - keep_training_booster=keep_training_booster, - callbacks=callbacks, - time_budget=time_budget, - sample_size=sample_size, - study=study, - optuna_callbacks=optuna_callbacks, - model_dir=model_dir, - verbosity=verbosity, - show_progress_bar=show_progress_bar, - optuna_seed=optuna_seed, - ) - auto_booster.run() - return auto_booster.get_best_booster() diff --git a/optuna/integration/_lightgbm_tuner/alias.py b/optuna/integration/_lightgbm_tuner/alias.py deleted file mode 100644 index 4937f4a8e2..0000000000 --- a/optuna/integration/_lightgbm_tuner/alias.py +++ /dev/null @@ -1,153 +0,0 @@ -from __future__ import annotations - -from collections.abc import Iterable -from typing import Any -import warnings - - -_ALIAS_GROUP_LIST: list[dict[str, Any]] = [ - {"param_name": "bagging_fraction", "alias_names": ["sub_row", "subsample", "bagging"]}, - {"param_name": "learning_rate", "alias_names": ["shrinkage_rate", "eta"]}, - { - "param_name": "min_child_samples", - "alias_names": ["min_data_per_leaf", "min_data", "min_data_in_leaf", "min_samples_leaf"], - }, - { - "param_name": "min_sum_hessian_in_leaf", - "alias_names": [ - "min_sum_hessian_per_leaf", - "min_sum_hessian", - "min_hessian", - "min_child_weight", - ], - }, - { - "param_name": "num_leaves", - "alias_names": [ - "num_leaf", - "max_leaves", - "max_leaf", - "max_leaf_nodes", - ], - }, - {"param_name": "bagging_freq", "alias_names": ["subsample_freq"]}, - {"param_name": "feature_fraction", "alias_names": ["sub_feature", "colsample_bytree"]}, - {"param_name": "lambda_l1", "alias_names": ["reg_alpha", "l1_regularization"]}, - {"param_name": "lambda_l2", "alias_names": ["reg_lambda", "lambda", "l2_regularization"]}, - {"param_name": "min_gain_to_split", "alias_names": ["min_split_gain"]}, -] - - -def _handling_alias_parameters(lgbm_params: dict[str, Any]) -> None: - """Handling alias parameters.""" - - for alias_group in _ALIAS_GROUP_LIST: - param_name = alias_group["param_name"] - alias_names = alias_group["alias_names"] - duplicated_alias: dict[str, Any] = {} - - for alias_name in alias_names: - if alias_name in lgbm_params: - duplicated_alias[alias_name] = lgbm_params[alias_name] - lgbm_params[param_name] = lgbm_params[alias_name] - del lgbm_params[alias_name] - - if len(duplicated_alias) > 1: - msg = ( - f"{param_name} in param detected multiple identical aliases {duplicated_alias}, " - f"but we use {param_name}={lgbm_params[param_name]}." - ) - warnings.warn(msg) - - -_ALIAS_METRIC_LIST: list[dict[str, Any]] = [ - # The list `alias_names` do not include the `metric_name` itself. 
- { - "metric_name": "ndcg", - "alias_names": [ - "lambdarank", - "rank_xendcg", - "xendcg", - "xe_ndcg", - "xe_ndcg_mart", - "xendcg_mart", - ], - }, - {"metric_name": "map", "alias_names": ["mean_average_precision"]}, - { - "metric_name": "l2", - "alias_names": ["regression", "regression_l2", "mean_squared_error", "mse"], - }, - { - "metric_name": "l1", - "alias_names": ["regression_l1", "mean_absolute_error", "mae"], - }, - { - "metric_name": "binary_logloss", - "alias_names": ["binary"], - }, - { - "metric_name": "multi_logloss", - "alias_names": [ - "multiclass", - "softmax", - "multiclassova", - "multiclass_ova", - "ova", - "ovr", - ], - }, - { - "metric_name": "cross_entropy", - "alias_names": ["xentropy"], - }, - { - "metric_name": "cross_entropy_lambda", - "alias_names": ["xentlambda"], - }, - { - "metric_name": "kullback_leibler", - "alias_names": ["kldiv"], - }, - { - "metric_name": "mape", - "alias_names": ["mean_absolute_percentage_error"], - }, - { - "metric_name": "custom", - "alias_names": ["none", "null", "na"], - }, - { - "metric_name": "rmse", - "alias_names": ["l2_root", "root_mean_squared_error"], - }, -] - -_ALIAS_METRIC_MAP: dict[str, str] = { - alias_name: canonical_metric["metric_name"] - for canonical_metric in _ALIAS_METRIC_LIST - for alias_name in canonical_metric["alias_names"] -} - - -def _handling_alias_metrics(lgbm_params: dict[str, Any]) -> None: - """Handling alias metrics.""" - if "metric" not in lgbm_params.keys(): - return - - if not isinstance(lgbm_params["metric"], (str, Iterable)): - raise ValueError( - "The `metric` parameter is expected to be a string or an iterable object, but got " - f"{type(lgbm_params['metric'])}." - ) - - if isinstance(lgbm_params["metric"], str): - lgbm_params["metric"] = ( - _ALIAS_METRIC_MAP.get(lgbm_params["metric"]) or lgbm_params["metric"] - ) - return - - canonical_metrics = [] - for metric in lgbm_params["metric"]: - canonical_metrics.append(_ALIAS_METRIC_MAP.get(metric) or metric) - lgbm_params["metric"] = canonical_metrics diff --git a/optuna/integration/_lightgbm_tuner/optimize.py b/optuna/integration/_lightgbm_tuner/optimize.py deleted file mode 100644 index fbe3448059..0000000000 --- a/optuna/integration/_lightgbm_tuner/optimize.py +++ /dev/null @@ -1,1063 +0,0 @@ -from __future__ import annotations - -import abc -from collections.abc import Callable -from collections.abc import Container -from collections.abc import Generator -from collections.abc import Iterable -from collections.abc import Iterator -from collections.abc import Sequence -import copy -import json -import os -import pickle -import time -from typing import Any -from typing import cast -import warnings - -import numpy as np -import tqdm - -import optuna -from optuna._imports import try_import -from optuna.integration._lightgbm_tuner.alias import _handling_alias_metrics -from optuna.integration._lightgbm_tuner.alias import _handling_alias_parameters -from optuna.study import Study -from optuna.trial import FrozenTrial -from optuna.trial import TrialState - - -with try_import() as _imports: - import lightgbm as lgb - from sklearn.model_selection import BaseCrossValidator - - -# Define key names of `Trial.system_attrs`. -_ELAPSED_SECS_KEY = "lightgbm_tuner:elapsed_secs" -_AVERAGE_ITERATION_TIME_KEY = "lightgbm_tuner:average_iteration_time" -_STEP_NAME_KEY = "lightgbm_tuner:step_name" -_LGBM_PARAMS_KEY = "lightgbm_tuner:lgbm_params" - -# EPS is used to ensure that a sampled parameter value is in pre-defined value range. 
-_EPS = 1e-12 - -# Default value of tree_depth, used for upper bound of num_leaves. -_DEFAULT_TUNER_TREE_DEPTH = 8 - -# Default parameter values described in the official webpage. -_DEFAULT_LIGHTGBM_PARAMETERS = { - "lambda_l1": 0.0, - "lambda_l2": 0.0, - "num_leaves": 31, - "feature_fraction": 1.0, - "bagging_fraction": 1.0, - "bagging_freq": 0, - "min_child_samples": 20, -} - -_logger = optuna.logging.get_logger(__name__) - - -class _BaseTuner: - def __init__( - self, - lgbm_params: dict[str, Any] | None = None, - lgbm_kwargs: dict[str, Any] | None = None, - ) -> None: - # Handling alias metrics. - if lgbm_params is not None: - _handling_alias_metrics(lgbm_params) - - self.lgbm_params = lgbm_params or {} - self.lgbm_kwargs = lgbm_kwargs or {} - - def _get_metric_for_objective(self) -> str: - metric = self.lgbm_params.get("metric", "binary_logloss") - - # todo (smly): This implementation is different logic from the LightGBM's python bindings. - if isinstance(metric, str): - pass - elif isinstance(metric, Sequence): - metric = metric[-1] - elif isinstance(metric, Iterable): - metric = list(metric)[-1] - else: - raise NotImplementedError - metric = self._metric_with_eval_at(metric) - - return metric - - def _get_booster_best_score(self, booster: "lgb.Booster") -> float: - metric = self._get_metric_for_objective() - valid_sets: list["lgb.Dataset"] | tuple[ - "lgb.Dataset", ... - ] | "lgb.Dataset" | None = self.lgbm_kwargs.get("valid_sets") - - if self.lgbm_kwargs.get("valid_names") is not None: - if isinstance(self.lgbm_kwargs["valid_names"], str): - valid_name = self.lgbm_kwargs["valid_names"] - elif isinstance(self.lgbm_kwargs["valid_names"], Sequence): - valid_name = self.lgbm_kwargs["valid_names"][-1] - else: - raise NotImplementedError - - elif isinstance(valid_sets, lgb.Dataset): - valid_name = "valid_0" - - elif isinstance(valid_sets, Sequence) and len(valid_sets) > 0: - valid_set_idx = len(valid_sets) - 1 - valid_name = f"valid_{valid_set_idx}" - - else: - raise NotImplementedError - - val_score = booster.best_score[valid_name][metric] - return val_score - - def _metric_with_eval_at(self, metric: str) -> str: - # The parameter eval_at is only available when the metric is ndcg or map - if metric not in ["ndcg", "map"]: - return metric - - eval_at = ( - self.lgbm_params.get("eval_at") - or self.lgbm_params.get(f"{metric}_at") - or self.lgbm_params.get(f"{metric}_eval_at") - # Set default value of LightGBM when no possible key is absent. - # See https://lightgbm.readthedocs.io/en/latest/Parameters.html#eval_at. - or [1, 2, 3, 4, 5] - ) - - # Optuna can handle only a single metric. Choose first one. - if isinstance(eval_at, (list, tuple)): - return f"{metric}@{eval_at[0]}" - if isinstance(eval_at, int): - return f"{metric}@{eval_at}" - raise ValueError( - f"The value of eval_at is expected to be int or a list/tuple of int. '{eval_at}' is " - "specified." 
- ) - - def higher_is_better(self) -> bool: - metric_name = self.lgbm_params.get("metric", "binary_logloss") - return metric_name in ("auc", "auc_mu", "ndcg", "map", "average_precision") - - def compare_validation_metrics(self, val_score: float, best_score: float) -> bool: - if self.higher_is_better(): - return val_score > best_score - else: - return val_score < best_score - - -class _OptunaObjective(_BaseTuner): - """Objective for hyperparameter-tuning with Optuna.""" - - def __init__( - self, - target_param_names: list[str], - lgbm_params: dict[str, Any], - train_set: "lgb.Dataset", - lgbm_kwargs: dict[str, Any], - best_score: float, - step_name: str, - model_dir: str | None, - pbar: tqdm.tqdm | None = None, - ): - self.target_param_names = target_param_names - self.pbar = pbar - self.lgbm_params = lgbm_params - self.lgbm_kwargs = lgbm_kwargs - self.train_set = train_set - - self.trial_count = 0 - self.best_score = best_score - self.best_booster_with_trial_number: tuple[ - "lgb.Booster" | "lgb.CVBooster", int - ] | None = None - self.step_name = step_name - self.model_dir = model_dir - - self._check_target_names_supported() - self.pbar_fmt = "{}, val_score: {:.6f}" - - def _check_target_names_supported(self) -> None: - for target_param_name in self.target_param_names: - if target_param_name in _DEFAULT_LIGHTGBM_PARAMETERS: - continue - raise NotImplementedError( - f"Parameter `{target_param_name}` is not supported for tuning." - ) - - def _preprocess(self, trial: optuna.trial.Trial) -> None: - if self.pbar is not None: - self.pbar.set_description(self.pbar_fmt.format(self.step_name, self.best_score)) - - if "lambda_l1" in self.target_param_names: - self.lgbm_params["lambda_l1"] = trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True) - if "lambda_l2" in self.target_param_names: - self.lgbm_params["lambda_l2"] = trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True) - if "num_leaves" in self.target_param_names: - tree_depth = self.lgbm_params.get("max_depth", _DEFAULT_TUNER_TREE_DEPTH) - max_num_leaves = 2**tree_depth if tree_depth > 0 else 2**_DEFAULT_TUNER_TREE_DEPTH - self.lgbm_params["num_leaves"] = trial.suggest_int("num_leaves", 2, max_num_leaves) - if "feature_fraction" in self.target_param_names: - # `GridSampler` is used for sampling feature_fraction value. - # The value 1.0 for the hyperparameter is always sampled. - param_value = min(trial.suggest_float("feature_fraction", 0.4, 1.0 + _EPS), 1.0) - self.lgbm_params["feature_fraction"] = param_value - if "bagging_fraction" in self.target_param_names: - # `TPESampler` is used for sampling bagging_fraction value. - # The value 1.0 for the hyperparameter might by sampled. - param_value = min(trial.suggest_float("bagging_fraction", 0.4, 1.0 + _EPS), 1.0) - self.lgbm_params["bagging_fraction"] = param_value - if "bagging_freq" in self.target_param_names: - self.lgbm_params["bagging_freq"] = trial.suggest_int("bagging_freq", 1, 7) - if "min_child_samples" in self.target_param_names: - # `GridSampler` is used for sampling min_child_samples value. - # The value 1.0 for the hyperparameter is always sampled. - param_value = trial.suggest_int("min_child_samples", 5, 100) - self.lgbm_params["min_child_samples"] = param_value - - def _copy_valid_sets( - self, valid_sets: list["lgb.Dataset"] | tuple["lgb.Dataset", ...] | "lgb.Dataset" - ) -> list["lgb.Dataset"] | tuple["lgb.Dataset", ...] 
| "lgb.Dataset": - if isinstance(valid_sets, list): - return [copy.copy(d) for d in valid_sets] - if isinstance(valid_sets, tuple): - return tuple([copy.copy(d) for d in valid_sets]) - return copy.copy(valid_sets) - - def __call__(self, trial: optuna.trial.Trial) -> float: - self._preprocess(trial) - - start_time = time.time() - train_set = copy.copy(self.train_set) - kwargs = copy.copy(self.lgbm_kwargs) - kwargs["valid_sets"] = self._copy_valid_sets(kwargs["valid_sets"]) - booster = lgb.train(self.lgbm_params, train_set, **kwargs) - - val_score = self._get_booster_best_score(booster) - elapsed_secs = time.time() - start_time - average_iteration_time = elapsed_secs / booster.current_iteration() - - if self.model_dir is not None: - path = os.path.join(self.model_dir, f"{trial.number}.pkl") - with open(path, "wb") as fout: - pickle.dump(booster, fout) - _logger.info(f"The booster of trial#{trial.number} was saved as {path}.") - - if self.compare_validation_metrics(val_score, self.best_score): - self.best_score = val_score - self.best_booster_with_trial_number = (booster, trial.number) - - self._postprocess(trial, elapsed_secs, average_iteration_time) - - return val_score - - def _postprocess( - self, trial: optuna.trial.Trial, elapsed_secs: float, average_iteration_time: float - ) -> None: - if self.pbar is not None: - self.pbar.set_description(self.pbar_fmt.format(self.step_name, self.best_score)) - self.pbar.update(1) - - trial.storage.set_trial_system_attr(trial._trial_id, _ELAPSED_SECS_KEY, elapsed_secs) - trial.storage.set_trial_system_attr( - trial._trial_id, _AVERAGE_ITERATION_TIME_KEY, average_iteration_time - ) - trial.storage.set_trial_system_attr(trial._trial_id, _STEP_NAME_KEY, self.step_name) - trial.storage.set_trial_system_attr( - trial._trial_id, _LGBM_PARAMS_KEY, json.dumps(self.lgbm_params) - ) - - self.trial_count += 1 - - -class _OptunaObjectiveCV(_OptunaObjective): - def __init__( - self, - target_param_names: list[str], - lgbm_params: dict[str, Any], - train_set: "lgb.Dataset", - lgbm_kwargs: dict[str, Any], - best_score: float, - step_name: str, - model_dir: str | None, - pbar: tqdm.tqdm | None = None, - ): - super().__init__( - target_param_names, - lgbm_params, - train_set, - lgbm_kwargs, - best_score, - step_name, - model_dir, - pbar=pbar, - ) - - def _get_cv_scores(self, cv_results: dict[str, list[float] | "lgb.CVBooster"]) -> list[float]: - metric = self._get_metric_for_objective() - metric_key = f"{metric}-mean" - # The prefix "valid " is added to metric name since LightGBM v4.0.0. - val_scores = ( - cv_results[metric_key] - if metric_key in cv_results - else cv_results["valid " + metric_key] - ) - assert not isinstance(val_scores, lgb.CVBooster) - return val_scores - - def __call__(self, trial: optuna.trial.Trial) -> float: - self._preprocess(trial) - - start_time = time.time() - train_set = copy.copy(self.train_set) - cv_results = lgb.cv(self.lgbm_params, train_set, **self.lgbm_kwargs) - - val_scores = self._get_cv_scores(cv_results) - val_score = val_scores[-1] - elapsed_secs = time.time() - start_time - average_iteration_time = elapsed_secs / len(val_scores) - - if self.model_dir is not None and self.lgbm_kwargs.get("return_cvbooster"): - path = os.path.join(self.model_dir, f"{trial.number}.pkl") - with open(path, "wb") as fout: - # At version `lightgbm==3.0.0`, :class:`lightgbm.CVBooster` does not - # have `__getstate__` which is required for pickle serialization. 
- cvbooster = cv_results["cvbooster"] - assert isinstance(cvbooster, lgb.CVBooster) - pickle.dump((cvbooster.boosters, cvbooster.best_iteration), fout) - _logger.info(f"The booster of trial#{trial.number} was saved as {path}.") - - if self.compare_validation_metrics(val_score, self.best_score): - self.best_score = val_score - if self.lgbm_kwargs.get("return_cvbooster"): - assert not isinstance(cv_results["cvbooster"], list) - self.best_booster_with_trial_number = (cv_results["cvbooster"], trial.number) - - self._postprocess(trial, elapsed_secs, average_iteration_time) - - return val_score - - -class _LightGBMBaseTuner(_BaseTuner): - """Base class of LightGBM Tuners. - - This class has common attributes and methods of - :class:`~optuna.integration.lightgbm.LightGBMTuner` and - :class:`~optuna.integration.lightgbm.LightGBMTunerCV`. - """ - - def __init__( - self, - params: dict[str, Any], - train_set: "lgb.Dataset", - callbacks: list[Callable[..., Any]] | None = None, - num_boost_round: int = 1000, - feval: Callable[..., Any] | None = None, - feature_name: str = "auto", - categorical_feature: str = "auto", - time_budget: int | None = None, - sample_size: int | None = None, - study: optuna.study.Study | None = None, - optuna_callbacks: list[Callable[[Study, FrozenTrial], None]] | None = None, - verbosity: int | None = None, - show_progress_bar: bool = True, - model_dir: str | None = None, - *, - optuna_seed: int | None = None, - ) -> None: - _imports.check() - - params = copy.deepcopy(params) - - # Handling alias metrics. - _handling_alias_metrics(params) - args = [params, train_set] - kwargs: dict[str, Any] = dict( - num_boost_round=num_boost_round, - feval=feval, - feature_name=feature_name, - categorical_feature=categorical_feature, - callbacks=callbacks, - time_budget=time_budget, - sample_size=sample_size, - verbosity=verbosity, - show_progress_bar=show_progress_bar, - ) - self._parse_args(*args, **kwargs) - self._start_time: float | None = None - self._optuna_callbacks = optuna_callbacks - self._best_booster_with_trial_number: tuple[lgb.Booster | lgb.CVBooster, int] | None = None - self._model_dir = model_dir - self._optuna_seed = optuna_seed - - # Should not alter data since `min_child_samples` is tuned. - # https://lightgbm.readthedocs.io/en/latest/Parameters.html#feature_pre_filter - if self.lgbm_params.get("feature_pre_filter", False): - warnings.warn( - "feature_pre_filter is given as True but will be set to False. This is required " - "for the tuner to tune min_child_samples." - ) - self.lgbm_params["feature_pre_filter"] = False - - if study is None: - self.study = optuna.create_study( - direction="maximize" if self.higher_is_better() else "minimize" - ) - else: - self.study = study - - if self.higher_is_better(): - if self.study.direction != optuna.study.StudyDirection.MAXIMIZE: - metric_name = self.lgbm_params.get("metric", "binary_logloss") - raise ValueError( - f"Study direction is inconsistent with the metric {metric_name}. " - "Please set 'maximize' as the direction." - ) - else: - if self.study.direction != optuna.study.StudyDirection.MINIMIZE: - metric_name = self.lgbm_params.get("metric", "binary_logloss") - raise ValueError( - f"Study direction is inconsistent with the metric {metric_name}. " - "Please set 'minimize' as the direction." - ) - - if verbosity is not None: - warnings.warn( - "`verbosity` argument is deprecated and will be removed in the future. " - "The removal of this feature is currently scheduled for v4.0.0, " - "but this schedule is subject to change. 
Please use optuna.logging.set_verbosity()" - " instead.", - FutureWarning, - ) - - if self._model_dir is not None and not os.path.exists(self._model_dir): - os.mkdir(self._model_dir) - - @property - def best_score(self) -> float: - """Return the score of the best booster.""" - try: - return self.study.best_value - except ValueError: - # Return the default score because no trials have completed. - return -np.inf if self.higher_is_better() else np.inf - - @property - def best_params(self) -> dict[str, Any]: - """Return parameters of the best booster.""" - try: - return json.loads(self.study.best_trial.system_attrs[_LGBM_PARAMS_KEY]) - except ValueError: - # Return the default score because no trials have completed. - params = copy.deepcopy(_DEFAULT_LIGHTGBM_PARAMETERS) - # self.lgbm_params may contain parameters given by users. - params.update(self.lgbm_params) - return params - - def _parse_args(self, *args: Any, **kwargs: Any) -> None: - self.auto_options = { - option_name: kwargs.get(option_name) - for option_name in ["time_budget", "sample_size", "verbosity", "show_progress_bar"] - } - - # Split options. - for option_name in self.auto_options.keys(): - if option_name in kwargs: - del kwargs[option_name] - - self.lgbm_params = args[0] - self.train_set = args[1] - self.train_subset = None # Use for sampling. - self.lgbm_kwargs = kwargs - - def run(self) -> None: - """Perform the hyperparameter-tuning with given parameters.""" - verbosity = self.auto_options["verbosity"] - if verbosity is not None: - if verbosity > 1: - optuna.logging.set_verbosity(optuna.logging.DEBUG) - elif verbosity == 1: - optuna.logging.set_verbosity(optuna.logging.INFO) - elif verbosity == 0: - optuna.logging.set_verbosity(optuna.logging.WARNING) - else: - optuna.logging.set_verbosity(optuna.logging.CRITICAL) - - # Handling aliases. - _handling_alias_parameters(self.lgbm_params) - - # Sampling. 
- self.sample_train_set() - - self.tune_feature_fraction() - self.tune_num_leaves() - self.tune_bagging() - self.tune_feature_fraction_stage2() - self.tune_regularization_factors() - self.tune_min_data_in_leaf() - - def sample_train_set(self) -> None: - """Make subset of `self.train_set` Dataset object.""" - - if self.auto_options["sample_size"] is None: - return - - self.train_set.construct() - n_train_instance = self.train_set.get_label().shape[0] - if n_train_instance > self.auto_options["sample_size"]: - offset = n_train_instance - self.auto_options["sample_size"] - idx_list = offset + np.arange(self.auto_options["sample_size"]) - self.train_subset = self.train_set.subset(idx_list) - - def tune_feature_fraction(self, n_trials: int = 7) -> None: - param_name = "feature_fraction" - param_values = np.linspace(0.4, 1.0, n_trials).tolist() - - sampler = optuna.samplers.GridSampler({param_name: param_values}, seed=self._optuna_seed) - self._tune_params([param_name], len(param_values), sampler, "feature_fraction") - - def tune_num_leaves(self, n_trials: int = 20) -> None: - self._tune_params( - ["num_leaves"], - n_trials, - optuna.samplers.TPESampler(seed=self._optuna_seed), - "num_leaves", - ) - - def tune_bagging(self, n_trials: int = 10) -> None: - self._tune_params( - ["bagging_fraction", "bagging_freq"], - n_trials, - optuna.samplers.TPESampler(seed=self._optuna_seed), - "bagging", - ) - - def tune_feature_fraction_stage2(self, n_trials: int = 6) -> None: - param_name = "feature_fraction" - best_feature_fraction = self.best_params[param_name] - param_values = np.linspace( - best_feature_fraction - 0.08, best_feature_fraction + 0.08, n_trials - ).tolist() - param_values = [val for val in param_values if val >= 0.4 and val <= 1.0] - - sampler = optuna.samplers.GridSampler({param_name: param_values}, seed=self._optuna_seed) - self._tune_params([param_name], len(param_values), sampler, "feature_fraction_stage2") - - def tune_regularization_factors(self, n_trials: int = 20) -> None: - self._tune_params( - ["lambda_l1", "lambda_l2"], - n_trials, - optuna.samplers.TPESampler(seed=self._optuna_seed), - "regularization_factors", - ) - - def tune_min_data_in_leaf(self) -> None: - param_name = "min_child_samples" - param_values = [5, 10, 25, 50, 100] - - sampler = optuna.samplers.GridSampler({param_name: param_values}, seed=self._optuna_seed) - self._tune_params([param_name], len(param_values), sampler, "min_child_samples") - - def _tune_params( - self, - target_param_names: list[str], - n_trials: int, - sampler: optuna.samplers.BaseSampler, - step_name: str, - ) -> _OptunaObjective: - pbar = ( - tqdm.tqdm(total=n_trials, ascii=True) - if self.auto_options["show_progress_bar"] - else None - ) - - # Set current best parameters. 
- self.lgbm_params.update(self.best_params) - - train_set = self.train_set - if self.train_subset is not None: - train_set = self.train_subset - - objective = self._create_objective(target_param_names, train_set, step_name, pbar) - - study = self._create_stepwise_study(self.study, step_name) - study.sampler = sampler - - complete_trials = study.get_trials( - deepcopy=True, - states=(optuna.trial.TrialState.COMPLETE, optuna.trial.TrialState.PRUNED), - ) - _n_trials = n_trials - len(complete_trials) - - if self._start_time is None: - self._start_time = time.time() - - if self.auto_options["time_budget"] is not None: - _timeout = self.auto_options["time_budget"] - (time.time() - self._start_time) - else: - _timeout = None - if _n_trials > 0: - study.optimize( - objective, - n_trials=_n_trials, - timeout=_timeout, - catch=(), - callbacks=self._optuna_callbacks, - ) - - if pbar: - pbar.close() - del pbar - - if objective.best_booster_with_trial_number is not None: - self._best_booster_with_trial_number = objective.best_booster_with_trial_number - - return objective - - @abc.abstractmethod - def _create_objective( - self, - target_param_names: list[str], - train_set: "lgb.Dataset", - step_name: str, - pbar: tqdm.tqdm | None, - ) -> _OptunaObjective: - raise NotImplementedError - - def _create_stepwise_study( - self, study: optuna.study.Study, step_name: str - ) -> optuna.study.Study: - # This class is assumed to be passed to a sampler and a pruner corresponding to the step. - class _StepwiseStudy(optuna.study.Study): - def __init__(self, study: optuna.study.Study, step_name: str) -> None: - super().__init__( - study_name=study.study_name, - storage=study._storage, - sampler=study.sampler, - pruner=study.pruner, - ) - self._step_name = step_name - - def get_trials( - self, - deepcopy: bool = True, - states: Container[TrialState] | None = None, - ) -> list[optuna.trial.FrozenTrial]: - trials = super()._get_trials(deepcopy=deepcopy, states=states) - return [t for t in trials if t.system_attrs.get(_STEP_NAME_KEY) == self._step_name] - - @property - def best_trial(self) -> optuna.trial.FrozenTrial: - """Return the best trial in the study. - - Returns: - A :class:`~optuna.trial.FrozenTrial` object of the best trial. - """ - - trials = self.get_trials(deepcopy=False) - trials = [t for t in trials if t.state is optuna.trial.TrialState.COMPLETE] - - if len(trials) == 0: - raise ValueError("No trials are completed yet.") - - if self.direction == optuna.study.StudyDirection.MINIMIZE: - best_trial = min(trials, key=lambda t: cast(float, t.value)) - else: - best_trial = max(trials, key=lambda t: cast(float, t.value)) - return copy.deepcopy(best_trial) - - return _StepwiseStudy(study, step_name) - - -class LightGBMTuner(_LightGBMBaseTuner): - """Hyperparameter tuner for LightGBM. - - It optimizes the following hyperparameters in a stepwise manner: - ``lambda_l1``, ``lambda_l2``, ``num_leaves``, ``feature_fraction``, ``bagging_fraction``, - ``bagging_freq`` and ``min_child_samples``. - - You can find the details of the algorithm and benchmark results in `this blog article `_ by `Kohei Ozaki `_, a Kaggle Grandmaster. - - .. note:: - Arguments and keyword arguments for `lightgbm.train() - `_ can be passed. - For ``params``, please check `the official documentation for LightGBM - `_. - - The arguments that only :class:`~optuna.integration.lightgbm.LightGBMTuner` has are - listed below: - - Args: - time_budget: - A time budget for parameter tuning in seconds. 
- - study: - A :class:`~optuna.study.Study` instance to store optimization results. The - :class:`~optuna.trial.Trial` instances in it has the following user attributes: - ``elapsed_secs`` is the elapsed time since the optimization starts. - ``average_iteration_time`` is the average time of iteration to train the booster - model in the trial. ``lgbm_params`` is a JSON-serialized dictionary of LightGBM - parameters used in the trial. - - optuna_callbacks: - List of Optuna callback functions that are invoked at the end of each trial. - Each function must accept two parameters with the following types in this order: - :class:`~optuna.study.Study` and :class:`~optuna.trial.FrozenTrial`. - Please note that this is not a ``callbacks`` argument of `lightgbm.train()`_ . - - model_dir: - A directory to save boosters. By default, it is set to :obj:`None` and no boosters are - saved. Please set shared directory (e.g., directories on NFS) if you want to access - :meth:`~optuna.integration.lightgbm.LightGBMTuner.get_best_booster` in distributed - environments. Otherwise, it may raise :obj:`ValueError`. If the directory does not - exist, it will be created. The filenames of the boosters will be - ``{model_dir}/{trial_number}.pkl`` (e.g., ``./boosters/0.pkl``). - - verbosity: - A verbosity level to change Optuna's logging level. The level is aligned to - `LightGBM's verbosity`_ . - - .. warning:: - Deprecated in v2.0.0. ``verbosity`` argument will be removed in the future. - The removal of this feature is currently scheduled for v4.0.0, - but this schedule is subject to change. - - Please use :func:`~optuna.logging.set_verbosity` instead. - - show_progress_bar: - Flag to show progress bars or not. To disable progress bar, set this :obj:`False`. - - .. note:: - Progress bars will be fragmented by logging messages of LightGBM and Optuna. - Please suppress such messages to show the progress bars properly. - - optuna_seed: - ``seed`` of :class:`~optuna.samplers.TPESampler` for random number generator - that affects sampling for ``num_leaves``, ``bagging_fraction``, ``bagging_freq``, - ``lambda_l1``, and ``lambda_l2``. - - .. note:: - The `deterministic`_ parameter of LightGBM makes training reproducible. - Please enable it when you use this argument. - - .. _lightgbm.train(): https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.train.html - .. _LightGBM's verbosity: https://lightgbm.readthedocs.io/en/latest/Parameters.html#verbosity - .. _deterministic: https://lightgbm.readthedocs.io/en/latest/Parameters.html#deterministic - """ - - def __init__( - self, - params: dict[str, Any], - train_set: "lgb.Dataset", - num_boost_round: int = 1000, - valid_sets: list["lgb.Dataset"] | tuple["lgb.Dataset", ...] 
| "lgb.Dataset" | None = None, - valid_names: Any | None = None, - feval: Callable[..., Any] | None = None, - feature_name: str = "auto", - categorical_feature: str = "auto", - keep_training_booster: bool = False, - callbacks: list[Callable[..., Any]] | None = None, - time_budget: int | None = None, - sample_size: int | None = None, - study: optuna.study.Study | None = None, - optuna_callbacks: list[Callable[[Study, FrozenTrial], None]] | None = None, - model_dir: str | None = None, - verbosity: int | None = None, - show_progress_bar: bool = True, - *, - optuna_seed: int | None = None, - ) -> None: - super().__init__( - params, - train_set, - callbacks=callbacks, - num_boost_round=num_boost_round, - feval=feval, - feature_name=feature_name, - categorical_feature=categorical_feature, - time_budget=time_budget, - sample_size=sample_size, - study=study, - optuna_callbacks=optuna_callbacks, - verbosity=verbosity, - show_progress_bar=show_progress_bar, - model_dir=model_dir, - optuna_seed=optuna_seed, - ) - - self.lgbm_kwargs["valid_sets"] = valid_sets - self.lgbm_kwargs["valid_names"] = valid_names - self.lgbm_kwargs["keep_training_booster"] = keep_training_booster - - self._best_booster_with_trial_number: tuple[lgb.Booster, int] | None = None - - if valid_sets is None: - raise ValueError("`valid_sets` is required.") - - def _create_objective( - self, - target_param_names: list[str], - train_set: "lgb.Dataset", - step_name: str, - pbar: tqdm.tqdm | None, - ) -> _OptunaObjective: - return _OptunaObjective( - target_param_names, - self.lgbm_params, - train_set, - self.lgbm_kwargs, - self.best_score, - step_name=step_name, - model_dir=self._model_dir, - pbar=pbar, - ) - - def get_best_booster(self) -> "lgb.Booster": - """Return the best booster. - - If the best booster cannot be found, :class:`ValueError` will be raised. To prevent the - errors, please save boosters by specifying the ``model_dir`` argument of - :meth:`~optuna.integration.lightgbm.LightGBMTuner.__init__`, - when you resume tuning or you run tuning in parallel. - """ - if self._best_booster_with_trial_number is not None: - if self._best_booster_with_trial_number[1] == self.study.best_trial.number: - return self._best_booster_with_trial_number[0] - if len(self.study.trials) == 0: - raise ValueError("The best booster is not available because no trials completed.") - - # The best booster exists, but this instance does not have it. - # This may be due to resuming or parallelization. - if self._model_dir is None: - raise ValueError( - "The best booster cannot be found. It may be found in the other processes due to " - "resuming or distributed computing. Please set the `model_dir` argument of " - "`LightGBMTuner.__init__` and make sure that boosters are shared with all " - "processes." - ) - - best_trial = self.study.best_trial - path = os.path.join(self._model_dir, f"{best_trial.number}.pkl") - if not os.path.exists(path): - raise ValueError( - f"The best booster cannot be found in {self._model_dir}. If you execute " - "`LightGBMTuner` in distributed environment, please use network file system " - "(e.g., NFS) to share models with multiple workers." - ) - - with open(path, "rb") as fin: - booster = pickle.load(fin) - - return booster - - -class LightGBMTunerCV(_LightGBMBaseTuner): - """Hyperparameter tuner for LightGBM with cross-validation. - - It employs the same stepwise approach as - :class:`~optuna.integration.lightgbm.LightGBMTuner`. 
- :class:`~optuna.integration.lightgbm.LightGBMTunerCV` invokes `lightgbm.cv()`_ to train - and validate boosters while :class:`~optuna.integration.lightgbm.LightGBMTuner` invokes - `lightgbm.train()`_. See - `a simple example `_ which optimizes the validation log loss of cancer detection. - - .. note:: - Arguments and keyword arguments for `lightgbm.cv()`_ can be passed except - ``metrics``, ``init_model`` and ``eval_train_metric``. - For ``params``, please check `the official documentation for LightGBM - `_. - - The arguments that only :class:`~optuna.integration.lightgbm.LightGBMTunerCV` has are - listed below: - - Args: - time_budget: - A time budget for parameter tuning in seconds. - - study: - A :class:`~optuna.study.Study` instance to store optimization results. The - :class:`~optuna.trial.Trial` instances in it has the following user attributes: - ``elapsed_secs`` is the elapsed time since the optimization starts. - ``average_iteration_time`` is the average time of iteration to train the booster - model in the trial. ``lgbm_params`` is a JSON-serialized dictionary of LightGBM - parameters used in the trial. - - optuna_callbacks: - List of Optuna callback functions that are invoked at the end of each trial. - Each function must accept two parameters with the following types in this order: - :class:`~optuna.study.Study` and :class:`~optuna.trial.FrozenTrial`. - Please note that this is not a ``callbacks`` argument of `lightgbm.train()`_ . - - model_dir: - A directory to save boosters. By default, it is set to :obj:`None` and no boosters are - saved. Please set shared directory (e.g., directories on NFS) if you want to access - :meth:`~optuna.integration.lightgbm.LightGBMTunerCV.get_best_booster` - in distributed environments. - Otherwise, it may raise :obj:`ValueError`. If the directory does not exist, it will be - created. The filenames of the boosters will be ``{model_dir}/{trial_number}.pkl`` - (e.g., ``./boosters/0.pkl``). - - verbosity: - A verbosity level to change Optuna's logging level. The level is aligned to - `LightGBM's verbosity`_ . - - .. warning:: - Deprecated in v2.0.0. ``verbosity`` argument will be removed in the future. - The removal of this feature is currently scheduled for v4.0.0, - but this schedule is subject to change. - - Please use :func:`~optuna.logging.set_verbosity` instead. - - show_progress_bar: - Flag to show progress bars or not. To disable progress bar, set this :obj:`False`. - - .. note:: - Progress bars will be fragmented by logging messages of LightGBM and Optuna. - Please suppress such messages to show the progress bars properly. - - return_cvbooster: - Flag to enable :meth:`~optuna.integration.lightgbm.LightGBMTunerCV.get_best_booster`. - - optuna_seed: - ``seed`` of :class:`~optuna.samplers.TPESampler` for random number generator - that affects sampling for ``num_leaves``, ``bagging_fraction``, ``bagging_freq``, - ``lambda_l1``, and ``lambda_l2``. - - .. note:: - The `deterministic`_ parameter of LightGBM makes training reproducible. - Please enable it when you use this argument. - - .. _lightgbm.train(): https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.train.html - .. _lightgbm.cv(): https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.cv.html - .. _LightGBM's verbosity: https://lightgbm.readthedocs.io/en/latest/Parameters.html#verbosity - .. 
_deterministic: https://lightgbm.readthedocs.io/en/latest/Parameters.html#deterministic - """ - - def __init__( - self, - params: dict[str, Any], - train_set: "lgb.Dataset", - num_boost_round: int = 1000, - folds: Generator[tuple[int, int], None, None] - | Iterator[tuple[int, int]] - | "BaseCrossValidator" - | None = None, - nfold: int = 5, - stratified: bool = True, - shuffle: bool = True, - feval: Callable[..., Any] | None = None, - feature_name: str = "auto", - categorical_feature: str = "auto", - fpreproc: Callable[..., Any] | None = None, - seed: int = 0, - callbacks: list[Callable[..., Any]] | None = None, - time_budget: int | None = None, - sample_size: int | None = None, - study: optuna.study.Study | None = None, - optuna_callbacks: list[Callable[[Study, FrozenTrial], None]] | None = None, - verbosity: int | None = None, - show_progress_bar: bool = True, - model_dir: str | None = None, - return_cvbooster: bool = False, - *, - optuna_seed: int | None = None, - ) -> None: - super().__init__( - params, - train_set, - callbacks=callbacks, - num_boost_round=num_boost_round, - feval=feval, - feature_name=feature_name, - categorical_feature=categorical_feature, - time_budget=time_budget, - sample_size=sample_size, - study=study, - optuna_callbacks=optuna_callbacks, - verbosity=verbosity, - show_progress_bar=show_progress_bar, - model_dir=model_dir, - optuna_seed=optuna_seed, - ) - - self.lgbm_kwargs["folds"] = folds - self.lgbm_kwargs["nfold"] = nfold - self.lgbm_kwargs["stratified"] = stratified - self.lgbm_kwargs["shuffle"] = shuffle - self.lgbm_kwargs["seed"] = seed - self.lgbm_kwargs["fpreproc"] = fpreproc - self.lgbm_kwargs["return_cvbooster"] = return_cvbooster - - def _create_objective( - self, - target_param_names: list[str], - train_set: "lgb.Dataset", - step_name: str, - pbar: tqdm.tqdm | None, - ) -> _OptunaObjective: - return _OptunaObjectiveCV( - target_param_names, - self.lgbm_params, - train_set, - self.lgbm_kwargs, - self.best_score, - step_name=step_name, - model_dir=self._model_dir, - pbar=pbar, - ) - - def get_best_booster(self) -> "lgb.CVBooster": - """Return the best cvbooster. - - If the best booster cannot be found, :class:`ValueError` will be raised. - To prevent the errors, please save boosters by specifying - both of the ``model_dir`` and the ``return_cvbooster`` arguments of - :meth:`~optuna.integration.lightgbm.LightGBMTunerCV.__init__`, - when you resume tuning or you run tuning in parallel. - """ - if self.lgbm_kwargs.get("return_cvbooster") is not True: - raise ValueError( - "LightGBMTunerCV requires `return_cvbooster=True` for method `get_best_booster()`." - ) - if self._best_booster_with_trial_number is not None: - if self._best_booster_with_trial_number[1] == self.study.best_trial.number: - assert isinstance(self._best_booster_with_trial_number[0], lgb.CVBooster) - return self._best_booster_with_trial_number[0] - if len(self.study.trials) == 0: - raise ValueError("The best booster is not available because no trials completed.") - - # The best booster exists, but this instance does not have it. - # This may be due to resuming or parallelization. - if self._model_dir is None: - raise ValueError( - "The best booster cannot be found. It may be found in the other processes due to " - "resuming or distributed computing. Please set the `model_dir` argument of " - "`LightGBMTunerCV.__init__` and make sure that boosters are shared with all " - "processes." 
- ) - - best_trial = self.study.best_trial - path = os.path.join(self._model_dir, f"{best_trial.number}.pkl") - if not os.path.exists(path): - raise ValueError( - f"The best booster cannot be found in {self._model_dir}. If you execute " - "`LightGBMTunerCV` in distributed environment, please use network file system " - "(e.g., NFS) to share models with multiple workers." - ) - - with open(path, "rb") as fin: - boosters, best_iteration = pickle.load(fin) - # At version `lightgbm==3.0.0`, :class:`lightgbm.CVBooster` does not - # have `__getstate__` which is required for pickle serialization. - cvbooster = lgb.CVBooster() - cvbooster.boosters = boosters - cvbooster.best_iteration = best_iteration - - return cvbooster diff --git a/optuna/integration/_lightgbm_tuner/sklearn.py b/optuna/integration/_lightgbm_tuner/sklearn.py deleted file mode 100644 index 93336d2648..0000000000 --- a/optuna/integration/_lightgbm_tuner/sklearn.py +++ /dev/null @@ -1,46 +0,0 @@ -from typing import Any -import warnings - -import lightgbm as lgb - - -class LGBMModel(lgb.LGBMModel): - """Proxy of lightgbm.LGBMModel. - - See: `pydoc lightgbm.LGBMModel` - """ - - def __init__(self, *args: Any, **kwargs: Any) -> None: - warnings.warn( - "LightGBMTuner doesn't support sklearn API. " - "Use `train()` or `LightGBMTuner` for hyperparameter tuning." - ) - super().__init__(*args, **kwargs) - - -class LGBMClassifier(lgb.LGBMClassifier): - """Proxy of lightgbm.LGBMClassifier. - - See: `pydoc lightgbm.LGBMClassifier` - """ - - def __init__(self, *args: Any, **kwargs: Any) -> None: - warnings.warn( - "LightGBMTuner doesn't support sklearn API. " - "Use `train()` or `LightGBMTuner` for hyperparameter tuning." - ) - super().__init__(*args, **kwargs) - - -class LGBMRegressor(lgb.LGBMRegressor): - """Proxy of LGBMRegressor. - - See: `pydoc lightgbm.LGBMRegressor` - """ - - def __init__(self, *args: Any, **kwargs: Any) -> None: - warnings.warn( - "LightGBMTuner doesn't support sklearn API. " - "Use `train()` or `LightGBMTuner` for hyperparameter tuning." - ) - super().__init__(*args, **kwargs) diff --git a/optuna/integration/lightgbm.py b/optuna/integration/lightgbm.py index b9fd0e0cdb..24c590e89c 100644 --- a/optuna/integration/lightgbm.py +++ b/optuna/integration/lightgbm.py @@ -1,166 +1,10 @@ -from __future__ import annotations +from optuna_integration.lightgbm import LightGBMPruningCallback +from optuna_integration.lightgbm import LightGBMTuner +from optuna_integration.lightgbm import LightGBMTunerCV -import sys -from typing import TYPE_CHECKING -import optuna -from optuna._imports import try_import -from optuna.integration import _lightgbm_tuner as tuner - - -if TYPE_CHECKING: - from lightgbm.basic import _LGBM_BoosterEvalMethodResultType - from lightgbm.basic import _LGBM_BoosterEvalMethodResultWithStandardDeviationType - from lightgbm.callback import CallbackEnv - - -with try_import() as _imports: - import lightgbm as lgb - -# Attach lightgbm API. -if _imports.is_successful(): - # To pass tests/integration_tests/lightgbm_tuner_tests/test_optimize.py. - from lightgbm import Dataset - - from optuna.integration._lightgbm_tuner import LightGBMTuner - from optuna.integration._lightgbm_tuner import LightGBMTunerCV - - _names_from_tuners = ["train", "LGBMModel", "LGBMClassifier", "LGBMRegressor"] - - # API from lightgbm. - for api_name in lgb.__dict__["__all__"]: - if api_name in _names_from_tuners: - continue - setattr(sys.modules[__name__], api_name, lgb.__dict__[api_name]) - - # API from lightgbm_tuner. 
- for api_name in _names_from_tuners: - setattr(sys.modules[__name__], api_name, tuner.__dict__[api_name]) -else: - # To create docstring of train. - setattr(sys.modules[__name__], "train", tuner.__dict__["train"]) - setattr(sys.modules[__name__], "LightGBMTuner", tuner.__dict__["LightGBMTuner"]) - setattr(sys.modules[__name__], "LightGBMTunerCV", tuner.__dict__["LightGBMTunerCV"]) - -__all__ = ["Dataset", "LightGBMTuner", "LightGBMTunerCV"] - - -class LightGBMPruningCallback: - """Callback for LightGBM to prune unpromising trials. - - See `the example `__ - if you want to add a pruning callback which observes accuracy - of a LightGBM model. - - Args: - trial: - A :class:`~optuna.trial.Trial` corresponding to the current evaluation of - the objective function. - metric: - An evaluation metric for pruning, e.g., ``binary_error`` and ``multi_error``. - Please refer to - `LightGBM reference - `_ - for further details. - valid_name: - The name of the target validation. - Validation names are specified by ``valid_names`` option of - `train method - `_. - If omitted, ``valid_0`` is used which is the default name of the first validation. - Note that this argument will be ignored if you are calling - `cv method `_ - instead of train method. - report_interval: - Check if the trial should report intermediate values for pruning every n-th boosting - iteration. By default ``report_interval=1`` and reporting is performed after every - iteration. Note that the pruning itself is performed according to the interval - definition of the pruner. - """ - - def __init__( - self, - trial: optuna.trial.Trial, - metric: str, - valid_name: str = "valid_0", - report_interval: int = 1, - ) -> None: - _imports.check() - - self._trial = trial - self._valid_name = valid_name - self._metric = metric - self._report_interval = report_interval - - def _find_evaluation_result( - self, target_valid_name: str, env: CallbackEnv - ) -> ( - _LGBM_BoosterEvalMethodResultType - | _LGBM_BoosterEvalMethodResultWithStandardDeviationType - | None - ): - evaluation_result_list = env.evaluation_result_list - if evaluation_result_list is None: - return None - - for evaluation_result in evaluation_result_list: - valid_name, metric, current_score, is_higher_better = evaluation_result[:4] - # The prefix "valid " is added to metric name since LightGBM v4.0.0. - if valid_name != target_valid_name or ( - metric != "valid " + self._metric and metric != self._metric - ): - continue - return evaluation_result - - return None - - def __call__(self, env: CallbackEnv) -> None: - if (env.iteration + 1) % self._report_interval == 0: - # If this callback has been passed to `lightgbm.cv` function, - # the value of `is_cv` becomes `True`. See also: - # /~https://github.com/microsoft/LightGBM/blob/v4.1.0/python-package/lightgbm/engine.py#L533 - # Note that `5` is not the number of folds but the length of sequence. 
- evaluation_result_list = env.evaluation_result_list - is_cv = ( - evaluation_result_list is not None - and len(evaluation_result_list) > 0 - and len(evaluation_result_list[0]) == 5 - ) - if is_cv: - target_valid_name = "cv_agg" - else: - target_valid_name = self._valid_name - - evaluation_result = self._find_evaluation_result(target_valid_name, env) - - if evaluation_result is None: - raise ValueError( - 'The entry associated with the validation name "{}" and the metric name "{}" ' - "is not found in the evaluation result list {}.".format( - target_valid_name, self._metric, str(env.evaluation_result_list) - ) - ) - - valid_name, metric, current_score, is_higher_better = evaluation_result[:4] - - if is_higher_better: - if self._trial.study.direction != optuna.study.StudyDirection.MAXIMIZE: - raise ValueError( - "The intermediate values are inconsistent with the objective values" - "in terms of study directions. Please specify a metric to be minimized" - "for LightGBMPruningCallback." - ) - else: - if self._trial.study.direction != optuna.study.StudyDirection.MINIMIZE: - raise ValueError( - "The intermediate values are inconsistent with the objective values" - "in terms of study directions. Please specify a metric to be" - "maximized for LightGBMPruningCallback." - ) - - self._trial.report(current_score, step=env.iteration) - - if self._trial.should_prune(): - message = "Trial was pruned at iteration {}.".format(env.iteration) - raise optuna.TrialPruned(message) +__all__ = [ + "LightGBMPruningCallback", + "LightGBMTuner", + "LightGBMTunerCV", +] diff --git a/pyproject.toml b/pyproject.toml index ebdc285b23..4ba5e36296 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,7 +84,6 @@ document = [ "torchvision", ] integration = [ - "lightgbm", "lightning", "mlflow", "pytorch-ignite", diff --git a/tests/integration_tests/lightgbm_tuner_tests/__init__.py b/tests/integration_tests/lightgbm_tuner_tests/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/integration_tests/lightgbm_tuner_tests/test_alias.py b/tests/integration_tests/lightgbm_tuner_tests/test_alias.py deleted file mode 100644 index 01c58e885d..0000000000 --- a/tests/integration_tests/lightgbm_tuner_tests/test_alias.py +++ /dev/null @@ -1,130 +0,0 @@ -from typing import List - -import pytest - -from optuna.integration._lightgbm_tuner.alias import _handling_alias_metrics -from optuna.integration._lightgbm_tuner.alias import _handling_alias_parameters - - -pytestmark = pytest.mark.integration - - -def test__handling_alias_parameters() -> None: - params = {"reg_alpha": 0.1} - _handling_alias_parameters(params) - assert "reg_alpha" not in params - assert "lambda_l1" in params - - -def test_handling_alias_parameter_with_user_supplied_param() -> None: - params = { - "num_boost_round": 5, - "early_stopping_rounds": 2, - "eta": 0.5, - } - _handling_alias_parameters(params) - - assert "eta" not in params - assert "learning_rate" in params - assert params["learning_rate"] == 0.5 - - -def test_handling_alias_parameter() -> None: - params = { - "num_boost_round": 5, - "early_stopping_rounds": 2, - "min_data": 0.2, - } - _handling_alias_parameters(params) - assert "min_data" not in params - assert "min_child_samples" in params - assert params["min_child_samples"] == 0.2 - - -def test_handling_alias_parameter_duplication() -> None: - params = { - "num_boost_round": 5, - "early_stopping_rounds": 2, - "min_data": 0.2, - "min_child_samples": 0.3, - "l1_regularization": 0.0, - "l2_regularization": 0.0, - "reg_alpha": 
0.0, - "reg_lambda": 0.0, - } - _handling_alias_parameters(params) - # Here are the main alias names - assert "min_child_samples" in params - assert "lambda_l1" in params - assert "lambda_l2" in params - - # None of them are the main alias names - assert "min_data" not in params - assert "l1_regularization" not in params - assert "l2_regularization" not in params - assert "reg_alpha" not in params - assert "reg_lambda" not in params - - -@pytest.mark.parametrize( - "aliases, expect", - [ - ( - [ - "ndcg", - "lambdarank", - "rank_xendcg", - "xendcg", - "xe_ndcg", - "xe_ndcg_mart", - "xendcg_mart", - ], - "ndcg", - ), - (["mean_average_precision", "map"], "map"), - (["rmse", "l2_root", "root_mean_squared_error"], "rmse"), - (["l1", "regression_l1", "mean_absolute_error", "mae"], "l1"), - (["l2", "regression", "regression_l2", "mean_squared_error", "mse"], "l2"), - (["auc"], "auc"), - (["binary_logloss", "binary"], "binary_logloss"), - ( - [ - "multi_logloss", - "multiclass", - "softmax", - "multiclassova", - "multiclass_ova", - "ova", - "ovr", - ], - "multi_logloss", - ), - (["cross_entropy", "xentropy"], "cross_entropy"), - (["cross_entropy_lambda", "xentlambda"], "cross_entropy_lambda"), - (["kullback_leibler", "kldiv"], "kullback_leibler"), - (["mape", "mean_absolute_percentage_error"], "mape"), - (["auc_mu"], "auc_mu"), - (["custom", "none", "null", "na"], "custom"), - ([], None), # If "metric" not in lgbm_params.keys(): return None. - ([["lambdarank"]], ["ndcg"]), - ( - [["lambdarank", "mean_average_precision", "root_mean_squared_error"]], - ["ndcg", "map", "rmse"], - ), - ], -) -def test_handling_alias_metrics(aliases: List[str], expect: str) -> None: - if len(aliases) > 0: - for alias in aliases: - lgbm_params = {"metric": alias} - _handling_alias_metrics(lgbm_params) - assert lgbm_params["metric"] == expect - else: - lgbm_params = {} - _handling_alias_metrics(lgbm_params) - assert lgbm_params == {} - - -def test_handling_unexpected_alias_metrics() -> None: - with pytest.raises(ValueError): - _handling_alias_metrics({"metric": 1}) diff --git a/tests/integration_tests/lightgbm_tuner_tests/test_optimize.py b/tests/integration_tests/lightgbm_tuner_tests/test_optimize.py deleted file mode 100644 index a5c2484558..0000000000 --- a/tests/integration_tests/lightgbm_tuner_tests/test_optimize.py +++ /dev/null @@ -1,1105 +0,0 @@ -from __future__ import annotations - -from collections.abc import Generator -import contextlib -from tempfile import TemporaryDirectory -from typing import Any -from typing import TYPE_CHECKING -from unittest import mock -import warnings - -import numpy as np -import pytest - -import optuna -from optuna._imports import try_import -from optuna.integration._lightgbm_tuner.optimize import _BaseTuner -from optuna.integration._lightgbm_tuner.optimize import _OptunaObjective -from optuna.integration._lightgbm_tuner.optimize import _OptunaObjectiveCV -from optuna.integration._lightgbm_tuner.optimize import LightGBMTuner -from optuna.integration._lightgbm_tuner.optimize import LightGBMTunerCV -import optuna.integration.lightgbm as lgb -from optuna.study import Study - - -with try_import(): - from lightgbm import early_stopping - from lightgbm import log_evaluation - import sklearn.datasets - from sklearn.model_selection import KFold - from sklearn.model_selection import train_test_split - -pytestmark = pytest.mark.integration - - -@contextlib.contextmanager -def turnoff_train(metric: str = "binary_logloss") -> Generator[None, None, None]: - unexpected_value = 0.5 - 
dummy_num_iterations = 1234 - - class DummyBooster: - def __init__(self) -> None: - self.best_score = { - "valid_0": {metric: unexpected_value}, - } - - def current_iteration(self) -> int: - return dummy_num_iterations - - dummy_booster = DummyBooster() - - with mock.patch("lightgbm.train", return_value=dummy_booster): - yield - - -@contextlib.contextmanager -def turnoff_cv(metric: str = "binary_logloss") -> Generator[None, None, None]: - unexpected_value = 0.5 - dummy_results = {"valid {}-mean".format(metric): [unexpected_value]} - - with mock.patch("lightgbm.cv", return_value=dummy_results): - yield - - -class TestOptunaObjective: - def test_init_(self) -> None: - target_param_names = ["learning_rate"] # Invalid parameter name. - - with pytest.raises(NotImplementedError): - dataset = mock.MagicMock(spec="lgb.Dataset") - _OptunaObjective(target_param_names, {}, dataset, {}, 0, "tune_learning_rate", None) - - def test_call(self) -> None: - target_param_names = ["lambda_l1"] - lgbm_params: dict[str, Any] = {} - train_set = lgb.Dataset(None) - val_set = lgb.Dataset(None) - - lgbm_kwargs = {"valid_sets": val_set} - best_score = -np.inf - - with turnoff_train(): - objective = _OptunaObjective( - target_param_names, - lgbm_params, - train_set, - lgbm_kwargs, - best_score, - "tune_lambda_l1", - None, - ) - study = optuna.create_study(direction="minimize") - study.optimize(objective, n_trials=10) - - assert study.best_value == 0.5 - - -class TestOptunaObjectiveCV: - def test_call(self) -> None: - target_param_names = ["lambda_l1"] - lgbm_params: dict[str, Any] = {} - train_set = lgb.Dataset(None) - lgbm_kwargs: dict[str, Any] = {} - best_score = -np.inf - - with turnoff_cv(): - objective = _OptunaObjectiveCV( - target_param_names, - lgbm_params, - train_set, - lgbm_kwargs, - best_score, - "tune_lambda_l1", - None, - ) - study = optuna.create_study(direction="minimize") - study.optimize(objective, n_trials=10) - - assert study.best_value == 0.5 - - -class TestBaseTuner: - def test_get_booster_best_score(self) -> None: - expected_value = 1.0 - - booster = mock.MagicMock( - spec="lgb.Booster", best_score={"valid_0": {"binary_logloss": expected_value}} - ) - dummy_dataset = lgb.Dataset(None) - - tuner = _BaseTuner(lgbm_kwargs=dict(valid_sets=dummy_dataset)) - val_score = tuner._get_booster_best_score(booster) - assert val_score == expected_value - - def test_higher_is_better(self) -> None: - for metric in [ - "auc", - "auc_mu", - "ndcg", - "lambdarank", - "rank_xendcg", - "xendcg", - "xe_ndcg", - "xe_ndcg_mart", - "xendcg_mart", - "map", - "mean_average_precision", - "average_precision", - ]: - tuner = _BaseTuner(lgbm_params={"metric": metric}) - assert tuner.higher_is_better() - - for metric in [ - "mae", - "rmse", - "quantile", - "mape", - "binary_logloss", - "multi_logloss", - "cross_entropy", - ]: - tuner = _BaseTuner(lgbm_params={"metric": metric}) - assert not tuner.higher_is_better() - - def test_get_booster_best_score_using_valid_names_as_str(self) -> None: - expected_value = 1.0 - - booster = mock.MagicMock( - spec="lgb.Booster", best_score={"dev": {"binary_logloss": expected_value}} - ) - dummy_dataset = lgb.Dataset(None) - - tuner = _BaseTuner(lgbm_kwargs={"valid_names": "dev", "valid_sets": dummy_dataset}) - val_score = tuner._get_booster_best_score(booster) - assert val_score == expected_value - - def test_get_booster_best_score_using_valid_names_as_list(self) -> None: - unexpected_value = 0.5 - expected_value = 1.0 - - booster = mock.MagicMock( - spec="lgb.Booster", - best_score={ - 
"train": {"binary_logloss": unexpected_value}, - "val": {"binary_logloss": expected_value}, - }, - ) - dummy_train_dataset = lgb.Dataset(None) - dummy_val_dataset = lgb.Dataset(None) - - tuner = _BaseTuner( - lgbm_kwargs={ - "valid_names": ["train", "val"], - "valid_sets": [dummy_train_dataset, dummy_val_dataset], - } - ) - val_score = tuner._get_booster_best_score(booster) - assert val_score == expected_value - - def test_compare_validation_metrics(self) -> None: - for metric in [ - "auc", - "ndcg", - "lambdarank", - "rank_xendcg", - "xendcg", - "xe_ndcg", - "xe_ndcg_mart", - "xendcg_mart", - "map", - "mean_average_precision", - ]: - tuner = _BaseTuner(lgbm_params={"metric": metric}) - assert tuner.compare_validation_metrics(0.5, 0.1) - assert not tuner.compare_validation_metrics(0.5, 0.5) - assert not tuner.compare_validation_metrics(0.1, 0.5) - - for metric in ["rmsle", "rmse", "binary_logloss"]: - tuner = _BaseTuner(lgbm_params={"metric": metric}) - assert not tuner.compare_validation_metrics(0.5, 0.1) - assert not tuner.compare_validation_metrics(0.5, 0.5) - assert tuner.compare_validation_metrics(0.1, 0.5) - - @pytest.mark.parametrize( - "metric, eval_at_param, expected", - [ - ("auc", {"eval_at": 5}, "auc"), - ("accuracy", {"eval_at": 5}, "accuracy"), - ("rmsle", {"eval_at": 5}, "rmsle"), - ("rmse", {"eval_at": 5}, "rmse"), - ("binary_logloss", {"eval_at": 5}, "binary_logloss"), - ("ndcg", {"eval_at": 5}, "ndcg@5"), - ("ndcg", {"ndcg_at": 5}, "ndcg@5"), - ("ndcg", {"ndcg_eval_at": 5}, "ndcg@5"), - ("ndcg", {"eval_at": [20]}, "ndcg@20"), - ("ndcg", {"eval_at": [10, 20]}, "ndcg@10"), - ("ndcg", {}, "ndcg@1"), - ("map", {"eval_at": 5}, "map@5"), - ("map", {"eval_at": [20]}, "map@20"), - ("map", {"eval_at": [10, 20]}, "map@10"), - ("map", {}, "map@1"), - ], - ) - def test_metric_with_eval_at( - self, metric: str, eval_at_param: dict[str, int | list[int]], expected: str - ) -> None: - params: dict[str, str | int | list[int]] = {"metric": metric} - params.update(eval_at_param) - tuner = _BaseTuner(lgbm_params=params) - assert tuner._metric_with_eval_at(metric) == expected - - def test_metric_with_eval_at_error(self) -> None: - tuner = _BaseTuner(lgbm_params={"metric": "ndcg", "eval_at": "1"}) - with pytest.raises(ValueError): - tuner._metric_with_eval_at("ndcg") - - -class TestLightGBMTuner: - def _get_tuner_object( - self, - params: dict[str, Any] = {}, - train_set: "lgb.Dataset" | None = None, - kwargs_options: dict[str, Any] = {}, - study: Study | None = None, - ) -> lgb.LightGBMTuner: - # Required keyword arguments. - dummy_dataset = lgb.Dataset(None) - train_set = train_set or mock.MagicMock(spec="lgb.Dataset") - - runner = lgb.LightGBMTuner( - params, - train_set, - num_boost_round=5, - valid_sets=dummy_dataset, - callbacks=[early_stopping(stopping_rounds=2)], - study=study, - **kwargs_options, - ) - return runner - - def test_deprecated_args(self) -> None: - dummy_dataset = lgb.Dataset(None) - - with pytest.warns(FutureWarning): - LightGBMTuner({}, dummy_dataset, valid_sets=[dummy_dataset], verbosity=1) - - def test_no_eval_set_args(self) -> None: - params: dict[str, Any] = {} - train_set = lgb.Dataset(None) - with pytest.raises(ValueError) as excinfo: - lgb.LightGBMTuner( - params, - train_set, - num_boost_round=5, - callbacks=[early_stopping(stopping_rounds=2)], - ) - - assert excinfo.type == ValueError - assert str(excinfo.value) == "`valid_sets` is required." 
- - @pytest.mark.parametrize( - "metric, study_direction", - [ - ("auc", "minimize"), - ("mse", "maximize"), - (None, "maximize"), # The default metric is binary_logloss. - ], - ) - def test_inconsistent_study_direction(self, metric: str, study_direction: str) -> None: - params: dict[str, Any] = {} - if metric is not None: - params["metric"] = metric - train_set = lgb.Dataset(None) - valid_set = lgb.Dataset(None) - study = optuna.create_study(direction=study_direction) - with pytest.raises(ValueError) as excinfo: - lgb.LightGBMTuner( - params, - train_set, - valid_sets=[train_set, valid_set], - num_boost_round=5, - callbacks=[early_stopping(stopping_rounds=2)], - study=study, - ) - - assert excinfo.type == ValueError - assert str(excinfo.value).startswith("Study direction is inconsistent with the metric") - - def test_with_minimum_required_args(self) -> None: - runner = self._get_tuner_object() - assert "num_boost_round" in runner.lgbm_kwargs - assert "num_boost_round" not in runner.auto_options - assert runner.lgbm_kwargs["num_boost_round"] == 5 - - def test_parse_args_wrapper_args(self) -> None: - params: dict[str, Any] = {} - train_set = lgb.Dataset(None) - val_set = lgb.Dataset(None) - runner = lgb.LightGBMTuner( - params, - train_set, - num_boost_round=12, - callbacks=[early_stopping(stopping_rounds=10)], - valid_sets=val_set, - time_budget=600, - sample_size=1000, - ) - new_args = ["time_budget", "time_budget", "sample_size"] - for new_arg in new_args: - assert new_arg not in runner.lgbm_kwargs - assert new_arg in runner.auto_options - - @pytest.mark.parametrize( - "metric, study_direction, expected", - [("auc", "maximize", -np.inf), ("l2", "minimize", np.inf)], - ) - def test_best_score(self, metric: str, study_direction: str, expected: float) -> None: - with turnoff_train(metric=metric): - study = optuna.create_study(direction=study_direction) - runner = self._get_tuner_object( - params=dict(lambda_l1=0.0, metric=metric), kwargs_options={}, study=study - ) - assert runner.best_score == expected - runner.tune_regularization_factors() - assert runner.best_score == 0.5 - - def test_best_params(self) -> None: - unexpected_value = 20 # out of scope. - - with turnoff_train(): - study = optuna.create_study() - runner = self._get_tuner_object( - params=dict(lambda_l1=unexpected_value), kwargs_options={}, study=study - ) - assert runner.best_params["lambda_l1"] == unexpected_value - runner.tune_regularization_factors() - assert runner.best_params["lambda_l1"] != unexpected_value - - def test_sample_train_set(self) -> None: - sample_size = 3 - - X_trn = np.random.uniform(10, size=50).reshape((10, 5)) - y_trn = np.random.randint(2, size=10) - train_dataset = lgb.Dataset(X_trn, label=y_trn) - runner = self._get_tuner_object( - train_set=train_dataset, kwargs_options=dict(sample_size=sample_size) - ) - runner.sample_train_set() - - # Workaround for mypy. - if not TYPE_CHECKING: - runner.train_subset.construct() # Cannot get label before construct `lgb.Dataset`. - assert runner.train_subset.get_label().shape[0] == sample_size - - def test_time_budget(self) -> None: - unexpected_value = 1.1 # out of scope. - - with turnoff_train(): - runner = self._get_tuner_object( - params=dict( - feature_fraction=unexpected_value, # set default as unexpected value. - ), - kwargs_options=dict(time_budget=0), - ) - assert len(runner.study.trials) == 0 - # No trials run because `time_budget` is set to zero. 
- runner.tune_feature_fraction() - assert runner.lgbm_params["feature_fraction"] == unexpected_value - assert len(runner.study.trials) == 0 - - def test_tune_feature_fraction(self) -> None: - unexpected_value = 1.1 # out of scope. - - with turnoff_train(): - runner = self._get_tuner_object( - params=dict( - feature_fraction=unexpected_value, # set default as unexpected value. - ), - ) - assert len(runner.study.trials) == 0 - runner.tune_feature_fraction() - - assert runner.lgbm_params["feature_fraction"] != unexpected_value - assert len(runner.study.trials) == 7 - - def test_tune_num_leaves(self) -> None: - unexpected_value = 1 # out of scope. - - with turnoff_train(): - runner = self._get_tuner_object(params=dict(num_leaves=unexpected_value)) - assert len(runner.study.trials) == 0 - runner.tune_num_leaves() - - assert runner.lgbm_params["num_leaves"] != unexpected_value - assert len(runner.study.trials) == 20 - - def test_tune_num_leaves_negative_max_depth(self) -> None: - params: dict[str, Any] = {"metric": "binary_logloss", "max_depth": -1, "verbose": -1} - X_trn = np.random.uniform(10, size=(10, 5)) - y_trn = np.random.randint(2, size=10) - train_dataset = lgb.Dataset(X_trn, label=y_trn) - valid_dataset = lgb.Dataset(X_trn, label=y_trn) - - runner = lgb.LightGBMTuner( - params, - train_dataset, - num_boost_round=3, - valid_sets=valid_dataset, - callbacks=[early_stopping(stopping_rounds=2), log_evaluation(-1)], - ) - runner.tune_num_leaves() - assert len(runner.study.trials) == 20 - - def test_tune_bagging(self) -> None: - unexpected_value = 1 # out of scope. - - with turnoff_train(): - runner = self._get_tuner_object(params=dict(bagging_fraction=unexpected_value)) - assert len(runner.study.trials) == 0 - runner.tune_bagging() - - assert runner.lgbm_params["bagging_fraction"] != unexpected_value - assert len(runner.study.trials) == 10 - - def test_tune_feature_fraction_stage2(self) -> None: - unexpected_value = 0.5 - - with turnoff_train(): - runner = self._get_tuner_object(params=dict(feature_fraction=unexpected_value)) - assert len(runner.study.trials) == 0 - runner.tune_feature_fraction_stage2() - - assert runner.lgbm_params["feature_fraction"] != unexpected_value - assert len(runner.study.trials) == 6 - - def test_tune_regularization_factors(self) -> None: - unexpected_value = 20 # out of scope. - - with turnoff_train(): - runner = self._get_tuner_object( - params=dict(lambda_l1=unexpected_value) # set default as unexpected value. - ) - assert len(runner.study.trials) == 0 - runner.tune_regularization_factors() - - assert runner.lgbm_params["lambda_l1"] != unexpected_value - assert len(runner.study.trials) == 20 - - def test_tune_min_data_in_leaf(self) -> None: - unexpected_value = 1 # out of scope. - - with turnoff_train(): - runner = self._get_tuner_object( - params=dict( - min_child_samples=unexpected_value, # set default as unexpected value. 
- ), - ) - assert len(runner.study.trials) == 0 - runner.tune_min_data_in_leaf() - - assert runner.lgbm_params["min_child_samples"] != unexpected_value - assert len(runner.study.trials) == 5 - - def test_when_a_step_does_not_improve_best_score(self) -> None: - params: dict = {} - valid_data = np.zeros((10, 10)) - valid_sets = lgb.Dataset(valid_data) - - dataset = mock.MagicMock(spec="lgb.Dataset") - tuner = LightGBMTuner(params, dataset, valid_sets=valid_sets) - assert not tuner.higher_is_better() - - with mock.patch("lightgbm.train"), mock.patch.object( - _BaseTuner, "_get_booster_best_score", return_value=0.9 - ): - tuner.tune_feature_fraction() - - assert "feature_fraction" in tuner.best_params - assert tuner.best_score == 0.9 - - # Assume that tuning `num_leaves` doesn't improve the `best_score`. - with mock.patch("lightgbm.train"), mock.patch.object( - _BaseTuner, "_get_booster_best_score", return_value=1.1 - ): - tuner.tune_num_leaves() - - def test_resume_run(self) -> None: - params: dict = {"verbose": -1} - dataset = lgb.Dataset(np.zeros((10, 10))) - - study = optuna.create_study() - tuner = LightGBMTuner( - params, dataset, valid_sets=dataset, study=study, callbacks=[log_evaluation(-1)] - ) - - with mock.patch.object(_BaseTuner, "_get_booster_best_score", return_value=1.0): - tuner.tune_regularization_factors() - - n_trials = len(study.trials) - assert n_trials == len(study.trials) - - tuner2 = LightGBMTuner(params, dataset, valid_sets=dataset, study=study) - with mock.patch.object(_BaseTuner, "_get_booster_best_score", return_value=1.0): - tuner2.tune_regularization_factors() - assert n_trials == len(study.trials) - - @pytest.mark.parametrize( - "verbosity, level", - [ - (None, optuna.logging.INFO), - (-2, optuna.logging.CRITICAL), - (-1, optuna.logging.CRITICAL), - (0, optuna.logging.WARNING), - (1, optuna.logging.INFO), - (2, optuna.logging.DEBUG), - ], - ) - def test_run_verbosity(self, verbosity: int, level: int) -> None: - # We need to reconstruct our default handler to properly capture stderr. - optuna.logging._reset_library_root_logger() - optuna.logging.set_verbosity(optuna.logging.INFO) - - params: dict = {"verbose": -1} - dataset = lgb.Dataset(np.zeros((10, 10))) - - study = optuna.create_study() - with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=FutureWarning) - tuner = LightGBMTuner( - params, - dataset, - valid_sets=dataset, - study=study, - verbosity=verbosity, - callbacks=[log_evaluation(-1)], - time_budget=1, - ) - - with mock.patch.object(_BaseTuner, "_get_booster_best_score", return_value=1.0): - tuner.run() - - assert optuna.logging.get_verbosity() == level - assert tuner.lgbm_params["verbose"] == -1 - - @pytest.mark.parametrize("show_progress_bar, expected", [(True, 6), (False, 0)]) - def test_run_show_progress_bar(self, show_progress_bar: bool, expected: int) -> None: - params: dict = {"verbose": -1} - dataset = lgb.Dataset(np.zeros((10, 10))) - - study = optuna.create_study() - tuner = LightGBMTuner( - params, - dataset, - valid_sets=dataset, - study=study, - callbacks=[log_evaluation(-1)], - time_budget=1, - show_progress_bar=show_progress_bar, - ) - - with mock.patch.object( - _BaseTuner, "_get_booster_best_score", return_value=1.0 - ), mock.patch("tqdm.tqdm") as mock_tqdm: - tuner.run() - - assert mock_tqdm.call_count == expected - - def test_get_best_booster(self) -> None: - unexpected_value = 20 # out of scope. 
- - params: dict = {"verbose": -1, "lambda_l1": unexpected_value} - dataset = lgb.Dataset(np.zeros((10, 10))) - - study = optuna.create_study() - tuner = LightGBMTuner( - params, dataset, valid_sets=dataset, study=study, callbacks=[log_evaluation(-1)] - ) - - with pytest.raises(ValueError): - tuner.get_best_booster() - - with mock.patch.object(_BaseTuner, "_get_booster_best_score", return_value=0.0): - tuner.tune_regularization_factors() - - best_booster = tuner.get_best_booster() - assert isinstance(best_booster.params, dict) - assert best_booster.params["lambda_l1"] != unexpected_value - - tuner2 = LightGBMTuner(params, dataset, valid_sets=dataset, study=study) - - # Resumed study does not have the best booster. - with pytest.raises(ValueError): - tuner2.get_best_booster() - - @pytest.mark.parametrize("dir_exists, expected", [(False, True), (True, False)]) - def test_model_dir(self, dir_exists: bool, expected: bool) -> None: - params: dict = {"verbose": -1} - dataset = lgb.Dataset(np.zeros((10, 10))) - - with mock.patch("optuna.integration._lightgbm_tuner.optimize.os.mkdir") as m: - with mock.patch("os.path.exists", return_value=dir_exists): - LightGBMTuner(params, dataset, valid_sets=dataset, model_dir="./booster") - assert m.called == expected - - def test_best_booster_with_model_dir(self) -> None: - params: dict = {"verbose": -1} - dataset = lgb.Dataset(np.zeros((10, 10))) - - study = optuna.create_study() - with TemporaryDirectory() as tmpdir: - tuner = LightGBMTuner( - params, - dataset, - valid_sets=dataset, - study=study, - model_dir=tmpdir, - callbacks=[log_evaluation(-1)], - ) - - with mock.patch.object(_BaseTuner, "_get_booster_best_score", return_value=0.0): - tuner.tune_regularization_factors() - - best_booster = tuner.get_best_booster() - - tuner2 = LightGBMTuner( - params, dataset, valid_sets=dataset, study=study, model_dir=tmpdir - ) - best_booster2 = tuner2.get_best_booster() - - assert best_booster.params == best_booster2.params - - @pytest.mark.parametrize("direction, overall_best", [("minimize", 1), ("maximize", 2)]) - def test_create_stepwise_study(self, direction: str, overall_best: int) -> None: - dataset = mock.MagicMock(spec="lgb.Dataset") - tuner = LightGBMTuner({}, dataset, valid_sets=lgb.Dataset(np.zeros((10, 10)))) - - def objective(trial: optuna.trial.Trial, value: float) -> float: - trial.storage.set_trial_system_attr( - trial._trial_id, - optuna.integration._lightgbm_tuner.optimize._STEP_NAME_KEY, - "step{:.0f}".format(value), - ) - return trial.suggest_float("x", value, value) - - study = optuna.create_study(direction=direction) - study_step1 = tuner._create_stepwise_study(study, "step1") - - with pytest.raises(ValueError): - study_step1.best_trial - - study_step1.optimize(lambda t: objective(t, 1), n_trials=1) - - study_step2 = tuner._create_stepwise_study(study, "step2") - - # `study` has a trial, but `study_step2` has no trials. 
- with pytest.raises(ValueError): - study_step2.best_trial - - study_step2.optimize(lambda t: objective(t, 2), n_trials=2) - - assert len(study_step1.trials) == 1 - assert len(study_step2.trials) == 2 - assert len(study.trials) == 3 - - assert study_step1.best_trial.value == 1 - assert study_step2.best_trial.value == 2 - assert study.best_trial.value == overall_best - - def test_optuna_callback(self) -> None: - params: dict[str, Any] = {"verbose": -1} - dataset = lgb.Dataset(np.zeros((10, 10))) - - callback_mock = mock.MagicMock() - - study = optuna.create_study() - tuner = LightGBMTuner( - params, - dataset, - valid_sets=dataset, - study=study, - callbacks=[log_evaluation(-1)], - optuna_callbacks=[callback_mock], - ) - - with mock.patch.object(_BaseTuner, "_get_booster_best_score", return_value=1.0): - tuner._tune_params(["num_leaves"], 10, optuna.samplers.TPESampler(), "num_leaves") - - assert callback_mock.call_count == 10 - - @pytest.mark.skip(reason="Fail since 28 Jan 2024. TODO(nabenabe0928): Fix here.") - def test_tune_best_score_reproducibility(self) -> None: - iris = sklearn.datasets.load_iris() - X_trainval, X_test, y_trainval, y_test = train_test_split( - iris.data, iris.target, random_state=0 - ) - - train = lgb.Dataset(X_trainval, y_trainval) - valid = lgb.Dataset(X_test, y_test) - params = { - "objective": "regression", - "metric": "rmse", - "random_seed": 0, - "deterministic": True, - "force_col_wise": True, - "verbosity": -1, - } - - tuner_first_try = lgb.LightGBMTuner( - params, - train, - valid_sets=valid, - callbacks=[early_stopping(stopping_rounds=3), log_evaluation(-1)], - optuna_seed=10, - ) - tuner_first_try.run() - best_score_first_try = tuner_first_try.best_score - - tuner_second_try = lgb.LightGBMTuner( - params, - train, - valid_sets=valid, - callbacks=[early_stopping(stopping_rounds=3), log_evaluation(-1)], - optuna_seed=10, - ) - tuner_second_try.run() - best_score_second_try = tuner_second_try.best_score - - assert best_score_second_try == best_score_first_try - - first_try_trials = tuner_first_try.study.trials - second_try_trials = tuner_second_try.study.trials - assert len(first_try_trials) == len(second_try_trials) - for first_trial, second_trial in zip(first_try_trials, second_try_trials): - assert first_trial.value == second_trial.value - assert first_trial.params == second_trial.params - - -class TestLightGBMTunerCV: - def _get_tunercv_object( - self, - params: dict[str, Any] = {}, - train_set: lgb.Dataset | None = None, - kwargs_options: dict[str, Any] = {}, - study: optuna.study.Study | None = None, - ) -> LightGBMTunerCV: - # Required keyword arguments. - kwargs: dict[str, Any] = dict(num_boost_round=5, study=study) - kwargs.update(kwargs_options) - - train_set = train_set or mock.MagicMock(spec="lgb.Dataset") - runner = LightGBMTunerCV( - params, train_set, callbacks=[early_stopping(stopping_rounds=2)], **kwargs - ) - return runner - - def test_deprecated_args(self) -> None: - dummy_dataset = lgb.Dataset(None) - - with pytest.warns(FutureWarning): - LightGBMTunerCV({}, dummy_dataset, verbosity=1) - - @pytest.mark.parametrize( - "metric, study_direction", - [ - ("auc", "minimize"), - ("mse", "maximize"), - (None, "maximize"), # The default metric is binary_logloss. 
- ], - ) - def test_inconsistent_study_direction(self, metric: str, study_direction: str) -> None: - params: dict[str, Any] = {} - if metric is not None: - params["metric"] = metric - train_set = lgb.Dataset(None) - study = optuna.create_study(direction=study_direction) - with pytest.raises(ValueError) as excinfo: - LightGBMTunerCV( - params, - train_set, - num_boost_round=5, - callbacks=[early_stopping(stopping_rounds=2)], - study=study, - ) - - assert excinfo.type == ValueError - assert str(excinfo.value).startswith("Study direction is inconsistent with the metric") - - def test_with_minimum_required_args(self) -> None: - runner = self._get_tunercv_object() - assert "num_boost_round" in runner.lgbm_kwargs - assert "num_boost_round" not in runner.auto_options - assert runner.lgbm_kwargs["num_boost_round"] == 5 - - def test_tune_feature_fraction(self) -> None: - unexpected_value = 1.1 # out of scope. - - with turnoff_cv(): - runner = self._get_tunercv_object( - params=dict( - feature_fraction=unexpected_value, # set default as unexpected value. - ), - ) - assert len(runner.study.trials) == 0 - runner.tune_feature_fraction() - - assert runner.lgbm_params["feature_fraction"] != unexpected_value - assert len(runner.study.trials) == 7 - - def test_tune_num_leaves(self) -> None: - unexpected_value = 1 # out of scope. - - with turnoff_cv(): - runner = self._get_tunercv_object(params=dict(num_leaves=unexpected_value)) - assert len(runner.study.trials) == 0 - runner.tune_num_leaves() - - assert runner.lgbm_params["num_leaves"] != unexpected_value - assert len(runner.study.trials) == 20 - - def test_tune_bagging(self) -> None: - unexpected_value = 1 # out of scope. - - with turnoff_cv(): - runner = self._get_tunercv_object(params=dict(bagging_fraction=unexpected_value)) - assert len(runner.study.trials) == 0 - runner.tune_bagging() - - assert runner.lgbm_params["bagging_fraction"] != unexpected_value - assert len(runner.study.trials) == 10 - - def test_tune_feature_fraction_stage2(self) -> None: - unexpected_value = 0.5 - - with turnoff_cv(): - runner = self._get_tunercv_object(params=dict(feature_fraction=unexpected_value)) - assert len(runner.study.trials) == 0 - runner.tune_feature_fraction_stage2() - - assert runner.lgbm_params["feature_fraction"] != unexpected_value - assert len(runner.study.trials) == 6 - - def test_tune_regularization_factors(self) -> None: - unexpected_value = 20 # out of scope. - - with turnoff_cv(): - runner = self._get_tunercv_object( - params=dict(lambda_l1=unexpected_value) # set default as unexpected value. - ) - assert len(runner.study.trials) == 0 - runner.tune_regularization_factors() - - assert runner.lgbm_params["lambda_l1"] != unexpected_value - assert len(runner.study.trials) == 20 - - def test_tune_min_data_in_leaf(self) -> None: - unexpected_value = 1 # out of scope. - - with turnoff_cv(): - runner = self._get_tunercv_object( - params=dict( - min_child_samples=unexpected_value, # set default as unexpected value. 
- ), - ) - assert len(runner.study.trials) == 0 - runner.tune_min_data_in_leaf() - - assert runner.lgbm_params["min_child_samples"] != unexpected_value - assert len(runner.study.trials) == 5 - - def test_resume_run(self) -> None: - params: dict = {"verbose": -1} - dataset = lgb.Dataset(np.zeros((10, 10))) - - study = optuna.create_study() - tuner = LightGBMTunerCV(params, dataset, study=study) - - with mock.patch.object(_OptunaObjectiveCV, "_get_cv_scores", return_value=[1.0]): - tuner.tune_regularization_factors() - - n_trials = len(study.trials) - assert n_trials == len(study.trials) - - tuner2 = LightGBMTuner(params, dataset, valid_sets=dataset, study=study) - with mock.patch.object(_OptunaObjectiveCV, "_get_cv_scores", return_value=[1.0]): - tuner2.tune_regularization_factors() - assert n_trials == len(study.trials) - - @pytest.mark.parametrize( - "verbosity, level", - [ - (None, optuna.logging.INFO), - (-2, optuna.logging.CRITICAL), - (-1, optuna.logging.CRITICAL), - (0, optuna.logging.WARNING), - (1, optuna.logging.INFO), - (2, optuna.logging.DEBUG), - ], - ) - def test_run_verbosity(self, verbosity: int, level: int) -> None: - # We need to reconstruct our default handler to properly capture stderr. - optuna.logging._reset_library_root_logger() - optuna.logging.set_verbosity(optuna.logging.INFO) - - params: dict = {"verbose": -1} - dataset = lgb.Dataset(np.zeros((10, 10))) - - study = optuna.create_study() - with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=FutureWarning) - tuner = LightGBMTunerCV( - params, dataset, study=study, verbosity=verbosity, time_budget=1 - ) - - with mock.patch.object(_OptunaObjectiveCV, "_get_cv_scores", return_value=[1.0]): - tuner.run() - - assert optuna.logging.get_verbosity() == level - assert tuner.lgbm_params["verbose"] == -1 - - @pytest.mark.parametrize("show_progress_bar, expected", [(True, 6), (False, 0)]) - def test_run_show_progress_bar(self, show_progress_bar: bool, expected: int) -> None: - params: dict = {"verbose": -1} - dataset = lgb.Dataset(np.zeros((10, 10))) - - study = optuna.create_study() - tuner = LightGBMTunerCV( - params, dataset, study=study, time_budget=1, show_progress_bar=show_progress_bar - ) - - with mock.patch.object( - _OptunaObjectiveCV, "_get_cv_scores", return_value=[1.0] - ), mock.patch("tqdm.tqdm") as mock_tqdm: - tuner.run() - - assert mock_tqdm.call_count == expected - - def test_optuna_callback(self) -> None: - params: dict[str, Any] = {"verbose": -1} - dataset = lgb.Dataset(np.zeros((10, 10))) - - callback_mock = mock.MagicMock() - - study = optuna.create_study() - tuner = LightGBMTunerCV(params, dataset, study=study, optuna_callbacks=[callback_mock]) - - with mock.patch.object(_OptunaObjectiveCV, "_get_cv_scores", return_value=[1.0]): - tuner._tune_params(["num_leaves"], 10, optuna.samplers.TPESampler(), "num_leaves") - - assert callback_mock.call_count == 10 - - @pytest.mark.parametrize("dir_exists, expected", [(False, True), (True, False)]) - def test_model_dir(self, dir_exists: bool, expected: bool) -> None: - unexpected_value = 20 # out of scope. - - params: dict = {"verbose": -1, "lambda_l1": unexpected_value} - dataset = lgb.Dataset(np.zeros((10, 10))) - - with mock.patch("os.mkdir") as m: - with mock.patch("os.path.exists", return_value=dir_exists): - LightGBMTunerCV(params, dataset, model_dir="./booster") - assert m.called == expected - - def test_get_best_booster(self) -> None: - unexpected_value = 20 # out of scope. 
- - params: dict = {"verbose": -1, "lambda_l1": unexpected_value} - dataset = lgb.Dataset(np.zeros((10, 10))) - study = optuna.create_study() - - with TemporaryDirectory() as tmpdir: - tuner = LightGBMTunerCV( - params, dataset, study=study, model_dir=tmpdir, return_cvbooster=True - ) - - with pytest.raises(ValueError): - tuner.get_best_booster() - - with mock.patch.object(_OptunaObjectiveCV, "_get_cv_scores", return_value=[1.0]): - tuner.tune_regularization_factors() - - best_boosters = tuner.get_best_booster().boosters - for booster in best_boosters: - assert booster.params["lambda_l1"] != unexpected_value - - tuner2 = LightGBMTunerCV( - params, dataset, study=study, model_dir=tmpdir, return_cvbooster=True - ) - best_boosters2 = tuner2.get_best_booster().boosters - for booster, booster2 in zip(best_boosters, best_boosters2): - assert booster.params == booster2.params - - def test_get_best_booster_with_error(self) -> None: - params: dict = {"verbose": -1} - dataset = lgb.Dataset(np.zeros((10, 10))) - study = optuna.create_study() - - tuner = LightGBMTunerCV( - params, dataset, study=study, model_dir=None, return_cvbooster=True - ) - # No trial is completed yet. - with pytest.raises(ValueError): - tuner.get_best_booster() - - with mock.patch.object(_OptunaObjectiveCV, "_get_cv_scores", return_value=[1.0]): - tuner.tune_regularization_factors() - - tuner2 = LightGBMTunerCV( - params, dataset, study=study, model_dir=None, return_cvbooster=True - ) - # Resumed the study does not have the best booster. - with pytest.raises(ValueError): - tuner2.get_best_booster() - - with TemporaryDirectory() as tmpdir: - tuner3 = LightGBMTunerCV( - params, dataset, study=study, model_dir=tmpdir, return_cvbooster=True - ) - # The booster was not saved hence not found in the `model_dir`. - with pytest.raises(ValueError): - tuner3.get_best_booster() - - @pytest.mark.skip(reason="Fail since 28 Jan 2024. 
TODO(nabenabe0928): Fix here.") - def test_tune_best_score_reproducibility(self) -> None: - iris = sklearn.datasets.load_iris() - X_trainval, X_test, y_trainval, y_test = train_test_split( - iris.data, iris.target, random_state=0 - ) - - train = lgb.Dataset(X_trainval, y_trainval) - params = { - "objective": "regression", - "metric": "rmse", - "random_seed": 0, - "deterministic": True, - "force_col_wise": True, - "verbosity": -1, - } - - tuner_first_try = lgb.LightGBMTunerCV( - params, - train, - callbacks=[early_stopping(stopping_rounds=3)], - folds=KFold(n_splits=3), - optuna_seed=10, - ) - tuner_first_try.run() - best_score_first_try = tuner_first_try.best_score - - tuner_second_try = lgb.LightGBMTunerCV( - params, - train, - callbacks=[early_stopping(stopping_rounds=3)], - folds=KFold(n_splits=3), - optuna_seed=10, - ) - tuner_second_try.run() - best_score_second_try = tuner_second_try.best_score - - assert best_score_second_try == best_score_first_try - - first_try_trials = tuner_first_try.study.trials - second_try_trials = tuner_second_try.study.trials - assert len(first_try_trials) == len(second_try_trials) - for first_trial, second_trial in zip(first_try_trials, second_try_trials): - assert first_trial.value == second_trial.value - assert first_trial.params == second_trial.params diff --git a/tests/integration_tests/test_integration.py b/tests/integration_tests/test_integration.py index e66545f617..6ee673d85b 100644 --- a/tests/integration_tests/test_integration.py +++ b/tests/integration_tests/test_integration.py @@ -5,9 +5,6 @@ def test_import() -> None: - from optuna.integration import lightgbm # NOQA - from optuna.integration import LightGBMPruningCallback # NOQA - with pytest.raises(ImportError): from optuna.integration import unknown_module # type: ignore # NOQA @@ -15,8 +12,5 @@ def test_import() -> None: def test_module_attributes() -> None: import optuna - assert hasattr(optuna.integration, "lightgbm") - assert hasattr(optuna.integration, "LightGBMPruningCallback") - with pytest.raises(AttributeError): optuna.integration.unknown_attribute # type: ignore diff --git a/tests/integration_tests/test_lightgbm.py b/tests/integration_tests/test_lightgbm.py deleted file mode 100644 index 3a579f47eb..0000000000 --- a/tests/integration_tests/test_lightgbm.py +++ /dev/null @@ -1,192 +0,0 @@ -from functools import partial -from unittest.mock import patch - -import numpy as np -import pytest - -import optuna -from optuna._imports import try_import -from optuna.integration.lightgbm import LightGBMPruningCallback -from optuna.testing.pruners import DeterministicPruner - - -with try_import(): - import lightgbm as lgb - -pytestmark = pytest.mark.integration - -# If `True`, `lgb.cv(..)` will be used in the test, otherwise `lgb.train(..)` will be used. -CV_FLAGS = [False, True] - - -@pytest.mark.parametrize("cv", CV_FLAGS) -def test_lightgbm_pruning_callback_call(cv: bool) -> None: - callback_env = partial( - lgb.callback.CallbackEnv, - model="test", - params={}, - begin_iteration=0, - end_iteration=1, - iteration=1, - ) - - if cv: - env = callback_env(evaluation_result_list=[(("cv_agg", "binary_error", 1.0, False, 1.0))]) - else: - env = callback_env(evaluation_result_list=[("validation", "binary_error", 1.0, False)]) - - # The pruner is deactivated. - study = optuna.create_study(pruner=DeterministicPruner(False)) - trial = study.ask() - pruning_callback = LightGBMPruningCallback(trial, "binary_error", valid_name="validation") - pruning_callback(env) - - # The pruner is activated. 
- study = optuna.create_study(pruner=DeterministicPruner(True)) - trial = study.ask() - pruning_callback = LightGBMPruningCallback(trial, "binary_error", valid_name="validation") - with pytest.raises(optuna.TrialPruned): - pruning_callback(env) - - -@pytest.mark.parametrize("cv", CV_FLAGS) -def test_lightgbm_pruning_callback(cv: bool) -> None: - study = optuna.create_study(pruner=DeterministicPruner(True)) - study.optimize(partial(objective, cv=cv), n_trials=1) - assert study.trials[0].state == optuna.trial.TrialState.PRUNED - - study = optuna.create_study(pruner=DeterministicPruner(False)) - study.optimize(partial(objective, cv=cv), n_trials=1) - assert study.trials[0].state == optuna.trial.TrialState.COMPLETE - assert study.trials[0].value == 1.0 - - # Use non default validation name. - custom_valid_name = "my_validation" - study = optuna.create_study(pruner=DeterministicPruner(False)) - study.optimize(lambda trial: objective(trial, valid_name=custom_valid_name, cv=cv), n_trials=1) - assert study.trials[0].state == optuna.trial.TrialState.COMPLETE - assert study.trials[0].value == 1.0 - - # Check "maximize" direction. - study = optuna.create_study(pruner=DeterministicPruner(True), direction="maximize") - study.optimize(lambda trial: objective(trial, metric="auc", cv=cv), n_trials=1, catch=()) - assert study.trials[0].state == optuna.trial.TrialState.PRUNED - - study = optuna.create_study(pruner=DeterministicPruner(False), direction="maximize") - study.optimize(lambda trial: objective(trial, metric="auc", cv=cv), n_trials=1, catch=()) - assert study.trials[0].state == optuna.trial.TrialState.COMPLETE - assert study.trials[0].value == 1.0 - - -@pytest.mark.parametrize( - "cv, interval, num_boost_round", - [ - (True, 1, 1), - (True, 2, 1), - (True, 2, 2), - (False, 1, 1), - (False, 2, 1), - (False, 2, 2), - ], -) -def test_lightgbm_pruning_callback_with_interval( - cv: bool, interval: int, num_boost_round: int -) -> None: - study = optuna.create_study(pruner=DeterministicPruner(False)) - - with patch("optuna.trial.Trial.report") as mock: - study.optimize( - partial(objective, cv=cv, interval=interval, num_boost_round=num_boost_round), - n_trials=1, - ) - - if interval <= num_boost_round: - assert mock.call_count == 1 - else: - assert mock.call_count == 0 - - assert study.trials[0].state == optuna.trial.TrialState.COMPLETE - assert study.trials[0].value == 1.0 - - study = optuna.create_study(pruner=DeterministicPruner(True)) - study.optimize( - partial(objective, cv=cv, interval=interval, num_boost_round=num_boost_round), n_trials=1 - ) - if interval > num_boost_round: - assert study.trials[0].state == optuna.trial.TrialState.COMPLETE - else: - assert study.trials[0].state == optuna.trial.TrialState.PRUNED - - -@pytest.mark.parametrize("cv", CV_FLAGS) -def test_lightgbm_pruning_callback_errors(cv: bool) -> None: - # Unknown metric. - study = optuna.create_study(pruner=DeterministicPruner(False)) - with pytest.raises(ValueError): - study.optimize( - lambda trial: objective(trial, metric="foo_metric", cv=cv), n_trials=1, catch=() - ) - - if not cv: - # Unknown validation name. - study = optuna.create_study(pruner=DeterministicPruner(False)) - with pytest.raises(ValueError): - study.optimize( - lambda trial: objective( - trial, valid_name="valid_1", force_default_valid_names=True - ), - n_trials=1, - catch=(), - ) - - # Check consistency of study direction. 
- study = optuna.create_study(pruner=DeterministicPruner(False)) - with pytest.raises(ValueError): - study.optimize(lambda trial: objective(trial, metric="auc", cv=cv), n_trials=1, catch=()) - - study = optuna.create_study(pruner=DeterministicPruner(False), direction="maximize") - with pytest.raises(ValueError): - study.optimize( - lambda trial: objective(trial, metric="binary_error", cv=cv), n_trials=1, catch=() - ) - - -def objective( - trial: optuna.trial.Trial, - metric: str = "binary_error", - valid_name: str = "valid_0", - interval: int = 1, - num_boost_round: int = 1, - force_default_valid_names: bool = False, - cv: bool = False, -) -> float: - dtrain = lgb.Dataset(np.asarray([[1.0], [2.0], [3.0], [4.0]]), label=[1.0, 0.0, 1.0, 0.0]) - dtest = lgb.Dataset(np.asarray([[1.0]]), label=[1.0]) - - if force_default_valid_names: - valid_names = None - else: - valid_names = [valid_name] - - verbose_callback = lgb.log_evaluation() - pruning_callback = LightGBMPruningCallback( - trial, metric, valid_name=valid_name, report_interval=interval - ) - if cv: - lgb.cv( - {"objective": "binary", "metric": ["auc", "binary_error"]}, - dtrain, - num_boost_round, - nfold=2, - callbacks=[verbose_callback, pruning_callback], - ) - else: - lgb.train( - {"objective": "binary", "metric": ["auc", "binary_error"]}, - dtrain, - num_boost_round, - valid_sets=[dtest], - valid_names=valid_names, - callbacks=[verbose_callback, pruning_callback], - ) - return 1.0 diff --git a/tutorial/10_key_features/003_efficient_optimization_algorithms.py b/tutorial/10_key_features/003_efficient_optimization_algorithms.py index 7e2a1b9bc5..598b4beaa3 100644 --- a/tutorial/10_key_features/003_efficient_optimization_algorithms.py +++ b/tutorial/10_key_features/003_efficient_optimization_algorithms.py @@ -178,10 +178,15 @@ def objective(trial): # # For the complete list of Optuna's integration modules, see :mod:`~optuna.integration`. # -# For example, :class:`~optuna.integration.LightGBMPruningCallback` introduces pruning without directly changing the logic of training iteration. +# For example, `optuna_integration.LightGBMPruningCallback `_ introduces pruning without directly changing the logic of training iteration. # (See also `example `_ for the entire script.) # # .. code-block:: python # -# pruning_callback = optuna.integration.LightGBMPruningCallback(trial, 'validation-error') -# gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) +# try: +# import optuna_integration +# +# pruning_callback = optuna_integration.LightGBMPruningCallback(trial, 'validation-error') +# gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) +# except ImportError: +# ... diff --git a/tutorial/10_key_features/005_visualization.py b/tutorial/10_key_features/005_visualization.py index beeb7d781b..c91bdd7965 100644 --- a/tutorial/10_key_features/005_visualization.py +++ b/tutorial/10_key_features/005_visualization.py @@ -53,6 +53,14 @@ from optuna.visualization import plot_slice from optuna.visualization import plot_timeline +try: + import optuna_integration + + is_integration_available = True +except ImportError: + is_integration_available = False + + SEED = 42 np.random.seed(SEED) @@ -76,9 +84,11 @@ def objective(trial): "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), } - # Add a callback for pruning. 
- pruning_callback = optuna.integration.LightGBMPruningCallback(trial, "auc") - gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) + if is_integration_available: + pruning_callback = optuna_integration.LightGBMPruningCallback(trial, "auc") + gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) + else: + gbm = lgb.train(param, dtrain, valid_sets=[dvalid]) preds = gbm.predict(valid_x) pred_labels = np.rint(preds) @@ -101,7 +111,8 @@ def objective(trial): plot_optimization_history(study) ################################################################################################### -# Visualize the learning curves of the trials. See :func:`~optuna.visualization.plot_intermediate_values` for the details. +# Visualize the learning curves of the trials (note: this example requires `optuna_integration`). +# See :func:`~optuna.visualization.plot_intermediate_values` for the details and another example of visualization. plot_intermediate_values(study) ################################################################################################### @@ -157,6 +168,7 @@ def objective(trial): # :class:`plotly.graph_objects.Figure` or :class:`matplotlib.axes.Axes` depending on the module. # This allows users to modify the generated figure for their demand by using API of the visualization library. # The following example replaces figure titles drawn by Plotly-based :func:`~optuna.visualization.plot_intermediate_values` manually. +# (Note: this example requires `optuna_integration`.) fig = plot_intermediate_values(study) fig.update_layout( diff --git a/tutorial/20_recipes/008_specify_params.py b/tutorial/20_recipes/008_specify_params.py index 02a7e4be30..244ed85751 100644 --- a/tutorial/20_recipes/008_specify_params.py +++ b/tutorial/20_recipes/008_specify_params.py @@ -37,6 +37,13 @@ import optuna +try: + import optuna_integration + + is_integration_available = True +except ImportError: + is_integration_available = False + ################################################################################################### # Define the objective function. @@ -56,9 +63,11 @@ def objective(trial): "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), } - # Add a callback for pruning. 
- pruning_callback = optuna.integration.LightGBMPruningCallback(trial, "auc") - gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) + if is_integration_available: + pruning_callback = optuna_integration.LightGBMPruningCallback(trial, "auc") + gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) + else: + gbm = lgb.train(param, dtrain, valid_sets=[dvalid]) preds = gbm.predict(valid_x) pred_labels = np.rint(preds) From a559902b27598277d57f9035e5cf39780c44560c Mon Sep 17 00:00:00 2001 From: Yoshihiko Ozaki <30489874+y0z@users.noreply.github.com> Date: Wed, 14 Feb 2024 16:14:23 +0900 Subject: [PATCH 02/21] Update tutorial/10_key_features/005_visualization.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- tutorial/10_key_features/005_visualization.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tutorial/10_key_features/005_visualization.py b/tutorial/10_key_features/005_visualization.py index c91bdd7965..30549e99a3 100644 --- a/tutorial/10_key_features/005_visualization.py +++ b/tutorial/10_key_features/005_visualization.py @@ -55,10 +55,8 @@ try: import optuna_integration - - is_integration_available = True -except ImportError: - is_integration_available = False +except ModuleNotFoundError: + raise ModuleNotFoundError("Please run `pip install optuna-integration lightgbm` first.") SEED = 42 From 21dfd98bae727d8acf6dd1d6fdd861f576be7aaf Mon Sep 17 00:00:00 2001 From: Yoshihiko Ozaki <30489874+y0z@users.noreply.github.com> Date: Wed, 14 Feb 2024 16:16:27 +0900 Subject: [PATCH 03/21] Update tutorial/10_key_features/005_visualization.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- tutorial/10_key_features/005_visualization.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tutorial/10_key_features/005_visualization.py b/tutorial/10_key_features/005_visualization.py index 30549e99a3..74cfb4bc6a 100644 --- a/tutorial/10_key_features/005_visualization.py +++ b/tutorial/10_key_features/005_visualization.py @@ -82,11 +82,11 @@ def objective(trial): "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), } - if is_integration_available: + try: pruning_callback = optuna_integration.LightGBMPruningCallback(trial, "auc") gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) - else: - gbm = lgb.train(param, dtrain, valid_sets=[dvalid]) + except ImportError: + raise ImportError("Please run `pip install lightgbm` to use LightGBMPruningCallback.") preds = gbm.predict(valid_x) pred_labels = np.rint(preds) From b3932fb7682b40a68ef3484518a52683c04661d0 Mon Sep 17 00:00:00 2001 From: y0z Date: Wed, 14 Feb 2024 16:46:59 +0900 Subject: [PATCH 04/21] Revert "Update tutorial/10_key_features/005_visualization.py" This reverts commit a559902b27598277d57f9035e5cf39780c44560c. 
--- tutorial/10_key_features/005_visualization.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tutorial/10_key_features/005_visualization.py b/tutorial/10_key_features/005_visualization.py index 74cfb4bc6a..01a94fcab7 100644 --- a/tutorial/10_key_features/005_visualization.py +++ b/tutorial/10_key_features/005_visualization.py @@ -55,8 +55,10 @@ try: import optuna_integration -except ModuleNotFoundError: - raise ModuleNotFoundError("Please run `pip install optuna-integration lightgbm` first.") + + is_integration_available = True +except ImportError: + is_integration_available = False SEED = 42 From 02961cf0488d52925d5541de1b8eafc8b2f74995 Mon Sep 17 00:00:00 2001 From: y0z Date: Wed, 14 Feb 2024 16:47:11 +0900 Subject: [PATCH 05/21] Reapply "Update tutorial/10_key_features/005_visualization.py" This reverts commit b3932fb7682b40a68ef3484518a52683c04661d0. --- tutorial/10_key_features/005_visualization.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tutorial/10_key_features/005_visualization.py b/tutorial/10_key_features/005_visualization.py index 01a94fcab7..74cfb4bc6a 100644 --- a/tutorial/10_key_features/005_visualization.py +++ b/tutorial/10_key_features/005_visualization.py @@ -55,10 +55,8 @@ try: import optuna_integration - - is_integration_available = True -except ImportError: - is_integration_available = False +except ModuleNotFoundError: + raise ModuleNotFoundError("Please run `pip install optuna-integration lightgbm` first.") SEED = 42 From 4bf9ea368ee32eedf9ec7b9373695ba95c36805b Mon Sep 17 00:00:00 2001 From: y0z Date: Wed, 14 Feb 2024 16:57:24 +0900 Subject: [PATCH 06/21] Revert "Reapply "Update tutorial/10_key_features/005_visualization.py"" This reverts commit 02961cf0488d52925d5541de1b8eafc8b2f74995. --- tutorial/10_key_features/005_visualization.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tutorial/10_key_features/005_visualization.py b/tutorial/10_key_features/005_visualization.py index 74cfb4bc6a..01a94fcab7 100644 --- a/tutorial/10_key_features/005_visualization.py +++ b/tutorial/10_key_features/005_visualization.py @@ -55,8 +55,10 @@ try: import optuna_integration -except ModuleNotFoundError: - raise ModuleNotFoundError("Please run `pip install optuna-integration lightgbm` first.") + + is_integration_available = True +except ImportError: + is_integration_available = False SEED = 42 From cd00cc756a8f0fb43841b8bb64c575a9686a1015 Mon Sep 17 00:00:00 2001 From: y0z Date: Wed, 14 Feb 2024 16:57:40 +0900 Subject: [PATCH 07/21] Revert "Update tutorial/10_key_features/005_visualization.py" This reverts commit 21dfd98bae727d8acf6dd1d6fdd861f576be7aaf. 
--- tutorial/10_key_features/005_visualization.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tutorial/10_key_features/005_visualization.py b/tutorial/10_key_features/005_visualization.py index 01a94fcab7..c91bdd7965 100644 --- a/tutorial/10_key_features/005_visualization.py +++ b/tutorial/10_key_features/005_visualization.py @@ -84,11 +84,11 @@ def objective(trial): "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), } - try: + if is_integration_available: pruning_callback = optuna_integration.LightGBMPruningCallback(trial, "auc") gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) - except ImportError: - raise ImportError("Please run `pip install lightgbm` to use LightGBMPruningCallback.") + else: + gbm = lgb.train(param, dtrain, valid_sets=[dvalid]) preds = gbm.predict(valid_x) pred_labels = np.rint(preds) From 596cb9b4d2aebb4c551ddf8bedc2fcd0979d5aa4 Mon Sep 17 00:00:00 2001 From: y0z Date: Wed, 14 Feb 2024 17:53:40 +0900 Subject: [PATCH 08/21] Fix __init__.py. --- optuna/integration/__init__.py | 70 +++++++++++++++++----------------- optuna/integration/lightgbm.py | 23 +++++++++-- 2 files changed, 56 insertions(+), 37 deletions(-) diff --git a/optuna/integration/__init__.py b/optuna/integration/__init__.py index 34a3005994..713ef819f4 100644 --- a/optuna/integration/__init__.py +++ b/optuna/integration/__init__.py @@ -35,6 +35,41 @@ } +__all__ = [ + "AllenNLPExecutor", + "AllenNLPPruningCallback", + "BoTorchSampler", + "CatalystPruningCallback", + "CatBoostPruningCallback", + "ChainerPruningExtension", + "ChainerMNStudy", + "CmaEsSampler", + "PyCmaSampler", + "DaskStorage", + "MLflowCallback", + "WeightsAndBiasesCallback", + "KerasPruningCallback", + "LightGBMPruningCallback", + "LightGBMTuner", + "LightGBMTunerCV", + "TorchDistributedTrial", + "PyTorchIgnitePruningHandler", + "PyTorchLightningPruningCallback", + "OptunaSearchCV", + "ShapleyImportanceEvaluator", + "SkorchPruningCallback", + "MXNetPruningCallback", + "SkoptSampler", + "TensorBoardCallback", + "TensorFlowPruningHook", + "TFKerasPruningCallback", + "XGBoostPruningCallback", + "FastAIV1PruningCallback", + "FastAIV2PruningCallback", + "FastAIPruningCallback", +] + + if TYPE_CHECKING: from optuna.integration.allennlp import AllenNLPExecutor from optuna.integration.allennlp import AllenNLPPruningCallback @@ -77,6 +112,7 @@ class _IntegrationModule(ModuleType): imports all submodules and their dependencies (e.g., chainer, keras, lightgbm) all at once. 
""" + __all__ = __all__ __file__ = globals()["__file__"] __path__ = [os.path.dirname(__file__)] @@ -113,37 +149,3 @@ def _get_module(self, module_name: str) -> ModuleType: ) sys.modules[__name__] = _IntegrationModule(__name__) - -__all__ = [ - "AllenNLPExecutor", - "AllenNLPPruningCallback", - "BoTorchSampler", - "CatalystPruningCallback", - "CatBoostPruningCallback", - "ChainerPruningExtension", - "ChainerMNStudy", - "CmaEsSampler", - "PyCmaSampler", - "DaskStorage", - "MLflowCallback", - "WeightsAndBiasesCallback", - "KerasPruningCallback", - "LightGBMPruningCallback", - "LightGBMTuner", - "LightGBMTunerCV", - "TorchDistributedTrial", - "PyTorchIgnitePruningHandler", - "PyTorchLightningPruningCallback", - "OptunaSearchCV", - "ShapleyImportanceEvaluator", - "SkorchPruningCallback", - "MXNetPruningCallback", - "SkoptSampler", - "TensorBoardCallback", - "TensorFlowPruningHook", - "TFKerasPruningCallback", - "XGBoostPruningCallback", - "FastAIV1PruningCallback", - "FastAIV2PruningCallback", - "FastAIPruningCallback", -] diff --git a/optuna/integration/lightgbm.py b/optuna/integration/lightgbm.py index 24c590e89c..3d9beedb92 100644 --- a/optuna/integration/lightgbm.py +++ b/optuna/integration/lightgbm.py @@ -1,6 +1,9 @@ -from optuna_integration.lightgbm import LightGBMPruningCallback -from optuna_integration.lightgbm import LightGBMTuner -from optuna_integration.lightgbm import LightGBMTunerCV +import os +import sys +from types import ModuleType +from typing import Any + +import optuna_integration.lightgbm as lgb __all__ = [ @@ -8,3 +11,17 @@ "LightGBMTuner", "LightGBMTunerCV", ] + + +class _LightGBMModule(ModuleType): + """Module class that implements `optuna.integration.lightgbm` package.""" + + __all__ = __all__ + __file__ = globals()["__file__"] + __path__ = [os.path.dirname(__file__)] + + def __getattr__(self, name: str) -> Any: + return lgb.__dict__[name] + + +sys.modules[__name__] = _LightGBMModule(__name__) From 66f7ba2bbd7e672e9d2d2271ef20664b6dc71a4c Mon Sep 17 00:00:00 2001 From: y0z Date: Wed, 14 Feb 2024 18:00:23 +0900 Subject: [PATCH 09/21] Fix lightgbm.py --- optuna/integration/lightgbm.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/optuna/integration/lightgbm.py b/optuna/integration/lightgbm.py index 3d9beedb92..e9bf70b710 100644 --- a/optuna/integration/lightgbm.py +++ b/optuna/integration/lightgbm.py @@ -2,10 +2,17 @@ import sys from types import ModuleType from typing import Any +from typing import TYPE_CHECKING import optuna_integration.lightgbm as lgb +if TYPE_CHECKING: + from optuna.integration.lightgbm_tuner import LightGBMPruningCallback + from optuna.integration.lightgbm_tuner import LightGBMTuner + from optuna.integration.lightgbm_tuner import LightGBMTunerCV + + __all__ = [ "LightGBMPruningCallback", "LightGBMTuner", From 5e602557891815e7012cbd8951c6e469aef626e4 Mon Sep 17 00:00:00 2001 From: y0z Date: Wed, 14 Feb 2024 18:24:40 +0900 Subject: [PATCH 10/21] Add optuna-integration to docs dependency. 
--- .github/workflows/sphinx-build.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/sphinx-build.yml b/.github/workflows/sphinx-build.yml index dab00cede7..96b06be292 100644 --- a/.github/workflows/sphinx-build.yml +++ b/.github/workflows/sphinx-build.yml @@ -40,6 +40,9 @@ jobs: python -m pip install -U pip pip install --progress-bar off -U .[document] --extra-index-url https://download.pytorch.org/whl/cpu + # Install optuna integration from optuna-integration master + pip install git+/~https://github.com/optuna/optuna-integration@master + - name: Output installed packages run: | pip freeze --all From 9a5b77a8f89b43c8b220b4e56ab60f69d7e40087 Mon Sep 17 00:00:00 2001 From: y0z Date: Wed, 14 Feb 2024 18:26:46 +0900 Subject: [PATCH 11/21] Fix branch name. --- .github/workflows/sphinx-build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sphinx-build.yml b/.github/workflows/sphinx-build.yml index 96b06be292..d1f34e4ff6 100644 --- a/.github/workflows/sphinx-build.yml +++ b/.github/workflows/sphinx-build.yml @@ -40,8 +40,8 @@ jobs: python -m pip install -U pip pip install --progress-bar off -U .[document] --extra-index-url https://download.pytorch.org/whl/cpu - # Install optuna integration from optuna-integration master - pip install git+/~https://github.com/optuna/optuna-integration@master + # Install optuna integration from optuna-integration main + pip install git+/~https://github.com/optuna/optuna-integration@main - name: Output installed packages run: | From a4f99fba02d74fc0a393c47b40239de186d16917 Mon Sep 17 00:00:00 2001 From: y0z Date: Wed, 14 Feb 2024 18:44:43 +0900 Subject: [PATCH 12/21] Update sphinx-build.yml. --- .github/workflows/sphinx-build.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/sphinx-build.yml b/.github/workflows/sphinx-build.yml index d1f34e4ff6..79b08d76c3 100644 --- a/.github/workflows/sphinx-build.yml +++ b/.github/workflows/sphinx-build.yml @@ -38,10 +38,8 @@ jobs: - name: Install Dependencies run: | python -m pip install -U pip - pip install --progress-bar off -U .[document] --extra-index-url https://download.pytorch.org/whl/cpu - - # Install optuna integration from optuna-integration main pip install git+/~https://github.com/optuna/optuna-integration@main + pip install --progress-bar off -U .[document] --extra-index-url https://download.pytorch.org/whl/cpu - name: Output installed packages run: | @@ -94,6 +92,7 @@ jobs: - name: Install Dependencies run: | python -m pip install -U pip + pip install git+/~https://github.com/optuna/optuna-integration@main pip install --progress-bar off -U .[document] --extra-index-url https://download.pytorch.org/whl/cpu - name: Output installed packages From 6301d07bf8c58b12e68274525c51ef3e0b3ee27d Mon Sep 17 00:00:00 2001 From: y0z Date: Wed, 14 Feb 2024 20:40:09 +0900 Subject: [PATCH 13/21] Update .readthedocs.yml --- .readthedocs.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 6b83497198..48c37927ad 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -9,6 +9,8 @@ build: os: ubuntu-22.04 tools: python: "3.11" + commands: + pip install git+/~https://github.com/optuna/optuna-integration@main # Build documentation in the docs/ directory with Sphinx sphinx: @@ -19,7 +21,7 @@ formats: all # Optionally set the version of Python and requirements required to build your docs python: - # `sphinx` requires either Python >= 3.8 or `typed-ast` to reflect type 
comments + # `sphinx` requires either Python >= 3.8 or `typed-ast` to reflect type comments # in the documentation. See: /~https://github.com/sphinx-doc/sphinx/pull/6984 install: - method: pip From 0f617ba0d98468f8078dc2cd3982c1cd4bae9e9c Mon Sep 17 00:00:00 2001 From: y0z Date: Wed, 14 Feb 2024 20:41:01 +0900 Subject: [PATCH 14/21] Update .readthedocs.yml --- .readthedocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 48c37927ad..fb8a8430b6 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -10,7 +10,7 @@ build: tools: python: "3.11" commands: - pip install git+/~https://github.com/optuna/optuna-integration@main + - pip install git+/~https://github.com/optuna/optuna-integration@main # Build documentation in the docs/ directory with Sphinx sphinx: From 9e1967249048d28bf1319423362f36567419b40a Mon Sep 17 00:00:00 2001 From: y0z Date: Wed, 14 Feb 2024 20:43:53 +0900 Subject: [PATCH 15/21] Update .readthedocs.yml --- .readthedocs.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index fb8a8430b6..33b2e582b4 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -9,8 +9,9 @@ build: os: ubuntu-22.04 tools: python: "3.11" - commands: - - pip install git+/~https://github.com/optuna/optuna-integration@main + jobs: + post_create_environment: + - pip install git+/~https://github.com/optuna/optuna-integration@main # Build documentation in the docs/ directory with Sphinx sphinx: From 0aaa07f3460e58191e240fa8d7f6e43924841f46 Mon Sep 17 00:00:00 2001 From: y0z Date: Wed, 14 Feb 2024 20:47:10 +0900 Subject: [PATCH 16/21] Update .readthedocs.yml --- .readthedocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 33b2e582b4..ce80dbe166 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -10,7 +10,7 @@ build: tools: python: "3.11" jobs: - post_create_environment: + post_install: - pip install git+/~https://github.com/optuna/optuna-integration@main # Build documentation in the docs/ directory with Sphinx From 02c323673bd62328093f1e7957121e607b07810e Mon Sep 17 00:00:00 2001 From: y0z Date: Thu, 15 Feb 2024 10:54:20 +0900 Subject: [PATCH 17/21] Update tutorials. --- .../003_efficient_optimization_algorithms.py | 9 +++------ tutorial/10_key_features/005_visualization.py | 19 +++++-------------- tutorial/20_recipes/008_specify_params.py | 12 ++---------- 3 files changed, 10 insertions(+), 30 deletions(-) diff --git a/tutorial/10_key_features/003_efficient_optimization_algorithms.py b/tutorial/10_key_features/003_efficient_optimization_algorithms.py index 598b4beaa3..c2243cef77 100644 --- a/tutorial/10_key_features/003_efficient_optimization_algorithms.py +++ b/tutorial/10_key_features/003_efficient_optimization_algorithms.py @@ -183,10 +183,7 @@ def objective(trial): # # .. code-block:: python # -# try: -# import optuna_integration +# import optuna_integration # -# pruning_callback = optuna_integration.LightGBMPruningCallback(trial, 'validation-error') -# gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) -# except ImportError: -# ... 
+# pruning_callback = optuna_integration.LightGBMPruningCallback(trial, 'validation-error') +# gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) diff --git a/tutorial/10_key_features/005_visualization.py b/tutorial/10_key_features/005_visualization.py index c91bdd7965..b6a8fb3210 100644 --- a/tutorial/10_key_features/005_visualization.py +++ b/tutorial/10_key_features/005_visualization.py @@ -53,12 +53,7 @@ from optuna.visualization import plot_slice from optuna.visualization import plot_timeline -try: - import optuna_integration - - is_integration_available = True -except ImportError: - is_integration_available = False +import optuna_integration SEED = 42 @@ -84,11 +79,8 @@ def objective(trial): "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), } - if is_integration_available: - pruning_callback = optuna_integration.LightGBMPruningCallback(trial, "auc") - gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) - else: - gbm = lgb.train(param, dtrain, valid_sets=[dvalid]) + pruning_callback = optuna_integration.LightGBMPruningCallback(trial, "auc") + gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) preds = gbm.predict(valid_x) pred_labels = np.rint(preds) @@ -111,8 +103,8 @@ def objective(trial): plot_optimization_history(study) ################################################################################################### -# Visualize the learning curves of the trials (note: this example requires `optuna_integration`). -# See :func:`~optuna.visualization.plot_intermediate_values` for the details and another example of visualization. +# Visualize the learning curves of the trials. +# See :func:`~optuna.visualization.plot_intermediate_values` for the details. plot_intermediate_values(study) ################################################################################################### @@ -168,7 +160,6 @@ def objective(trial): # :class:`plotly.graph_objects.Figure` or :class:`matplotlib.axes.Axes` depending on the module. # This allows users to modify the generated figure for their demand by using API of the visualization library. # The following example replaces figure titles drawn by Plotly-based :func:`~optuna.visualization.plot_intermediate_values` manually. -# (Note: this example requires `optuna_integration`.) fig = plot_intermediate_values(study) fig.update_layout( diff --git a/tutorial/20_recipes/008_specify_params.py b/tutorial/20_recipes/008_specify_params.py index 244ed85751..c5c832bf7a 100644 --- a/tutorial/20_recipes/008_specify_params.py +++ b/tutorial/20_recipes/008_specify_params.py @@ -37,13 +37,8 @@ import optuna -try: import optuna_integration - is_integration_available = True -except ImportError: - is_integration_available = False - ################################################################################################### # Define the objective function. 
@@ -63,11 +58,8 @@ def objective(trial): "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), } - if is_integration_available: - pruning_callback = optuna_integration.LightGBMPruningCallback(trial, "auc") - gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) - else: - gbm = lgb.train(param, dtrain, valid_sets=[dvalid]) + pruning_callback = optuna_integration.LightGBMPruningCallback(trial, "auc") + gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) preds = gbm.predict(valid_x) pred_labels = np.rint(preds) From c17e5fe0932812673aaeea4fe547f3e56ad04211 Mon Sep 17 00:00:00 2001 From: y0z Date: Thu, 15 Feb 2024 10:56:07 +0900 Subject: [PATCH 18/21] Fix tutorial. --- tutorial/20_recipes/008_specify_params.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorial/20_recipes/008_specify_params.py b/tutorial/20_recipes/008_specify_params.py index c5c832bf7a..cd27b07546 100644 --- a/tutorial/20_recipes/008_specify_params.py +++ b/tutorial/20_recipes/008_specify_params.py @@ -37,7 +37,7 @@ import optuna - import optuna_integration +import optuna_integration ################################################################################################### From a415c1af16b1d1ebda7302fa9a4d9433758afa81 Mon Sep 17 00:00:00 2001 From: y0z Date: Thu, 15 Feb 2024 11:04:04 +0900 Subject: [PATCH 19/21] Update tutorials. --- tutorial/10_key_features/005_visualization.py | 4 ++-- tutorial/20_recipes/008_specify_params.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tutorial/10_key_features/005_visualization.py b/tutorial/10_key_features/005_visualization.py index b6a8fb3210..926b4a9e75 100644 --- a/tutorial/10_key_features/005_visualization.py +++ b/tutorial/10_key_features/005_visualization.py @@ -79,6 +79,7 @@ def objective(trial): "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), } + # Add a callback for pruning. pruning_callback = optuna_integration.LightGBMPruningCallback(trial, "auc") gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) @@ -103,8 +104,7 @@ def objective(trial): plot_optimization_history(study) ################################################################################################### -# Visualize the learning curves of the trials. -# See :func:`~optuna.visualization.plot_intermediate_values` for the details. +# Visualize the learning curves of the trials. See :func:`~optuna.visualization.plot_intermediate_values` for the details. plot_intermediate_values(study) ################################################################################################### diff --git a/tutorial/20_recipes/008_specify_params.py b/tutorial/20_recipes/008_specify_params.py index cd27b07546..289dcc7d0a 100644 --- a/tutorial/20_recipes/008_specify_params.py +++ b/tutorial/20_recipes/008_specify_params.py @@ -58,6 +58,7 @@ def objective(trial): "min_child_samples": trial.suggest_int("min_child_samples", 5, 100), } + # Add a callback for pruning. pruning_callback = optuna_integration.LightGBMPruningCallback(trial, "auc") gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) From 4ce4c6ccd300603a150ca91fb5af09efdf34b2c7 Mon Sep 17 00:00:00 2001 From: y0z Date: Thu, 15 Feb 2024 15:04:20 +0900 Subject: [PATCH 20/21] Update docs. 
--- .github/workflows/sphinx-build.yml | 2 -- .readthedocs.yml | 3 --- .../003_efficient_optimization_algorithms.py | 6 ++--- tutorial/10_key_features/005_visualization.py | 24 +------------------ tutorial/20_recipes/008_specify_params.py | 5 +--- 5 files changed, 5 insertions(+), 35 deletions(-) diff --git a/.github/workflows/sphinx-build.yml b/.github/workflows/sphinx-build.yml index 79b08d76c3..dab00cede7 100644 --- a/.github/workflows/sphinx-build.yml +++ b/.github/workflows/sphinx-build.yml @@ -38,7 +38,6 @@ jobs: - name: Install Dependencies run: | python -m pip install -U pip - pip install git+/~https://github.com/optuna/optuna-integration@main pip install --progress-bar off -U .[document] --extra-index-url https://download.pytorch.org/whl/cpu - name: Output installed packages @@ -92,7 +91,6 @@ jobs: - name: Install Dependencies run: | python -m pip install -U pip - pip install git+/~https://github.com/optuna/optuna-integration@main pip install --progress-bar off -U .[document] --extra-index-url https://download.pytorch.org/whl/cpu - name: Output installed packages diff --git a/.readthedocs.yml b/.readthedocs.yml index ce80dbe166..cc21c52ee8 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -9,9 +9,6 @@ build: os: ubuntu-22.04 tools: python: "3.11" - jobs: - post_install: - - pip install git+/~https://github.com/optuna/optuna-integration@main # Build documentation in the docs/ directory with Sphinx sphinx: diff --git a/tutorial/10_key_features/003_efficient_optimization_algorithms.py b/tutorial/10_key_features/003_efficient_optimization_algorithms.py index c2243cef77..a095c55d54 100644 --- a/tutorial/10_key_features/003_efficient_optimization_algorithms.py +++ b/tutorial/10_key_features/003_efficient_optimization_algorithms.py @@ -181,9 +181,9 @@ def objective(trial): # For example, `optuna_integration.LightGBMPruningCallback `_ introduces pruning without directly changing the logic of training iteration. # (See also `example `_ for the entire script.) # -# .. code-block:: python +# .. code-block:: text # -# import optuna_integration +# import optuna.integration # -# pruning_callback = optuna_integration.LightGBMPruningCallback(trial, 'validation-error') +# pruning_callback = optuna.integration.LightGBMPruningCallback(trial, 'validation-error') # gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) diff --git a/tutorial/10_key_features/005_visualization.py b/tutorial/10_key_features/005_visualization.py index 926b4a9e75..8e1e444e81 100644 --- a/tutorial/10_key_features/005_visualization.py +++ b/tutorial/10_key_features/005_visualization.py @@ -53,8 +53,6 @@ from optuna.visualization import plot_slice from optuna.visualization import plot_timeline -import optuna_integration - SEED = 42 @@ -80,8 +78,7 @@ def objective(trial): } # Add a callback for pruning. - pruning_callback = optuna_integration.LightGBMPruningCallback(trial, "auc") - gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) + gbm = lgb.train(param, dtrain, valid_sets=[dvalid]) preds = gbm.predict(valid_x) pred_labels = np.rint(preds) @@ -103,10 +100,6 @@ def objective(trial): # Visualize the optimization history. See :func:`~optuna.visualization.plot_optimization_history` for the details. plot_optimization_history(study) -################################################################################################### -# Visualize the learning curves of the trials. See :func:`~optuna.visualization.plot_intermediate_values` for the details. 
-plot_intermediate_values(study) - ################################################################################################### # Visualize high-dimensional parameter relationships. See :func:`~optuna.visualization.plot_parallel_coordinate` for the details. plot_parallel_coordinate(study) @@ -152,18 +145,3 @@ def objective(trial): ################################################################################################### # Visualize the optimization timeline of performed trials. See :func:`~optuna.visualization.plot_timeline` for the details. plot_timeline(study) - -################################################################################################### -# Customize generated figures -# --------------------------- -# In :mod:`optuna.visualization` and :mod:`optuna.visualization.matplotlib`, a function returns an editable figure object: -# :class:`plotly.graph_objects.Figure` or :class:`matplotlib.axes.Axes` depending on the module. -# This allows users to modify the generated figure for their demand by using API of the visualization library. -# The following example replaces figure titles drawn by Plotly-based :func:`~optuna.visualization.plot_intermediate_values` manually. -fig = plot_intermediate_values(study) - -fig.update_layout( - title="Hyperparameter optimization for GBDT-based binary classification", - xaxis_title="Iteration", - yaxis_title="Validation AUC", -) diff --git a/tutorial/20_recipes/008_specify_params.py b/tutorial/20_recipes/008_specify_params.py index 289dcc7d0a..6cdea0bae4 100644 --- a/tutorial/20_recipes/008_specify_params.py +++ b/tutorial/20_recipes/008_specify_params.py @@ -37,8 +37,6 @@ import optuna -import optuna_integration - ################################################################################################### # Define the objective function. @@ -59,8 +57,7 @@ def objective(trial): } # Add a callback for pruning. - pruning_callback = optuna_integration.LightGBMPruningCallback(trial, "auc") - gbm = lgb.train(param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]) + gbm = lgb.train(param, dtrain, valid_sets=[dvalid]) preds = gbm.predict(valid_x) pred_labels = np.rint(preds) From 591a0ede47f305b0fa81c8667843b2f92a291db5 Mon Sep 17 00:00:00 2001 From: Yoshihiko Ozaki <30489874+y0z@users.noreply.github.com> Date: Thu, 15 Feb 2024 15:04:42 +0900 Subject: [PATCH 21/21] Update tutorial/10_key_features/003_efficient_optimization_algorithms.py Co-authored-by: Hideaki Imamura <38826298+HideakiImamura@users.noreply.github.com> --- .../10_key_features/003_efficient_optimization_algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorial/10_key_features/003_efficient_optimization_algorithms.py b/tutorial/10_key_features/003_efficient_optimization_algorithms.py index a095c55d54..0ac12996f1 100644 --- a/tutorial/10_key_features/003_efficient_optimization_algorithms.py +++ b/tutorial/10_key_features/003_efficient_optimization_algorithms.py @@ -178,7 +178,7 @@ def objective(trial): # # For the complete list of Optuna's integration modules, see :mod:`~optuna.integration`. # -# For example, `optuna_integration.LightGBMPruningCallback `_ introduces pruning without directly changing the logic of training iteration. +# For example, `LightGBMPruningCallback `_ introduces pruning without directly changing the logic of training iteration. # (See also `example `_ for the entire script.) # # .. code-block:: text
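
A minimal, self-contained sketch of the module-proxy mechanism that PATCH 08/21 and 09/21 introduce in optuna/integration/lightgbm.py (a ModuleType subclass whose __getattr__ forwards attribute lookups to optuna_integration.lightgbm) may help when reviewing. The module name "example_proxy" and the stand-in backend below are illustrative placeholders only, not part of the patches; only the delegation mechanism mirrors the actual change.

import sys
import types


# Stand-in for the real backend module (optuna_integration.lightgbm in the patch).
_backend = types.ModuleType("backend")
_backend.LightGBMPruningCallback = object  # placeholder attribute for illustration


class _ProxyModule(types.ModuleType):
    """Forwards attribute access to the backend module, as the patched module does."""

    def __getattr__(self, name: str):
        # Invoked only when normal lookup on this module fails, so attributes
        # defined directly on the proxy (e.g. __all__, __file__) still resolve locally.
        try:
            return getattr(_backend, name)
        except AttributeError:
            raise AttributeError(f"module {self.__name__!r} has no attribute {name!r}")


# Mirrors `sys.modules[__name__] = _LightGBMModule(__name__)` in the patch:
# replacing the entry in sys.modules makes subsequent imports resolve to the proxy.
sys.modules["example_proxy"] = _ProxyModule("example_proxy")

import example_proxy  # resolves to the proxy registered above

print(example_proxy.LightGBMPruningCallback)  # forwarded to the stand-in backend

The TYPE_CHECKING imports added in PATCH 09/21 serve static type checkers only; at runtime every attribute lookup on the module goes through the __getattr__ delegation sketched above.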