From f1b1b2fc4083d2007b4bf7d1a298bd139afa4765 Mon Sep 17 00:00:00 2001
From: nabenabe0928
Date: Sat, 9 Mar 2024 19:15:25 +0100
Subject: [PATCH 01/17] Speed up the non-dominated sort

---
 optuna/study/_multi_objective.py | 151 ++++++++++++++++++-------
 1 file changed, 88 insertions(+), 63 deletions(-)

diff --git a/optuna/study/_multi_objective.py b/optuna/study/_multi_objective.py
index bb23eb397b..b164c5db45 100644
--- a/optuna/study/_multi_objective.py
+++ b/optuna/study/_multi_objective.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-from collections import defaultdict
 from collections.abc import Sequence
 
 import numpy as np
@@ -101,7 +100,7 @@ def _get_pareto_front_trials(
 
 
 def _fast_non_dominated_sort(
-    objective_values: np.ndarray,
+    loss_values: np.ndarray,
     *,
     penalty: np.ndarray | None = None,
     n_below: int | None = None,
@@ -127,8 +126,8 @@
     Plus, the algorithm terminates whenever the number of sorted trials reaches n_below.
 
     Args:
-        objective_values:
-            Objective values of each trials.
+        loss_values:
+            Objective values of each trial; a lower value is better.
         penalty:
            Constraint values of each trial. Defaults to None.
        n_below: The minimum number of top trials required to be sorted. The algorithm will
@@ -140,89 +139,115 @@
         the trial was sorted.
     """
     if penalty is None:
-        ranks, _ = _calculate_nondomination_rank(objective_values, n_below=n_below)
-        return ranks
+        return _calculate_nondomination_rank(loss_values, n_below=n_below)
 
-    if len(penalty) != len(objective_values):
+    if len(penalty) != len(loss_values):
         raise ValueError(
-            "The length of penalty and objective_values must be same, but got "
-            "len(penalty)={} and len(objective_values)={}.".format(
-                len(penalty), len(objective_values)
-            )
+            "The length of penalty and loss_values must be same, but got "
+            f"len(penalty)={len(penalty)} and len(loss_values)={len(loss_values)}."
         )
+    nondomination_rank = np.full(len(loss_values), -1, dtype=int)
     is_penalty_nan = np.isnan(penalty)
     n_below = n_below or len(loss_values)
 
     # First, we calculate the domination rank for feasible trials.
     is_feasible = np.logical_and(~is_penalty_nan, penalty <= 0)
-    ranks, bottom_rank = _calculate_nondomination_rank(
-        objective_values[is_feasible], n_below=n_below
+    nondomination_rank[is_feasible] = _calculate_nondomination_rank(
+        loss_values[is_feasible], n_below=n_below
     )
-    nondomination_rank[is_feasible] += 1 + ranks
     n_below -= np.count_nonzero(is_feasible)
 
     # Second, we calculate the domination rank for infeasible trials.
+    top_rank_in_infeasible = np.max(nondomination_rank[is_feasible], initial=-1) + 1
     is_infeasible = np.logical_and(~is_penalty_nan, penalty > 0)
-    num_infeasible_trials = np.count_nonzero(is_infeasible)
-    if num_infeasible_trials > 0:
-        _, ranks = np.unique(penalty[is_infeasible], return_inverse=True)
-        ranks += 1
-        nondomination_rank[is_infeasible] += 1 + bottom_rank + ranks
-        bottom_rank += np.max(ranks)
-        n_below -= num_infeasible_trials
+    n_infeasible = np.count_nonzero(is_infeasible)
+    if is_infeasible > 0:
+        _, ranks_in_infeas = np.unique(penalty[is_infeasible], return_inverse=True)
+        nondomination_rank[is_infeasible] = ranks_in_infeas + top_rank_in_infeasible
+        n_below -= n_infeasible
 
     # Third, we calculate the domination rank for trials with no penalty information.
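+    # A NaN penalty means that no constraint values were reported for the trial;
+    # such trials are ranked below every feasible and infeasible trial.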
-    ranks, _ = _calculate_nondomination_rank(
-        objective_values[is_penalty_nan], n_below=n_below, base_rank=bottom_rank + 1
+    top_rank_in_penalty_nan = np.max(nondomination_rank[~is_penalty_nan], initial=-1) + 1
+    ranks_in_penalty_nan = _calculate_nondomination_rank(
+        loss_values[is_penalty_nan], n_below=n_below
     )
-    nondomination_rank[is_penalty_nan] += 1 + ranks
+    nondomination_rank[is_penalty_nan] = ranks_in_penalty_nan + top_rank_in_penalty_nan
 
     return nondomination_rank
 
 
+def _is_pareto_front_nd(unique_lexsorted_loss_values: np.ndarray) -> np.ndarray:
+    loss_values = unique_lexsorted_loss_values.copy()
+    n_trials = loss_values.shape[0]
+    on_front = np.zeros(n_trials, dtype=bool)
+    nondominated_indices = np.arange(n_trials)
+    while len(loss_values):
+        nondominated_and_not_top = np.any(loss_values < loss_values[0], axis=1)
+        # NOTE: trials[j] cannot dominate trials[i] for i < j because of lexsort.
+        # Therefore, nondominated_indices[0] is always non-dominated.
+        on_front[nondominated_indices[0]] = True
+        loss_values = loss_values[nondominated_and_not_top]
+        nondominated_indices = nondominated_indices[nondominated_and_not_top]
+
+    return on_front
+
+
+def _is_pareto_front_2d(unique_lexsorted_loss_values: np.ndarray) -> np.ndarray:
+    n_trials = unique_lexsorted_loss_values.shape[0]
+    cummin_value1 = np.minimum.accumulate(unique_lexsorted_loss_values[:, 1])
+    is_value1_min = cummin_value1 == unique_lexsorted_loss_values[:, 1]
+    is_value1_new_min = cummin_value1[1:] < cummin_value1[:-1]
+    on_front = np.ones(n_trials, dtype=bool)
+    on_front[1:] = is_value1_min[1:] & is_value1_new_min
+    return on_front
+
+
+def _is_pareto_front(unique_lexsorted_loss_values: np.ndarray) -> np.ndarray:
+    (n_trials, n_objectives) = unique_lexsorted_loss_values.shape
+    if n_objectives == 1:
+        return unique_lexsorted_loss_values[:, 0] == unique_lexsorted_loss_values[0]
+    elif n_objectives == 2:
+        return _is_pareto_front_2d(unique_lexsorted_loss_values)
+    else:
+        return _is_pareto_front_nd(unique_lexsorted_loss_values)
+
+
 def _calculate_nondomination_rank(
-    objective_values: np.ndarray,
-    *,
-    n_below: int | None = None,
-    base_rank: int = 0,
-) -> tuple[np.ndarray, int]:
+    loss_values: np.ndarray, *, n_below: int | None = None
+) -> np.ndarray:
     if n_below is not None and n_below <= 0:
-        return np.full(len(objective_values), -1), base_rank
+        return np.zeros(len(loss_values), dtype=int)
+
     # Normalize n_below.
-    n_below = n_below or len(objective_values)
-    n_below = min(n_below, len(objective_values))
-
-    # The ndarray `domination_mat` is a boolean 2d matrix where
-    # `domination_mat[i, j] == True` means that the j-th trial dominates the i-th trial in the
-    # given multi objective minimization problem.
-    domination_mat = np.all(
-        objective_values[:, np.newaxis, :] >= objective_values[np.newaxis, :, :], axis=2
-    ) & np.any(objective_values[:, np.newaxis, :] > objective_values[np.newaxis, :, :], axis=2)
-
-    domination_list = np.nonzero(domination_mat)
-    domination_map = defaultdict(list)
-    for dominated_idx, dominating_idx in zip(*domination_list):
-        domination_map[dominating_idx].append(dominated_idx)
-
-    ranks = np.full(len(objective_values), -1)
-    dominated_count = np.sum(domination_mat, axis=1)
-
-    rank = base_rank - 1
-    ranked_idx_num = 0
-    while ranked_idx_num < n_below:
-        # Find the non-dominated trials and assign the rank.
-        (non_dominated_idxs,) = np.nonzero(dominated_count == 0)
-        ranked_idx_num += len(non_dominated_idxs)
-        rank += 1
-        ranks[non_dominated_idxs] = rank
+    n_below = n_below or len(loss_values)
+    n_below = min(n_below, len(loss_values))
 
-        # Update the dominated count.
-        dominated_count[non_dominated_idxs] = -1
-        for non_dominated_idx in non_dominated_idxs:
-            dominated_count[domination_map[non_dominated_idx]] -= 1
+    (n_trials, n_objectives) = loss_values.shape
+    if n_objectives == 1:
+        _, ranks = np.unique(loss_values[:, 0], return_inverse=True)
+        return ranks
+    else:
+        # It ensures that trials[j] will not dominate trials[i] for i < j.
+        # np.unique does lexsort.
+        unique_lexsorted_loss_values, order_inv = np.unique(
+            loss_values, return_inverse=True, axis=0
+        )
+
+        n_unique = unique_lexsorted_loss_values.shape[0]
+        ranks = np.zeros(n_unique, dtype=int)
+        rank = 0
+        indices = np.arange(n_unique)
+        while n_unique - indices.size < n_below:
+            on_front = _is_pareto_front(unique_lexsorted_loss_values)
+            ranks[indices[on_front]] = rank
+            # Remove the recent Pareto solutions.
+            indices = indices[~on_front]
+            unique_lexsorted_loss_values = unique_lexsorted_loss_values[~on_front]
+            rank += 1
 
-    return ranks, rank
+    ranks[indices] = rank  # Rank worse than the top n_below is defined as the worst rank.
+    return ranks[order_inv]
 
 
 def _dominates(

From e46f193115f0609869cb802a444329ebe1eb5c71 Mon Sep 17 00:00:00 2001
From: nabenabe0928
Date: Sat, 9 Mar 2024 19:21:33 +0100
Subject: [PATCH 02/17] Add an assertion against unexpected non-updates

---
 optuna/study/_multi_objective.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optuna/study/_multi_objective.py b/optuna/study/_multi_objective.py
index b164c5db45..96d0f54946 100644
--- a/optuna/study/_multi_objective.py
+++ b/optuna/study/_multi_objective.py
@@ -173,7 +173,7 @@ def _fast_non_dominated_sort(
         loss_values[is_penalty_nan], n_below=n_below
     )
     nondomination_rank[is_penalty_nan] = ranks_in_penalty_nan + top_rank_in_penalty_nan
-
+    assert np.all(nondomination_rank != -1), "All the rank must be updated."
     return nondomination_rank
 
 
From 68493a8720b672218fe5f0411ce810c371ad8c78 Mon Sep 17 00:00:00 2001
From: nabenabe0928
Date: Sun, 10 Mar 2024 09:36:21 +0100
Subject: [PATCH 03/17] Fix the handling of duplicated loss values

---
 optuna/study/_multi_objective.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/optuna/study/_multi_objective.py b/optuna/study/_multi_objective.py
index 96d0f54946..7dad60512e 100644
--- a/optuna/study/_multi_objective.py
+++ b/optuna/study/_multi_objective.py
@@ -135,8 +135,8 @@ def _fast_non_dominated_sort(
 
     Returns:
         An ndarray in the shape of (n_trials,), where each element is the non-dominated rank of
-        each trial. The rank is 0-indexed and rank -1 means that the algorithm terminated before
-        the trial was sorted.
+        each trial. The rank is 0-indexed. All the rank worse than the top-n_below has the same
+        value larger than or equal to n_below.
     """
     if penalty is None:
         return _calculate_nondomination_rank(loss_values, n_below=n_below)
@@ -219,10 +219,6 @@ def _calculate_nondomination_rank(
     if n_below is not None and n_below <= 0:
         return np.zeros(len(loss_values), dtype=int)
 
-    # Normalize n_below.
-    n_below = n_below or len(loss_values)
-    n_below = min(n_below, len(loss_values))
-
     (n_trials, n_objectives) = loss_values.shape
     if n_objectives == 1:
         _, ranks = np.unique(loss_values[:, 0], return_inverse=True)
         return ranks
@@ -235,6 +231,8 @@ def _calculate_nondomination_rank(
         )
 
         n_unique = unique_lexsorted_loss_values.shape[0]
+        # Clip n_below.
+        n_below = min(n_below or len(unique_lexsorted_loss_values), len(unique_lexsorted_loss_values))
         ranks = np.zeros(n_unique, dtype=int)
         rank = 0
         indices = np.arange(n_unique)

From 96c6ce533902b69d00c59d71957bc530e53f027a Mon Sep 17 00:00:00 2001
From: nabenabe0928
Date: Sun, 10 Mar 2024 10:07:57 +0100
Subject: [PATCH 04/17] Fix a bug in an if statement

---
 optuna/study/_multi_objective.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optuna/study/_multi_objective.py b/optuna/study/_multi_objective.py
index 7dad60512e..ac776601cb 100644
--- a/optuna/study/_multi_objective.py
+++ b/optuna/study/_multi_objective.py
@@ -162,7 +162,7 @@ def _fast_non_dominated_sort(
     top_rank_in_infeasible = np.max(nondomination_rank[is_feasible], initial=-1) + 1
     is_infeasible = np.logical_and(~is_penalty_nan, penalty > 0)
     n_infeasible = np.count_nonzero(is_infeasible)
-    if is_infeasible > 0:
+    if n_infeasible > 0:
         _, ranks_in_infeas = np.unique(penalty[is_infeasible], return_inverse=True)
         nondomination_rank[is_infeasible] = ranks_in_infeas + top_rank_in_infeasible
         n_below -= n_infeasible

From 3e771a24f68b9985744659ad5b088c353168ca02 Mon Sep 17 00:00:00 2001
From: nabenabe0928
Date: Mon, 11 Mar 2024 16:40:27 +0100
Subject: [PATCH 05/17] Add a note about Kung's algorithm

---
 optuna/study/_multi_objective.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/optuna/study/_multi_objective.py b/optuna/study/_multi_objective.py
index ac776601cb..359f808c28 100644
--- a/optuna/study/_multi_objective.py
+++ b/optuna/study/_multi_objective.py
@@ -178,6 +178,8 @@ def _fast_non_dominated_sort(
 
 
 def _is_pareto_front_nd(unique_lexsorted_loss_values: np.ndarray) -> np.ndarray:
+    # NOTE(nabenabe0928): I tried Kung's algorithm below, but it was not really quick.
+ # /~https://github.com/optuna/optuna/pull/5302#issuecomment-1988665532 loss_values = unique_lexsorted_loss_values.copy() n_trials = loss_values.shape[0] on_front = np.zeros(n_trials, dtype=bool) From b768800544642e02f9a2a14658d2ca3d8d8a058e Mon Sep 17 00:00:00 2001 From: nabenabe0928 Date: Mon, 11 Mar 2024 17:06:32 +0100 Subject: [PATCH 06/17] Replace the default pareto front for trial with the new one --- optuna/study/_multi_objective.py | 66 +++----------------------------- 1 file changed, 5 insertions(+), 61 deletions(-) diff --git a/optuna/study/_multi_objective.py b/optuna/study/_multi_objective.py index 359f808c28..abebd80b5d 100644 --- a/optuna/study/_multi_objective.py +++ b/optuna/study/_multi_objective.py @@ -22,75 +22,19 @@ def _get_feasible_trials(trials: Sequence[FrozenTrial]) -> list[FrozenTrial]: return feasible_trials -def _get_pareto_front_trials_2d( - trials: Sequence[FrozenTrial], - directions: Sequence[StudyDirection], - consider_constraint: bool = False, -) -> list[FrozenTrial]: - trials = [t for t in trials if t.state == TrialState.COMPLETE] - if consider_constraint: - trials = _get_feasible_trials(trials) - - n_trials = len(trials) - if n_trials == 0: - return [] - - trials.sort( - key=lambda trial: ( - _normalize_value(trial.values[0], directions[0]), - _normalize_value(trial.values[1], directions[1]), - ), - ) - - last_nondominated_trial = trials[0] - pareto_front = [last_nondominated_trial] - for i in range(1, n_trials): - trial = trials[i] - if _dominates(last_nondominated_trial, trial, directions): - continue - pareto_front.append(trial) - last_nondominated_trial = trial - - pareto_front.sort(key=lambda trial: trial.number) - return pareto_front - - -def _get_pareto_front_trials_nd( +def _get_pareto_front_trials_by_trials( trials: Sequence[FrozenTrial], directions: Sequence[StudyDirection], consider_constraint: bool = False, ) -> list[FrozenTrial]: - pareto_front = [] trials = [t for t in trials if t.state == TrialState.COMPLETE] if consider_constraint: trials = _get_feasible_trials(trials) - # TODO(vincent): Optimize (use the fast non dominated sort defined in the NSGA-II paper). - for trial in trials: - dominated = False - for other in trials: - if _dominates(other, trial, directions): - dominated = True - break - - if not dominated: - pareto_front.append(trial) - - return pareto_front - - -def _get_pareto_front_trials_by_trials( - trials: Sequence[FrozenTrial], - directions: Sequence[StudyDirection], - consider_constraint: bool = False, -) -> list[FrozenTrial]: - if len(directions) == 2: - return _get_pareto_front_trials_2d( - trials, directions, consider_constraint - ) # Log-linear in number of trials. - return _get_pareto_front_trials_nd( - trials, directions, consider_constraint - ) # Quadratic in number of trials. 
+ loss_values = np.asarray([[_normalize_value(v, d) for v, d in (t.values, directions)] for t in trials]) + unique_lexsorted_loss_values, order_inv = np.unique(loss_values, axis=0) + on_front = _is_pareto_front(unique_lexsorted_loss_values)[order_inv] + return [t for t, is_pareto in zip(trials, on_front) if is_pareto] def _get_pareto_front_trials( From 30821c90e415513f32bcfea04413bc052db25c8a Mon Sep 17 00:00:00 2001 From: nabenabe0928 Date: Mon, 11 Mar 2024 17:15:14 +0100 Subject: [PATCH 07/17] Add an error for mismatched objective counts --- optuna/study/_multi_objective.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/optuna/study/_multi_objective.py b/optuna/study/_multi_objective.py index abebd80b5d..1d32ac971d 100644 --- a/optuna/study/_multi_objective.py +++ b/optuna/study/_multi_objective.py @@ -31,6 +31,11 @@ def _get_pareto_front_trials_by_trials( if consider_constraint: trials = _get_feasible_trials(trials) + if any(len(t.values) != len(directions) for t in trials): + raise ValueError( + "The number of the values and the number of the objectives must be identical." + ) + loss_values = np.asarray([[_normalize_value(v, d) for v, d in (t.values, directions)] for t in trials]) unique_lexsorted_loss_values, order_inv = np.unique(loss_values, axis=0) on_front = _is_pareto_front(unique_lexsorted_loss_values)[order_inv] @@ -226,7 +231,7 @@ def _dominates( return all(v0 <= v1 for v0, v1 in zip(normalized_values0, normalized_values1)) -def _normalize_value(value: None | float, direction: StudyDirection) -> float: +def _normalize_value(value: float | None, direction: StudyDirection) -> float: if value is None: value = float("inf") From 3dc3db733eb43536b12e9aa06206544c72d3cf13 Mon Sep 17 00:00:00 2001 From: nabenabe0928 Date: Tue, 12 Mar 2024 17:15:13 +0100 Subject: [PATCH 08/17] Adapt the tests for get_pareto_front --- optuna/study/_multi_objective.py | 11 +- tests/test_multi_objective.py | 185 +++++++++++-------------------- 2 files changed, 71 insertions(+), 125 deletions(-) diff --git a/optuna/study/_multi_objective.py b/optuna/study/_multi_objective.py index 1d32ac971d..7b97a02696 100644 --- a/optuna/study/_multi_objective.py +++ b/optuna/study/_multi_objective.py @@ -27,19 +27,24 @@ def _get_pareto_front_trials_by_trials( directions: Sequence[StudyDirection], consider_constraint: bool = False, ) -> list[FrozenTrial]: + # NOTE(nabenabe0928): Vectorization relies on all the trials being complete. trials = [t for t in trials if t.state == TrialState.COMPLETE] if consider_constraint: trials = _get_feasible_trials(trials) + if len(trials) == 0: + return [] if any(len(t.values) != len(directions) for t in trials): raise ValueError( "The number of the values and the number of the objectives must be identical." 
) - loss_values = np.asarray([[_normalize_value(v, d) for v, d in (t.values, directions)] for t in trials]) - unique_lexsorted_loss_values, order_inv = np.unique(loss_values, axis=0) + loss_values = np.asarray( + [[_normalize_value(v, d) for v, d in zip(t.values, directions)] for t in trials] + ) + unique_lexsorted_loss_values, order_inv = np.unique(loss_values, return_inverse=True, axis=0) on_front = _is_pareto_front(unique_lexsorted_loss_values)[order_inv] - return [t for t, is_pareto in zip(trials, on_front) if is_pareto] + return [t for t, is_pareto in zip(trials, on_front) if is_pareto] def _get_pareto_front_trials( diff --git a/tests/test_multi_objective.py b/tests/test_multi_objective.py index 7ddb964890..0ec302cc64 100644 --- a/tests/test_multi_objective.py +++ b/tests/test_multi_objective.py @@ -1,9 +1,12 @@ from __future__ import annotations +import pytest +from typing import Sequence + from optuna import create_study from optuna import trial -from optuna.study._multi_objective import _get_pareto_front_trials_2d -from optuna.study._multi_objective import _get_pareto_front_trials_nd +from optuna.study import StudyDirection +from optuna.study._multi_objective import _get_pareto_front_trials_by_trials from optuna.trial import FrozenTrial @@ -12,126 +15,64 @@ def _trial_to_values(t: FrozenTrial) -> tuple[float, ...]: return tuple(t.values) -def test_get_pareto_front_trials_2d() -> None: - study = create_study(directions=["minimize", "maximize"]) - assert { - _trial_to_values(t) for t in _get_pareto_front_trials_2d(study.trials, study.directions) - } == set() - - study.optimize(lambda t: [2, 2], n_trials=1) - assert { - _trial_to_values(t) for t in _get_pareto_front_trials_2d(study.trials, study.directions) - } == {(2, 2)} - - study.optimize(lambda t: [1, 1], n_trials=1) - assert { - _trial_to_values(t) for t in _get_pareto_front_trials_2d(study.trials, study.directions) - } == {(1, 1), (2, 2)} - - study.optimize(lambda t: [3, 1], n_trials=1) - assert { - _trial_to_values(t) for t in _get_pareto_front_trials_2d(study.trials, study.directions) - } == {(1, 1), (2, 2)} - - study.optimize(lambda t: [3, 2], n_trials=1) - assert { - _trial_to_values(t) for t in _get_pareto_front_trials_2d(study.trials, study.directions) - } == {(1, 1), (2, 2)} - - study.optimize(lambda t: [1, 3], n_trials=1) - assert { - _trial_to_values(t) for t in _get_pareto_front_trials_2d(study.trials, study.directions) - } == {(1, 3)} - assert len(_get_pareto_front_trials_2d(study.trials, study.directions)) == 1 - - study.optimize(lambda t: [1, 3], n_trials=1) # The trial result is the same as the above one. 
- assert { - _trial_to_values(t) for t in _get_pareto_front_trials_2d(study.trials, study.directions) - } == {(1, 3)} - assert len(_get_pareto_front_trials_2d(study.trials, study.directions)) == 2 - - -def test_get_pareto_front_trials_2d_with_constraint() -> None: - study = create_study(directions=["minimize", "maximize"]) - trials = [ - trial.create_trial(values=[1, 1], system_attrs={"constraints": [0]}), - trial.create_trial(values=[2, 2], system_attrs={"constraints": [1]}), - trial.create_trial(values=[3, 2], system_attrs={"constraints": [0]}), - ] - study.add_trials(trials) - assert { - _trial_to_values(t) - for t in _get_pareto_front_trials_2d(study.trials, study.directions, False) - } == {(1, 1), (2, 2)} - assert { - _trial_to_values(t) - for t in _get_pareto_front_trials_2d(study.trials, study.directions, True) - } == {(1, 1), (3, 2)} - - -def test_get_pareto_front_trials_nd() -> None: - study = create_study(directions=["minimize", "maximize", "minimize"]) - assert { - _trial_to_values(t) for t in _get_pareto_front_trials_nd(study.trials, study.directions) - } == set() - - study.optimize(lambda t: [2, 2, 2], n_trials=1) - assert { - _trial_to_values(t) for t in _get_pareto_front_trials_nd(study.trials, study.directions) - } == {(2, 2, 2)} - - study.optimize(lambda t: [1, 1, 1], n_trials=1) - assert { - _trial_to_values(t) for t in _get_pareto_front_trials_nd(study.trials, study.directions) - } == { - (1, 1, 1), - (2, 2, 2), - } - - study.optimize(lambda t: [3, 1, 3], n_trials=1) - assert { - _trial_to_values(t) for t in _get_pareto_front_trials_nd(study.trials, study.directions) - } == { - (1, 1, 1), - (2, 2, 2), - } - - study.optimize(lambda t: [3, 2, 3], n_trials=1) - assert { - _trial_to_values(t) for t in _get_pareto_front_trials_nd(study.trials, study.directions) - } == { - (1, 1, 1), - (2, 2, 2), - } - - study.optimize(lambda t: [1, 3, 1], n_trials=1) - assert { - _trial_to_values(t) for t in _get_pareto_front_trials_nd(study.trials, study.directions) - } == {(1, 3, 1)} - assert len(_get_pareto_front_trials_nd(study.trials, study.directions)) == 1 - - study.optimize( - lambda t: [1, 3, 1], n_trials=1 - ) # The trial result is the same as the above one. 
- assert { - _trial_to_values(t) for t in _get_pareto_front_trials_nd(study.trials, study.directions) - } == {(1, 3, 1)} - assert len(_get_pareto_front_trials_nd(study.trials, study.directions)) == 2 - - -def test_get_pareto_front_trials_nd_with_constraint() -> None: - study = create_study(directions=["minimize", "maximize", "minimize"]) +def assert_is_output_equal_to_ans( + trials: list[FrozenTrial], directions: Sequence[StudyDirection], ans: set[int] +) -> None: + res = {_trial_to_values(t) for t in _get_pareto_front_trials_by_trials(trials, directions)} + assert res == ans + + +@pytest.mark.parametrize("directions,values_set,ans_set", [ + ( + ["minimize", "maximize"], + [[2, 2], [1, 1], [3, 1], [3, 2], [1, 3]], + [{(2, 2)}] + [{(1, 1), (2, 2)}] * 3 + [{(1, 3)}], + ), + ( + ["minimize", "maximize", "minimize"], + [[2, 2, 2], [1, 1, 1], [3, 1, 3], [3, 2, 3], [1, 3, 1]], + [{(2, 2, 2)}] + [{(1, 1, 1), (2, 2, 2)}] * 3 + [{(1, 3, 1)}], + ), +]) +def test_get_pareto_front_trials( + directions: list[str], values_set: list[list[int]], ans_set: list[set[tuple[int]]] +) -> None: + study = create_study(directions=directions) + assert_is_output_equal_to_ans(study.trials, study.directions, set()) + for values, ans in zip(values_set, ans_set): + study.optimize(lambda t: values, n_trials=1) + assert_is_output_equal_to_ans(study.trials, study.directions, ans) + + assert len(_get_pareto_front_trials_by_trials(study.trials, study.directions)) == 1 + # The trial result is the same as the last one. + study.optimize(lambda t: values_set[-1], n_trials=1) + assert_is_output_equal_to_ans(study.trials, study.directions, ans_set[-1]) + assert len(_get_pareto_front_trials_by_trials(study.trials, study.directions)) == 2 + + +@pytest.mark.parametrize("directions,values_set,ans_set", [ + ( + ["minimize", "maximize"], [[1, 1], [2, 2], [3, 2]], [{(1, 1), (2, 2)}, {(1, 1), (3, 2)}] + ), + ( + ["minimize", "maximize", "minimize"], + [[1, 1, 1], [2, 2, 2], [3, 2, 3]], + [{(1, 1, 1), (2, 2, 2)}, {(1, 1, 1), (3, 2, 3)}], + ), +]) +def test_get_pareto_front_trials_with_constraint( + directions: list[str], values_set: list[list[int]], ans_set: list[set[tuple[int]]] +) -> None: + study = create_study(directions=directions) trials = [ - trial.create_trial(values=[1, 1, 1], system_attrs={"constraints": [0]}), - trial.create_trial(values=[2, 2, 2], system_attrs={"constraints": [1]}), - trial.create_trial(values=[3, 2, 3], system_attrs={"constraints": [0]}), + trial.create_trial(values=values, system_attrs={"constraints": [i % 2]}) + for i, values in enumerate(values_set) ] study.add_trials(trials) - assert { - _trial_to_values(t) - for t in _get_pareto_front_trials_nd(study.trials, study.directions, False) - } == {(1, 1, 1), (2, 2, 2)} - assert { - _trial_to_values(t) - for t in _get_pareto_front_trials_nd(study.trials, study.directions, True) - } == {(1, 1, 1), (3, 2, 3)} + for consider_constraint, ans in zip([False, True], ans_set): + trials = study.trials + study_dirs = study.directions + assert ans == { + _trial_to_values(t) + for t in _get_pareto_front_trials_by_trials(trials, study_dirs, consider_constraint) + } From 919f28d140fba785164c16756cc47c69e42da629 Mon Sep 17 00:00:00 2001 From: nabenabe0928 Date: Tue, 12 Mar 2024 17:22:17 +0100 Subject: [PATCH 09/17] Apply formatter --- tests/test_multi_objective.py | 53 +++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/tests/test_multi_objective.py b/tests/test_multi_objective.py index 0ec302cc64..55c07fba62 100644 --- 
a/tests/test_multi_objective.py +++ b/tests/test_multi_objective.py @@ -1,8 +1,9 @@ from __future__ import annotations -import pytest from typing import Sequence +import pytest + from optuna import create_study from optuna import trial from optuna.study import StudyDirection @@ -16,24 +17,27 @@ def _trial_to_values(t: FrozenTrial) -> tuple[float, ...]: def assert_is_output_equal_to_ans( - trials: list[FrozenTrial], directions: Sequence[StudyDirection], ans: set[int] + trials: list[FrozenTrial], directions: Sequence[StudyDirection], ans: set[tuple[int]] ) -> None: res = {_trial_to_values(t) for t in _get_pareto_front_trials_by_trials(trials, directions)} assert res == ans -@pytest.mark.parametrize("directions,values_set,ans_set", [ - ( - ["minimize", "maximize"], - [[2, 2], [1, 1], [3, 1], [3, 2], [1, 3]], - [{(2, 2)}] + [{(1, 1), (2, 2)}] * 3 + [{(1, 3)}], - ), - ( - ["minimize", "maximize", "minimize"], - [[2, 2, 2], [1, 1, 1], [3, 1, 3], [3, 2, 3], [1, 3, 1]], - [{(2, 2, 2)}] + [{(1, 1, 1), (2, 2, 2)}] * 3 + [{(1, 3, 1)}], - ), -]) +@pytest.mark.parametrize( + "directions,values_set,ans_set", + [ + ( + ["minimize", "maximize"], + [[2, 2], [1, 1], [3, 1], [3, 2], [1, 3]], + [{(2, 2)}] + [{(1, 1), (2, 2)}] * 3 + [{(1, 3)}], + ), + ( + ["minimize", "maximize", "minimize"], + [[2, 2, 2], [1, 1, 1], [3, 1, 3], [3, 2, 3], [1, 3, 1]], + [{(2, 2, 2)}] + [{(1, 1, 1), (2, 2, 2)}] * 3 + [{(1, 3, 1)}], + ), + ], +) def test_get_pareto_front_trials( directions: list[str], values_set: list[list[int]], ans_set: list[set[tuple[int]]] ) -> None: @@ -50,16 +54,17 @@ def test_get_pareto_front_trials( assert len(_get_pareto_front_trials_by_trials(study.trials, study.directions)) == 2 -@pytest.mark.parametrize("directions,values_set,ans_set", [ - ( - ["minimize", "maximize"], [[1, 1], [2, 2], [3, 2]], [{(1, 1), (2, 2)}, {(1, 1), (3, 2)}] - ), - ( - ["minimize", "maximize", "minimize"], - [[1, 1, 1], [2, 2, 2], [3, 2, 3]], - [{(1, 1, 1), (2, 2, 2)}, {(1, 1, 1), (3, 2, 3)}], - ), -]) +@pytest.mark.parametrize( + "directions,values_set,ans_set", + [ + (["minimize", "maximize"], [[1, 1], [2, 2], [3, 2]], [{(1, 1), (2, 2)}, {(1, 1), (3, 2)}]), + ( + ["minimize", "maximize", "minimize"], + [[1, 1, 1], [2, 2, 2], [3, 2, 3]], + [{(1, 1, 1), (2, 2, 2)}, {(1, 1, 1), (3, 2, 3)}], + ), + ], +) def test_get_pareto_front_trials_with_constraint( directions: list[str], values_set: list[list[int]], ans_set: list[set[tuple[int]]] ) -> None: From 129618f82f7a7e2fa17d1dba2f92ffa741ed0760 Mon Sep 17 00:00:00 2001 From: nabenabe0928 Date: Mon, 11 Mar 2024 14:01:45 +0100 Subject: [PATCH 10/17] Speed up WFG --- optuna/_hypervolume/wfg.py | 103 ++++++++++++++----------------------- 1 file changed, 38 insertions(+), 65 deletions(-) diff --git a/optuna/_hypervolume/wfg.py b/optuna/_hypervolume/wfg.py index de7db1b553..af4a6ba734 100644 --- a/optuna/_hypervolume/wfg.py +++ b/optuna/_hypervolume/wfg.py @@ -3,8 +3,8 @@ import numpy as np from optuna._hypervolume import _compute_2d -from optuna._hypervolume import _compute_2points_volume from optuna._hypervolume import BaseHypervolume +from optuna.study._multi_objective import _is_pareto_front class WFG(BaseHypervolume): @@ -21,74 +21,47 @@ def __init__(self) -> None: def _compute(self, solution_set: np.ndarray, reference_point: np.ndarray) -> float: self._reference_point = reference_point.astype(np.float64) - return self._compute_rec(solution_set.astype(np.float64)) - - def _compute_rec(self, solution_set: np.ndarray) -> float: - assert self._reference_point is not None - n_points = 
solution_set.shape[0] - if self._reference_point.shape[0] == 2: return _compute_2d(solution_set, self._reference_point) - if n_points == 1: - return _compute_2points_volume(solution_set[0], self._reference_point) - elif n_points == 2: - volume = 0.0 - volume += _compute_2points_volume(solution_set[0], self._reference_point) - volume += _compute_2points_volume(solution_set[1], self._reference_point) - intersection = self._reference_point - np.maximum(solution_set[0], solution_set[1]) - volume -= np.prod(intersection) - - return volume + return self._compute_rec(solution_set[solution_set[:, 0].argsort()].astype(np.float64)) - solution_set = solution_set[solution_set[:, 0].argsort()] + def _compute_hv_directly(self, solution_set: np.ndarray) -> float: + assert 1 <= solution_set.shape[0] <= 2 + if solution_set.shape[0] == 1: + return np.abs(np.prod(solution_set[0] - self._reference_point)) - # n_points >= 3 - volume = 0.0 - for i in range(n_points): - volume += self._compute_exclusive_hv(solution_set[i], solution_set[i + 1 :]) - return volume + volume_sum = np.sum(np.abs(np.prod(self._reference_point - solution_set, axis=-1))) + intersec = np.prod(self._reference_point - np.maximum(solution_set[0], solution_set[1])) + return volume_sum - intersec - def _compute_exclusive_hv(self, point: np.ndarray, solution_set: np.ndarray) -> float: + def _compute_rec(self, solution_set: np.ndarray) -> float: assert self._reference_point is not None - volume = _compute_2points_volume(point, self._reference_point) - limited_solution_set = self._limit(point, solution_set) - n_points_of_s = limited_solution_set.shape[0] - if n_points_of_s == 1: - volume -= _compute_2points_volume(limited_solution_set[0], self._reference_point) - elif n_points_of_s > 1: - volume -= self._compute_rec(limited_solution_set) - return volume - - @staticmethod - def _limit(point: np.ndarray, solution_set: np.ndarray) -> np.ndarray: - """Limit the points in the solution set for the given point. - - Let `S := solution set`, `p := point` and `d := dim(p)`. - The returned solution set `S'` is - `S' = Pareto({s' | for all i in [d], exists s in S, s'_i = max(s_i, p_i)})`, - where `Pareto(T) = the points in T which are Pareto optimal`. - """ - n_points_of_s = solution_set.shape[0] - - limited_solution_set = np.maximum(solution_set, point) - - # Return almost Pareto optimal points for computational efficiency. - # If the points in the solution set are completely sorted along all coordinates, - # the following procedures return the complete Pareto optimal points. - # For the computational efficiency, we do not completely sort the points, - # but just sort the points according to its 0-th dimension. - if n_points_of_s <= 1: - return limited_solution_set - else: - # Assume limited_solution_set is sorted by its 0th dimension. - # Therefore, we can simply scan the limited solution set from left to right. - returned_limited_solution_set = [limited_solution_set[0]] - left = 0 - right = 1 - while right < n_points_of_s: - if (limited_solution_set[left] > limited_solution_set[right]).any(): - left = right - returned_limited_solution_set.append(limited_solution_set[left]) - right += 1 - return np.asarray(returned_limited_solution_set) + if solution_set.shape[0] == 1: + return np.abs(np.prod(solution_set[0] - self._reference_point)) + elif solution_set.shape[0] == 2: + # S(A v B) = S(A) + S(B) - S(A ^ B). 
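+            # Here S(X) is the volume dominated by X up to the reference point,
+            # e.g., S(A) = prod(ref - a). The overlap A ^ B is the box spanned by
+            # the elementwise maximum, so S(A ^ B) = prod(ref - max(a, b)).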
+ return ( + np.sum(np.abs(np.prod(self._reference_point - solution_set, axis=-1))) - + np.prod(self._reference_point - np.maximum(solution_set[0], solution_set[1])) + ) + + inclusive_hvs = np.abs(np.prod(self._reference_point - solution_set, axis=-1)) + limited_solutions = np.maximum(solution_set[:, np.newaxis], solution_set) + return sum( + self._compute_exclusive_hv(limited_solutions[i, i + 1 :], inclusive_hv) + for i, inclusive_hv in enumerate(inclusive_hvs) + ) + + def _compute_exclusive_hv(self, limited_solution: np.ndarray, inclusive_hv: float) -> float: + assert self._reference_point is not None + if limited_solution.shape[0] == 0: + return inclusive_hv + elif limited_solution.shape[0] == 1: + inner = np.abs(np.prod(limited_solution[0] - self._reference_point)) + return inclusive_hv - inner + + unique_lexsorted_solutions = np.unique(limited_solution, axis=0) + on_front = _is_pareto_front(unique_lexsorted_solutions) + limited_pareto_sols = unique_lexsorted_solutions[on_front] + return inclusive_hv - self._compute_rec(limited_pareto_sols) From ac962cadef9633eef2d40468c410b2c37db197a7 Mon Sep 17 00:00:00 2001 From: nabenabe0928 Date: Mon, 11 Mar 2024 14:16:06 +0100 Subject: [PATCH 11/17] Refactor --- optuna/_hypervolume/wfg.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/optuna/_hypervolume/wfg.py b/optuna/_hypervolume/wfg.py index af4a6ba734..0792360b4e 100644 --- a/optuna/_hypervolume/wfg.py +++ b/optuna/_hypervolume/wfg.py @@ -1,4 +1,4 @@ -from typing import Optional +from __future__ import annotations import numpy as np @@ -7,6 +7,10 @@ from optuna.study._multi_objective import _is_pareto_front +def _rectangular_space(p0: np.ndarray, p1: np.ndarray) -> np.ndarray: + return np.abs(np.prod(p0 - p1, axis=-1)) + + class WFG(BaseHypervolume): """Hypervolume calculator for any dimension. @@ -26,27 +30,19 @@ def _compute(self, solution_set: np.ndarray, reference_point: np.ndarray) -> flo return self._compute_rec(solution_set[solution_set[:, 0].argsort()].astype(np.float64)) - def _compute_hv_directly(self, solution_set: np.ndarray) -> float: - assert 1 <= solution_set.shape[0] <= 2 - if solution_set.shape[0] == 1: - return np.abs(np.prod(solution_set[0] - self._reference_point)) - - volume_sum = np.sum(np.abs(np.prod(self._reference_point - solution_set, axis=-1))) - intersec = np.prod(self._reference_point - np.maximum(solution_set[0], solution_set[1])) - return volume_sum - intersec - def _compute_rec(self, solution_set: np.ndarray) -> float: assert self._reference_point is not None if solution_set.shape[0] == 1: - return np.abs(np.prod(solution_set[0] - self._reference_point)) + return float(_rectangular_space(solution_set[0], self._reference_point)) elif solution_set.shape[0] == 2: # S(A v B) = S(A) + S(B) - S(A ^ B). 
+ intersec_node = np.maximum(solution_set[0], solution_set[1]) return ( - np.sum(np.abs(np.prod(self._reference_point - solution_set, axis=-1))) - - np.prod(self._reference_point - np.maximum(solution_set[0], solution_set[1])) + np.sum(_rectangular_space(self._reference_point, solution_set)) - + _rectangular_space(self._reference_point, intersec_node) ) - inclusive_hvs = np.abs(np.prod(self._reference_point - solution_set, axis=-1)) + inclusive_hvs = _rectangular_space(self._reference_point, solution_set) limited_solutions = np.maximum(solution_set[:, np.newaxis], solution_set) return sum( self._compute_exclusive_hv(limited_solutions[i, i + 1 :], inclusive_hv) @@ -58,7 +54,7 @@ def _compute_exclusive_hv(self, limited_solution: np.ndarray, inclusive_hv: floa if limited_solution.shape[0] == 0: return inclusive_hv elif limited_solution.shape[0] == 1: - inner = np.abs(np.prod(limited_solution[0] - self._reference_point)) + inner = _rectangular_space(limited_solution[0], self._reference_point) return inclusive_hv - inner unique_lexsorted_solutions = np.unique(limited_solution, axis=0) From 4f77c14d641ddb03dd68110443eb25ed40204d21 Mon Sep 17 00:00:00 2001 From: nabenabe0928 Date: Mon, 11 Mar 2024 14:26:20 +0100 Subject: [PATCH 12/17] Apply formatter --- optuna/_hypervolume/wfg.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/optuna/_hypervolume/wfg.py b/optuna/_hypervolume/wfg.py index 0792360b4e..d08d038e75 100644 --- a/optuna/_hypervolume/wfg.py +++ b/optuna/_hypervolume/wfg.py @@ -8,6 +8,7 @@ def _rectangular_space(p0: np.ndarray, p1: np.ndarray) -> np.ndarray: + # NOTE: If both p0 and p1 are 1d array, this func returns np.float, which is castable to float. return np.abs(np.prod(p0 - p1, axis=-1)) @@ -21,7 +22,7 @@ class WFG(BaseHypervolume): """ def __init__(self) -> None: - self._reference_point: Optional[np.ndarray] = None + self._reference_point: np.ndarray | None = None def _compute(self, solution_set: np.ndarray, reference_point: np.ndarray) -> float: self._reference_point = reference_point.astype(np.float64) @@ -37,10 +38,8 @@ def _compute_rec(self, solution_set: np.ndarray) -> float: elif solution_set.shape[0] == 2: # S(A v B) = S(A) + S(B) - S(A ^ B). 
intersec_node = np.maximum(solution_set[0], solution_set[1]) - return ( - np.sum(_rectangular_space(self._reference_point, solution_set)) - - _rectangular_space(self._reference_point, intersec_node) - ) + intersec = _rectangular_space(self._reference_point, intersec_node) + return np.sum(_rectangular_space(self._reference_point, solution_set)) - intersec inclusive_hvs = _rectangular_space(self._reference_point, solution_set) limited_solutions = np.maximum(solution_set[:, np.newaxis], solution_set) @@ -54,7 +53,7 @@ def _compute_exclusive_hv(self, limited_solution: np.ndarray, inclusive_hv: floa if limited_solution.shape[0] == 0: return inclusive_hv elif limited_solution.shape[0] == 1: - inner = _rectangular_space(limited_solution[0], self._reference_point) + inner = float(_rectangular_space(limited_solution[0], self._reference_point)) return inclusive_hv - inner unique_lexsorted_solutions = np.unique(limited_solution, axis=0) From da0ba484bc3c5735970ac19cf4dd5df135780da4 Mon Sep 17 00:00:00 2001 From: nabenabe0928 Date: Mon, 11 Mar 2024 16:49:28 +0100 Subject: [PATCH 13/17] Rename some variables --- optuna/_hypervolume/wfg.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/optuna/_hypervolume/wfg.py b/optuna/_hypervolume/wfg.py index d08d038e75..08d3392260 100644 --- a/optuna/_hypervolume/wfg.py +++ b/optuna/_hypervolume/wfg.py @@ -29,34 +29,34 @@ def _compute(self, solution_set: np.ndarray, reference_point: np.ndarray) -> flo if self._reference_point.shape[0] == 2: return _compute_2d(solution_set, self._reference_point) - return self._compute_rec(solution_set[solution_set[:, 0].argsort()].astype(np.float64)) + return self._compute_hv(solution_set[solution_set[:, 0].argsort()].astype(np.float64)) - def _compute_rec(self, solution_set: np.ndarray) -> float: + def _compute_hv(self, sorted_solutions: np.ndarray) -> float: assert self._reference_point is not None - if solution_set.shape[0] == 1: - return float(_rectangular_space(solution_set[0], self._reference_point)) - elif solution_set.shape[0] == 2: + if sorted_solutions.shape[0] == 1: + return float(_rectangular_space(sorted_solutions[0], self._reference_point)) + elif sorted_solutions.shape[0] == 2: # S(A v B) = S(A) + S(B) - S(A ^ B). 
- intersec_node = np.maximum(solution_set[0], solution_set[1]) + intersec_node = np.maximum(sorted_solutions[0], sorted_solutions[1]) intersec = _rectangular_space(self._reference_point, intersec_node) - return np.sum(_rectangular_space(self._reference_point, solution_set)) - intersec + return np.sum(_rectangular_space(self._reference_point, sorted_solutions)) - intersec - inclusive_hvs = _rectangular_space(self._reference_point, solution_set) - limited_solutions = np.maximum(solution_set[:, np.newaxis], solution_set) + inclusive_hvs = _rectangular_space(self._reference_point, sorted_solutions) + limited_solutions_array = np.maximum(sorted_solutions[:, np.newaxis], sorted_solutions) return sum( - self._compute_exclusive_hv(limited_solutions[i, i + 1 :], inclusive_hv) + self._compute_exclusive_hv(limited_solutions_array[i, i + 1 :], inclusive_hv) for i, inclusive_hv in enumerate(inclusive_hvs) ) - def _compute_exclusive_hv(self, limited_solution: np.ndarray, inclusive_hv: float) -> float: + def _compute_exclusive_hv(self, limited_solutions: np.ndarray, inclusive_hv: float) -> float: assert self._reference_point is not None - if limited_solution.shape[0] == 0: + if limited_solutions.shape[0] == 0: return inclusive_hv - elif limited_solution.shape[0] == 1: - inner = float(_rectangular_space(limited_solution[0], self._reference_point)) + elif limited_solutions.shape[0] == 1: + inner = float(_rectangular_space(limited_solutions[0], self._reference_point)) return inclusive_hv - inner - unique_lexsorted_solutions = np.unique(limited_solution, axis=0) + unique_lexsorted_solutions = np.unique(limited_solutions, axis=0) on_front = _is_pareto_front(unique_lexsorted_solutions) limited_pareto_sols = unique_lexsorted_solutions[on_front] - return inclusive_hv - self._compute_rec(limited_pareto_sols) + return inclusive_hv - self._compute_hv(limited_pareto_sols) From 3f024752556caa9911c21a6697b7cabd675b5df7 Mon Sep 17 00:00:00 2001 From: nabenabe0928 Date: Tue, 12 Mar 2024 18:03:46 +0100 Subject: [PATCH 14/17] Remove compute 2d with two points --- optuna/_hypervolume/__init__.py | 2 -- optuna/_hypervolume/utils.py | 13 ------------- tests/hypervolume_tests/test_utils.py | 11 ----------- 3 files changed, 26 deletions(-) diff --git a/optuna/_hypervolume/__init__.py b/optuna/_hypervolume/__init__.py index 43689f1943..e6ada1c886 100644 --- a/optuna/_hypervolume/__init__.py +++ b/optuna/_hypervolume/__init__.py @@ -1,14 +1,12 @@ from optuna._hypervolume.base import BaseHypervolume from optuna._hypervolume.hssp import _solve_hssp from optuna._hypervolume.utils import _compute_2d -from optuna._hypervolume.utils import _compute_2points_volume from optuna._hypervolume.wfg import WFG __all__ = [ "BaseHypervolume", "_compute_2d", - "_compute_2points_volume", "_solve_hssp", "WFG", ] diff --git a/optuna/_hypervolume/utils.py b/optuna/_hypervolume/utils.py index b87192cb28..f160e96206 100644 --- a/optuna/_hypervolume/utils.py +++ b/optuna/_hypervolume/utils.py @@ -1,19 +1,6 @@ import numpy as np -def _compute_2points_volume(point1: np.ndarray, point2: np.ndarray) -> float: - """Compute the hypervolume of the hypercube, whose diagonal endpoints are given 2 points. - - Args: - point1: - The first endpoint of the hypercube's diagonal. - point2: - The second endpoint of the hypercube's diagonal. - """ - - return float(np.abs(np.prod(point1 - point2))) - - def _compute_2d(solution_set: np.ndarray, reference_point: np.ndarray) -> float: """Compute the hypervolume for the two-dimensional space. 
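+
+    For example, with reference point (3, 3), the points (1, 2) and (2, 1)
+    dominate an area of 2 * 1 + 1 * 2 - 1 * 1 = 3.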
diff --git a/tests/hypervolume_tests/test_utils.py b/tests/hypervolume_tests/test_utils.py index 8786c2ed28..855d26bac0 100644 --- a/tests/hypervolume_tests/test_utils.py +++ b/tests/hypervolume_tests/test_utils.py @@ -3,17 +3,6 @@ import optuna -def test_compute_2points_volume() -> None: - p1 = np.ones(10) - p2 = np.zeros(10) - assert 1 == optuna._hypervolume._compute_2points_volume(p1, p2) - assert 1 == optuna._hypervolume._compute_2points_volume(p2, p1) - - p1 = np.ones(10) * 2 - p2 = np.ones(10) - assert 1 == optuna._hypervolume._compute_2points_volume(p1, p2) - - def test_compute_2d() -> None: for n in range(2, 30): r = n * np.ones(2) From 546cc6bcd595eaad145a06bcf8476376dc3ddd3e Mon Sep 17 00:00:00 2001 From: nabenabe0928 Date: Sat, 16 Mar 2024 20:39:36 +0100 Subject: [PATCH 15/17] Rename rectangular space to hyper rectangular volume --- optuna/_hypervolume/wfg.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/optuna/_hypervolume/wfg.py b/optuna/_hypervolume/wfg.py index 08d3392260..94dc188f82 100644 --- a/optuna/_hypervolume/wfg.py +++ b/optuna/_hypervolume/wfg.py @@ -7,7 +7,7 @@ from optuna.study._multi_objective import _is_pareto_front -def _rectangular_space(p0: np.ndarray, p1: np.ndarray) -> np.ndarray: +def _hyper_rectangular_volume(p0: np.ndarray, p1: np.ndarray) -> np.ndarray: # NOTE: If both p0 and p1 are 1d array, this func returns np.float, which is castable to float. return np.abs(np.prod(p0 - p1, axis=-1)) @@ -34,14 +34,15 @@ def _compute(self, solution_set: np.ndarray, reference_point: np.ndarray) -> flo def _compute_hv(self, sorted_solutions: np.ndarray) -> float: assert self._reference_point is not None if sorted_solutions.shape[0] == 1: - return float(_rectangular_space(sorted_solutions[0], self._reference_point)) + return float(_hyper_rectangular_volume(sorted_solutions[0], self._reference_point)) elif sorted_solutions.shape[0] == 2: # S(A v B) = S(A) + S(B) - S(A ^ B). 
intersec_node = np.maximum(sorted_solutions[0], sorted_solutions[1]) - intersec = _rectangular_space(self._reference_point, intersec_node) - return np.sum(_rectangular_space(self._reference_point, sorted_solutions)) - intersec + intersec = _hyper_rectangular_volume(self._reference_point, intersec_node) + volume_sum = np.sum(_hyper_rectangular_volume(self._reference_point, sorted_solutions)) + return volume_sum - intersec - inclusive_hvs = _rectangular_space(self._reference_point, sorted_solutions) + inclusive_hvs = _hyper_rectangular_volume(self._reference_point, sorted_solutions) limited_solutions_array = np.maximum(sorted_solutions[:, np.newaxis], sorted_solutions) return sum( self._compute_exclusive_hv(limited_solutions_array[i, i + 1 :], inclusive_hv) @@ -53,7 +54,7 @@ def _compute_exclusive_hv(self, limited_solutions: np.ndarray, inclusive_hv: flo if limited_solutions.shape[0] == 0: return inclusive_hv elif limited_solutions.shape[0] == 1: - inner = float(_rectangular_space(limited_solutions[0], self._reference_point)) + inner = float(_hyper_rectangular_volume(limited_solutions[0], self._reference_point)) return inclusive_hv - inner unique_lexsorted_solutions = np.unique(limited_solutions, axis=0) From 87b2e28b6368b7718290184f3f075d09bc1de3d4 Mon Sep 17 00:00:00 2001 From: nabenabe0928 Date: Sun, 17 Mar 2024 06:33:23 +0100 Subject: [PATCH 16/17] Update init hv in hssp --- optuna/_hypervolume/hssp.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/optuna/_hypervolume/hssp.py b/optuna/_hypervolume/hssp.py index 4183e68538..7aa3b189dd 100644 --- a/optuna/_hypervolume/hssp.py +++ b/optuna/_hypervolume/hssp.py @@ -23,10 +23,7 @@ def _solve_hssp( """ selected_vecs: List[np.ndarray] = [] selected_indices: List[int] = [] - contributions = [ - optuna._hypervolume.WFG().compute(np.asarray([v]), reference_point) - for v in rank_i_loss_vals - ] + contributions = np.prod(reference_point - rank_i_loss_vals, axis=-1) hv_selected = 0.0 while len(selected_indices) < subset_size: max_index = int(np.argmax(contributions)) From f4c717b48e806305eca0f35b791906955dce5bc2 Mon Sep 17 00:00:00 2001 From: nabenabe0928 Date: Thu, 21 Mar 2024 07:32:22 +0100 Subject: [PATCH 17/17] Merge master and refactor a bit --- optuna/_hypervolume/wfg.py | 41 ++++------ optuna/samplers/_tpe/sampler.py | 4 +- .../_elite_population_selection_strategy.py | 4 +- optuna/study/_multi_objective.py | 78 ++++++++++--------- 4 files changed, 60 insertions(+), 67 deletions(-) diff --git a/optuna/_hypervolume/wfg.py b/optuna/_hypervolume/wfg.py index 94dc188f82..206130ab18 100644 --- a/optuna/_hypervolume/wfg.py +++ b/optuna/_hypervolume/wfg.py @@ -7,11 +7,6 @@ from optuna.study._multi_objective import _is_pareto_front -def _hyper_rectangular_volume(p0: np.ndarray, p1: np.ndarray) -> np.ndarray: - # NOTE: If both p0 and p1 are 1d array, this func returns np.float, which is castable to float. - return np.abs(np.prod(p0 - p1, axis=-1)) - - class WFG(BaseHypervolume): """Hypervolume calculator for any dimension. 
@@ -31,33 +26,25 @@ def _compute(self, solution_set: np.ndarray, reference_point: np.ndarray) -> flo return self._compute_hv(solution_set[solution_set[:, 0].argsort()].astype(np.float64)) - def _compute_hv(self, sorted_solutions: np.ndarray) -> float: + def _compute_hv(self, sorted_sols: np.ndarray) -> float: assert self._reference_point is not None - if sorted_solutions.shape[0] == 1: - return float(_hyper_rectangular_volume(sorted_solutions[0], self._reference_point)) - elif sorted_solutions.shape[0] == 2: + inclusive_hvs = np.prod(self._reference_point - sorted_sols, axis=-1) + if inclusive_hvs.shape[0] == 1: + return float(inclusive_hvs[0]) + elif inclusive_hvs.shape[0] == 2: # S(A v B) = S(A) + S(B) - S(A ^ B). - intersec_node = np.maximum(sorted_solutions[0], sorted_solutions[1]) - intersec = _hyper_rectangular_volume(self._reference_point, intersec_node) - volume_sum = np.sum(_hyper_rectangular_volume(self._reference_point, sorted_solutions)) - return volume_sum - intersec + intersec = np.prod(self._reference_point - np.maximum(sorted_sols[0], sorted_sols[1])) + return np.sum(inclusive_hvs) - intersec - inclusive_hvs = _hyper_rectangular_volume(self._reference_point, sorted_solutions) - limited_solutions_array = np.maximum(sorted_solutions[:, np.newaxis], sorted_solutions) + limited_sols_array = np.maximum(sorted_sols[:, np.newaxis], sorted_sols) return sum( - self._compute_exclusive_hv(limited_solutions_array[i, i + 1 :], inclusive_hv) + self._compute_exclusive_hv(limited_sols_array[i, i + 1 :], inclusive_hv) for i, inclusive_hv in enumerate(inclusive_hvs) ) - def _compute_exclusive_hv(self, limited_solutions: np.ndarray, inclusive_hv: float) -> float: - assert self._reference_point is not None - if limited_solutions.shape[0] == 0: + def _compute_exclusive_hv(self, limited_sols: np.ndarray, inclusive_hv: float) -> float: + if limited_sols.shape[0] == 0: return inclusive_hv - elif limited_solutions.shape[0] == 1: - inner = float(_hyper_rectangular_volume(limited_solutions[0], self._reference_point)) - return inclusive_hv - inner - - unique_lexsorted_solutions = np.unique(limited_solutions, axis=0) - on_front = _is_pareto_front(unique_lexsorted_solutions) - limited_pareto_sols = unique_lexsorted_solutions[on_front] - return inclusive_hv - self._compute_hv(limited_pareto_sols) + + on_front = _is_pareto_front(limited_sols, assume_unique_lexsorted=False) + return inclusive_hv - self._compute_hv(limited_sols[on_front]) diff --git a/optuna/samplers/_tpe/sampler.py b/optuna/samplers/_tpe/sampler.py index d683959fae..0a6ee722d8 100644 --- a/optuna/samplers/_tpe/sampler.py +++ b/optuna/samplers/_tpe/sampler.py @@ -28,7 +28,7 @@ from optuna.search_space.group_decomposed import _GroupDecomposedSearchSpace from optuna.search_space.group_decomposed import _SearchSpaceGroup from optuna.study import Study -from optuna.study._multi_objective import _fast_non_dominated_sort +from optuna.study._multi_objective import _fast_non_domination_rank from optuna.study._study_direction import StudyDirection from optuna.trial import FrozenTrial from optuna.trial import TrialState @@ -685,7 +685,7 @@ def _split_complete_trials_multi_objective( lvals *= np.array([-1.0 if d == StudyDirection.MAXIMIZE else 1.0 for d in study.directions]) # Solving HSSP for variables number of times is a waste of time. 
- nondomination_ranks = _fast_non_dominated_sort(lvals, n_below=n_below) + nondomination_ranks = _fast_non_domination_rank(lvals, n_below=n_below) assert 0 <= n_below <= len(lvals) indices = np.array(range(len(lvals))) diff --git a/optuna/samplers/nsgaii/_elite_population_selection_strategy.py b/optuna/samplers/nsgaii/_elite_population_selection_strategy.py index d292a4ede6..b0afb322a6 100644 --- a/optuna/samplers/nsgaii/_elite_population_selection_strategy.py +++ b/optuna/samplers/nsgaii/_elite_population_selection_strategy.py @@ -10,7 +10,7 @@ from optuna.samplers.nsgaii._constraints_evaluation import _validate_constraints from optuna.study import Study from optuna.study import StudyDirection -from optuna.study._multi_objective import _fast_non_dominated_sort +from optuna.study._multi_objective import _fast_non_domination_rank from optuna.trial import FrozenTrial @@ -125,7 +125,7 @@ def _rank_population( ) penalty = _evaluate_penalty(population) if is_constrained else None - domination_ranks = _fast_non_dominated_sort(objective_values, penalty=penalty) + domination_ranks = _fast_non_domination_rank(objective_values, penalty=penalty) population_per_rank: list[list[FrozenTrial]] = [[] for _ in range(max(domination_ranks) + 1)] for trial, rank in zip(population, domination_ranks): if rank == -1: diff --git a/optuna/study/_multi_objective.py b/optuna/study/_multi_objective.py index 7b97a02696..87043aaaaf 100644 --- a/optuna/study/_multi_objective.py +++ b/optuna/study/_multi_objective.py @@ -42,8 +42,7 @@ def _get_pareto_front_trials_by_trials( loss_values = np.asarray( [[_normalize_value(v, d) for v, d in zip(t.values, directions)] for t in trials] ) - unique_lexsorted_loss_values, order_inv = np.unique(loss_values, return_inverse=True, axis=0) - on_front = _is_pareto_front(unique_lexsorted_loss_values)[order_inv] + on_front = _is_pareto_front(loss_values, assume_unique_lexsorted=False) return [t for t, is_pareto in zip(trials, on_front) if is_pareto] @@ -53,13 +52,10 @@ def _get_pareto_front_trials( return _get_pareto_front_trials_by_trials(study.trials, study.directions, consider_constraint) -def _fast_non_dominated_sort( - loss_values: np.ndarray, - *, - penalty: np.ndarray | None = None, - n_below: int | None = None, +def _fast_non_domination_rank( + loss_values: np.ndarray, *, penalty: np.ndarray | None = None, n_below: int | None = None ) -> np.ndarray: - """Perform the fast non-dominated sort algorithm. + """Calculate non-domination rank based on the fast non-dominated sort algorithm. The fast non-dominated sort algorithm assigns a rank to each trial based on the dominance relationship of the trials, determined by the objective values and the penalty values. The @@ -88,10 +84,15 @@ def _fast_non_dominated_sort( terminate when the number of sorted trials reaches n_below. Defaults to None. Returns: - An ndarray in the shape of (n_trials,), where each element is the non-dominated rank of - each trial. The rank is 0-indexed. All the rank worse than the top-n_below has the same - value larger than or equal to n_below. + An ndarray in the shape of (n_trials,), where each element is the non-domination rank of + each trial. The rank is 0-indexed. This function guarantees the correctness of the ranks + only up to the top-``n_below`` solutions. If a solution's rank is worse than the + top-``n_below`` solution, its rank is not necessarily correct. If you would like to ensure + the correctness, use ``n_below=None``. 
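+
+    Example:
+        >>> import numpy as np
+        >>> _fast_non_domination_rank(np.array([[1.0, 3.0], [2.0, 2.0], [3.0, 1.0], [3.0, 3.0]]))
+        array([0, 0, 0, 1])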
""" + n_below = n_below or len(loss_values) + assert n_below > 0, "n_below must be a positive integer." + if penalty is None: return _calculate_nondomination_rank(loss_values, n_below=n_below) @@ -101,34 +102,29 @@ def _fast_non_dominated_sort( f"len(penalty)={len(penalty)} and len(loss_values)={len(loss_values)}." ) - nondomination_rank = np.full(len(loss_values), -1, dtype=int) + ranks = np.full(len(loss_values), -1, dtype=int) is_penalty_nan = np.isnan(penalty) - n_below = n_below or len(loss_values) + is_feasible = np.logical_and(~is_penalty_nan, penalty <= 0) + is_infeasible = np.logical_and(~is_penalty_nan, penalty > 0) # First, we calculate the domination rank for feasible trials. - is_feasible = np.logical_and(~is_penalty_nan, penalty <= 0) - nondomination_rank[is_feasible] = _calculate_nondomination_rank( - loss_values[is_feasible], n_below=n_below - ) + ranks[is_feasible] = _calculate_nondomination_rank(loss_values[is_feasible], n_below=n_below) n_below -= np.count_nonzero(is_feasible) # Second, we calculate the domination rank for infeasible trials. - top_rank_in_infeasible = np.max(nondomination_rank[is_feasible], initial=-1) + 1 - is_infeasible = np.logical_and(~is_penalty_nan, penalty > 0) - n_infeasible = np.count_nonzero(is_infeasible) - if n_infeasible > 0: - _, ranks_in_infeas = np.unique(penalty[is_infeasible], return_inverse=True) - nondomination_rank[is_infeasible] = ranks_in_infeas + top_rank_in_infeasible - n_below -= n_infeasible + top_rank_infeasible = np.max(ranks[is_feasible], initial=-1) + 1 + ranks[is_infeasible] = top_rank_infeasible + _calculate_nondomination_rank( + penalty[is_infeasible][:, np.newaxis], n_below=n_below + ) + n_below -= np.count_nonzero(is_infeasible) # Third, we calculate the domination rank for trials with no penalty information. - top_rank_in_penalty_nan = np.max(nondomination_rank[~is_penalty_nan], initial=-1) + 1 - ranks_in_penalty_nan = _calculate_nondomination_rank( + top_rank_penalty_nan = np.max(ranks[~is_penalty_nan], initial=-1) + 1 + ranks[is_penalty_nan] = top_rank_penalty_nan + _calculate_nondomination_rank( loss_values[is_penalty_nan], n_below=n_below ) - nondomination_rank[is_penalty_nan] = ranks_in_penalty_nan + top_rank_in_penalty_nan - assert np.all(nondomination_rank != -1), "All the rank must be updated." - return nondomination_rank + assert np.all(ranks != -1), "All the rank must be updated." + return ranks def _is_pareto_front_nd(unique_lexsorted_loss_values: np.ndarray) -> np.ndarray: @@ -139,8 +135,9 @@ def _is_pareto_front_nd(unique_lexsorted_loss_values: np.ndarray) -> np.ndarray: on_front = np.zeros(n_trials, dtype=bool) nondominated_indices = np.arange(n_trials) while len(loss_values): + # The following judges `np.any(loss_values[i] < loss_values[0])` for each `i`. nondominated_and_not_top = np.any(loss_values < loss_values[0], axis=1) - # NOTE: trials[j] cannot dominate trials[j] for i < j because of lexsort. + # NOTE: trials[j] cannot dominate trials[i] for i < j because of lexsort. # Therefore, nondominated_indices[0] is always non-dominated. 
on_front[nondominated_indices[0]] = True loss_values = loss_values[nondominated_and_not_top] @@ -152,27 +149,36 @@ def _is_pareto_front_nd(unique_lexsorted_loss_values: np.ndarray) -> np.ndarray: def _is_pareto_front_2d(unique_lexsorted_loss_values: np.ndarray) -> np.ndarray: n_trials = unique_lexsorted_loss_values.shape[0] cummin_value1 = np.minimum.accumulate(unique_lexsorted_loss_values[:, 1]) - is_value1_min = cummin_value1 == unique_lexsorted_loss_values[:, 1] - is_value1_new_min = cummin_value1[1:] < cummin_value1[:-1] on_front = np.ones(n_trials, dtype=bool) - on_front[1:] = is_value1_min[1:] & is_value1_new_min + on_front[1:] = cummin_value1[1:] < cummin_value1[:-1] # True if cummin value1 is new minimum. return on_front -def _is_pareto_front(unique_lexsorted_loss_values: np.ndarray) -> np.ndarray: +def _is_pareto_front_for_unique_sorted(unique_lexsorted_loss_values: np.ndarray) -> np.ndarray: (n_trials, n_objectives) = unique_lexsorted_loss_values.shape if n_objectives == 1: - return unique_lexsorted_loss_values[:, 0] == unique_lexsorted_loss_values[0] + on_front = np.zeros(len(unique_lexsorted_loss_values), dtype=bool) + on_front[0] = True # Only the first element is Pareto optimal. + return on_front elif n_objectives == 2: return _is_pareto_front_2d(unique_lexsorted_loss_values) else: return _is_pareto_front_nd(unique_lexsorted_loss_values) +def _is_pareto_front(loss_values: np.ndarray, assume_unique_lexsorted: bool = True) -> np.ndarray: + if assume_unique_lexsorted: + return _is_pareto_front_for_unique_sorted(loss_values) + + unique_lexsorted_loss_values, order_inv = np.unique(loss_values, axis=0, return_inverse=True) + on_front = _is_pareto_front_for_unique_sorted(unique_lexsorted_loss_values) + return on_front[order_inv] + + def _calculate_nondomination_rank( loss_values: np.ndarray, *, n_below: int | None = None ) -> np.ndarray: - if n_below is not None and n_below <= 0: + if len(loss_values) == 0 or (n_below is not None and n_below <= 0): return np.zeros(len(loss_values), dtype=int) (n_trials, n_objectives) = loss_values.shape