Skip to content

Selection Policies

SelectionPolicy

Bases: ABC

Base class for selection policies that choose the best combination.

Selection policies define the strategy for choosing the winning combination from a set of scored candidates. Different policies implement different trade-offs between competing objectives (e.g., weighted sum vs. Pareto).

This is a key component of the Generate-and-Test workflow where the SearchAlgorithm generates candidates, the ObjectiveSet scores them, and the SelectionPolicy picks the winner.

Examples:

>>> # See WeightedSumPolicy and ParetoUtopiaPolicy for concrete examples
>>> class SimpleMinPolicy(SelectionPolicy):
...     def select_best(self, evaluations_df: pd.DataFrame, objective_set: ObjectiveSet):
...         # Just pick the row with minimum of first objective
...         first_obj = list(objective_set.component_meta().keys())[0]
...         best_row = evaluations_df.loc[evaluations_df[first_obj].idxmin()]
...         return tuple(best_row['slices'])
Source code in energy_repset/selection_policies/policy.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
class SelectionPolicy(ABC):
    """Abstract interface for picking the winning combination.

    A selection policy encodes the strategy used to choose one combination
    out of a set of scored candidates. Concrete subclasses realise different
    trade-offs between competing objectives (e.g. weighted sum vs. Pareto).

    Within the Generate-and-Test workflow, the SearchAlgorithm proposes
    candidates, the ObjectiveSet scores them, and this component decides
    which candidate wins.

    Examples:
        >>> # See WeightedSumPolicy and ParetoUtopiaPolicy for concrete examples
        >>> class SimpleMinPolicy(SelectionPolicy):
        ...     def select_best(self, evaluations_df: pd.DataFrame, objective_set: ObjectiveSet):
        ...         # Just pick the row with minimum of first objective
        ...         first_obj = list(objective_set.component_meta().keys())[0]
        ...         best_row = evaluations_df.loc[evaluations_df[first_obj].idxmin()]
        ...         return tuple(best_row['slices'])
    """

    @abstractmethod
    def select_best(self, evaluations_df: pd.DataFrame, objective_set: ObjectiveSet) -> Tuple[Hashable, ...]:
        """Pick the winning combination among scored candidates.

        Args:
            evaluations_df: One candidate per row, holding a 'slices' column
                (the combination tuple) plus one score column per objective
                component.
            objective_set: Source of per-component metadata (direction,
                weights, etc.) that the selection logic may consult.

        Returns:
            Tuple of slice identifiers identifying the winning combination.
        """
        ...

select_best abstractmethod

select_best(evaluations_df: DataFrame, objective_set: ObjectiveSet) -> tuple[Hashable, ...]

Select the best combination from scored candidates.

Parameters:

Name Type Description Default
evaluations_df DataFrame

DataFrame where each row is a candidate combination with columns 'slices' (the combination tuple) and score columns for each objective component.

required
objective_set ObjectiveSet

Provides metadata about score components (direction, weights, etc.) needed for selection logic.

required

Returns:

Type Description
tuple[Hashable, ...]

Tuple of slice identifiers representing the winning combination.

Source code in energy_repset/selection_policies/policy.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
@abstractmethod
def select_best(self, evaluations_df: pd.DataFrame, objective_set: ObjectiveSet) -> Tuple[Hashable, ...]:
    """Select the best combination from scored candidates.

    Args:
        evaluations_df: DataFrame where each row is a candidate combination
            with columns 'slices' (the combination tuple) and score columns
            for each objective component.
        objective_set: Provides metadata about score components (direction,
            weights, etc.) needed for selection logic.

    Returns:
        Tuple of slice identifiers representing the winning combination.
    """
    ...  # abstract: concrete policies must override

PolicyOutcome dataclass

Source code in energy_repset/selection_policies/policy.py
15
16
17
18
19
@dataclass(frozen=True)
class PolicyOutcome:
    """Immutable record of one selection run.

    Bundles the search algorithm that produced the candidates, the selected
    result, and the score table annotated by the policy.
    """

    # Search algorithm that generated the candidate combinations.
    algorithm: SearchAlgorithm
    # Winning result chosen by the selection policy.
    selected: RepSetResult
    # Candidate scores, annotated with policy diagnostics (e.g. score
    # columns added during selection) — exact columns depend on the policy.
    scores_annotated: pd.DataFrame

WeightedSumPolicy

Bases: SelectionPolicy

Selects the combination minimizing a weighted sum of objectives.

Combines multiple objectives into a single scalar score using weighted averaging. Objectives are oriented for minimization (max objectives are negated), optionally normalized, then combined using weights from the ObjectiveSet (which can be overridden).

This is the simplest multi-objective selection strategy and works well when relative importance of objectives is known.

Examples:

>>> from energy_repset import ObjectiveSet, ObjectiveSpec
>>> from energy_repset.score_components import WassersteinFidelity, CorrelationFidelity
>>> # Default: use weights from ObjectiveSet
>>> policy = WeightedSumPolicy()
>>> objectives = ObjectiveSet([
...     ObjectiveSpec('wasserstein', WassersteinFidelity(), weight=1.0),
...     ObjectiveSpec('correlation', CorrelationFidelity(), weight=0.5)
... ])
>>> # Final score = 1.0*wasserstein + 0.5*correlation
>>> # Override weights in policy
>>> policy = WeightedSumPolicy(
...     overrides={'wasserstein': 2.0, 'correlation': 1.0}
... )
>>> # Final score = 2.0*wasserstein + 1.0*correlation
>>> # With normalization to make objectives comparable
>>> policy = WeightedSumPolicy(
...     normalization='robust_minmax',  # Scale to [0, 1] using 5th-95th percentiles
...     tie_breakers=('wasserstein',),  # Break ties by wasserstein
...     tie_dirs=('min',)
... )
Source code in energy_repset/selection_policies/weighted_sum.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
class WeightedSumPolicy(SelectionPolicy):
    """Selects the combination minimizing a weighted sum of objectives.

    Combines multiple objectives into a single scalar score using weighted
    averaging. Objectives are oriented for minimization (max objectives are
    negated), optionally normalized, then combined using weights from the
    ObjectiveSet (which can be overridden).

    This is the simplest multi-objective selection strategy and works well
    when relative importance of objectives is known.

    Examples:
        >>> from energy_repset import ObjectiveSet, ObjectiveSpec
        >>> from energy_repset.score_components import WassersteinFidelity, CorrelationFidelity
        >>> # Default: use weights from ObjectiveSet
        >>> policy = WeightedSumPolicy()
        >>> objectives = ObjectiveSet([
        ...     ObjectiveSpec('wasserstein', WassersteinFidelity(), weight=1.0),
        ...     ObjectiveSpec('correlation', CorrelationFidelity(), weight=0.5)
        ... ])
        >>> # Final score = 1.0*wasserstein + 0.5*correlation

        >>> # Override weights in policy
        >>> policy = WeightedSumPolicy(
        ...     overrides={'wasserstein': 2.0, 'correlation': 1.0}
        ... )
        >>> # Final score = 2.0*wasserstein + 1.0*correlation

        >>> # With normalization to make objectives comparable
        >>> policy = WeightedSumPolicy(
        ...     normalization='robust_minmax',  # Scale to [0, 1] using 5th-95th percentiles
        ...     tie_breakers=('wasserstein',),  # Break ties by wasserstein
        ...     tie_dirs=('min',)
        ... )
    """
    def __init__(
            self,
            overrides: Optional[Dict[str, float]] = None,
            normalization: Normalization = "none",
            tie_breakers: Tuple[str, ...] = (),
            tie_dirs: Tuple[ScoreComponentDirection, ...] = (),
    ) -> None:
        """Initialize weighted sum policy.

        Args:
            overrides: Optional dict mapping objective names to weights,
                overriding weights from ObjectiveSet.
            normalization: How to normalize objectives before weighting:
                - "none": No normalization
                - "robust_minmax": Scale to [0, 1] using 5th-95th percentiles
                - "zscore_iqr": Z-score using median and IQR
            tie_breakers: Tuple of objective names to use for tie-breaking.
            tie_dirs: Corresponding directions ("min" or "max") for tie-breakers.
        """
        self.overrides = overrides or {}
        self.normalization = normalization
        self.tie_breakers = tie_breakers
        self.tie_dirs = tie_dirs

    def select_best(self, evaluations_df: pd.DataFrame, objective_set: ObjectiveSet) -> Tuple[Hashable, ...]:
        """Select combination with minimum weighted sum score.

        Args:
            evaluations_df: DataFrame with 'slices' column and objective scores.
            objective_set: Provides component metadata (direction, weights).

        Returns:
            Tuple of slice identifiers with the lowest weighted sum score.

        Raises:
            ValueError: If evaluations_df is empty, or an override names a
                metric that is not an objective component.
        """
        df = evaluations_df.copy()
        if df.empty:
            raise ValueError("evaluations_df is empty; nothing to select")
        meta = objective_set.component_meta()
        oriented = df[list(meta.keys())].copy()

        # Orient all objectives for minimization (negate "max" objectives)
        for name, m in meta.items():
            if m["direction"] == "max":
                oriented[name] = -oriented[name]

        # Normalize if requested
        Z = self._normalize(oriented, mode=self.normalization)

        # Compute weights (preferences from ObjectiveSet, overrides from strategy)
        weights = {name: float(m["pref"]) for name, m in meta.items()}
        for k, v in self.overrides.items():
            if k not in weights:
                raise ValueError(f"Unknown metric in overrides: {k}")
            weights[k] = float(v)

        # Compute weighted sum scores over the oriented, normalized columns
        df["strategy_score"] = sum(Z[name] * w for name, w in weights.items())

        # BUG FIX: the previous implementation re-sorted the *whole* frame by
        # each tie-breaker, discarding the primary score order entirely (the
        # last tie-breaker decided the winner). Sort once, lexicographically,
        # so tie-breakers only matter among rows with equal scores; mergesort
        # is stable, preserving input order for full ties.
        sort_cols = ["strategy_score"]
        ascending = [True]
        for col, d in zip(self.tie_breakers, self.tie_dirs):
            sort_cols.append(col)
            ascending.append(d == "min")
        best = df.sort_values(sort_cols, ascending=ascending, kind="mergesort")

        return tuple(best.iloc[0]["slices"])

    def _normalize(self, Y: pd.DataFrame, mode: Normalization) -> pd.DataFrame:
        """Normalize oriented objective columns according to *mode*."""
        if mode == "none":
            return Y
        if mode == "robust_minmax":
            # Scale with 5th-95th percentiles to dampen outliers; a zero
            # spread falls back to a unit denominator to avoid div-by-zero.
            q_lo = Y.quantile(0.05)
            q_hi = Y.quantile(0.95)
            denom = (q_hi - q_lo).replace(0, 1.0)
            return ((Y - q_lo) / denom).clip(lower=0.0)
        # "zscore_iqr": robust z-score using median and interquartile range
        med = Y.median()
        iqr = (Y.quantile(0.75) - Y.quantile(0.25)).replace(0, 1.0)
        return (Y - med) / iqr

__init__

__init__(overrides: dict[str, float] | None = None, normalization: Normalization = 'none', tie_breakers: tuple[str, ...] = (), tie_dirs: tuple[ScoreComponentDirection, ...] = ()) -> None

Initialize weighted sum policy.

Parameters:

Name Type Description Default
overrides dict[str, float] | None

Optional dict mapping objective names to weights, overriding weights from ObjectiveSet.

None
normalization Normalization

How to normalize objectives before weighting: - "none": No normalization - "robust_minmax": Scale to [0, 1] using 5th-95th percentiles - "zscore_iqr": Z-score using median and IQR

'none'
tie_breakers tuple[str, ...]

Tuple of objective names to use for tie-breaking.

()
tie_dirs tuple[ScoreComponentDirection, ...]

Corresponding directions ("min" or "max") for tie-breakers.

()
Source code in energy_repset/selection_policies/weighted_sum.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def __init__(
        self,
        overrides: Optional[Dict[str, float]] = None,
        normalization: Normalization = "none",
        tie_breakers: Tuple[str, ...] = (),
        tie_dirs: Tuple[ScoreComponentDirection, ...] = (),
) -> None:
    """Configure the weighted-sum selection policy.

    Args:
        overrides: Per-objective weight overrides; entries replace the
            weights taken from the ObjectiveSet.
        normalization: Pre-weighting normalization mode:
            - "none": leave scores untouched
            - "robust_minmax": scale to [0, 1] via 5th-95th percentiles
            - "zscore_iqr": z-score built from median and IQR
        tie_breakers: Objective names consulted to break score ties.
        tie_dirs: Direction ("min" or "max") matching each tie-breaker.
    """
    self.normalization = normalization
    self.tie_breakers = tie_breakers
    self.tie_dirs = tie_dirs
    self.overrides = overrides or {}

select_best

select_best(evaluations_df: DataFrame, objective_set: ObjectiveSet) -> tuple[Hashable, ...]

Select combination with minimum weighted sum score.

Parameters:

Name Type Description Default
evaluations_df DataFrame

DataFrame with 'slices' column and objective scores.

required
objective_set ObjectiveSet

Provides component metadata (direction, weights).

required

Returns:

Type Description
tuple[Hashable, ...]

Tuple of slice identifiers with the lowest weighted sum score.

Source code in energy_repset/selection_policies/weighted_sum.py
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def select_best(self, evaluations_df: pd.DataFrame, objective_set: ObjectiveSet) -> Tuple[Hashable, ...]:
    """Select combination with minimum weighted sum score.

    Args:
        evaluations_df: DataFrame with 'slices' column and objective scores.
        objective_set: Provides component metadata (direction, weights).

    Returns:
        Tuple of slice identifiers with the lowest weighted sum score.

    Raises:
        ValueError: If evaluations_df is empty, or an override names a
            metric that is not an objective component.
    """
    df = evaluations_df.copy()
    if df.empty:
        raise ValueError("evaluations_df is empty; nothing to select")
    meta = objective_set.component_meta()
    oriented = df[list(meta.keys())].copy()

    # Orient all objectives for minimization (negate "max" objectives)
    for name, m in meta.items():
        if m["direction"] == "max":
            oriented[name] = -oriented[name]

    # Normalize if requested
    Z = self._normalize(oriented, mode=self.normalization)

    # Compute weights (preferences from ObjectiveSet, overrides from strategy)
    weights = {name: float(m["pref"]) for name, m in meta.items()}
    for k, v in self.overrides.items():
        if k not in weights:
            raise ValueError(f"Unknown metric in overrides: {k}")
        weights[k] = float(v)

    # Compute weighted sum scores over the oriented, normalized columns
    df["strategy_score"] = sum(Z[name] * w for name, w in weights.items())

    # BUG FIX: the previous implementation re-sorted the *whole* frame by
    # each tie-breaker, discarding the primary score order entirely (the
    # last tie-breaker decided the winner). Sort once, lexicographically,
    # so tie-breakers only matter among rows with equal scores; mergesort
    # is stable, preserving input order for full ties.
    sort_cols = ["strategy_score"]
    ascending = [True]
    for col, d in zip(self.tie_breakers, self.tie_dirs):
        sort_cols.append(col)
        ascending.append(d == "min")
    best = df.sort_values(sort_cols, ascending=ascending, kind="mergesort")

    return tuple(best.iloc[0]["slices"])

ParetoMaxMinStrategy

Bases: ParetoUtopiaPolicy

Source code in energy_repset/selection_policies/pareto.py
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
class ParetoMaxMinStrategy(ParetoUtopiaPolicy):
    """Pareto policy that maximizes the minimum per-objective slack.

    Instead of ranking Pareto-optimal candidates by distance to the utopia
    point, each candidate is scored by its worst (smallest) slack across
    objectives, and the candidate whose worst objective is best wins.
    """

    def select_best(self, evaluations_df: pd.DataFrame, objective_set: ObjectiveSet) -> Tuple[Hashable, ...]:
        """Select best solution using Pareto max-min approach."""
        df = evaluations_df.copy()
        if df.empty:
            raise ValueError("evaluations_df is empty; nothing to select")
        dirs = self._resolve_objectives_from_meta(objective_set.component_meta(), df)
        feas = self._apply_constraints(df, self.fairness_constraints)
        df["feasible"] = feas
        Y = df[list(dirs.keys())].copy()

        # Orient all objectives for minimization (negate "max" objectives)
        for c, d in dirs.items():
            if d == "max":
                Y[c] = -Y[c]

        # Pareto front is computed over feasible rows only
        pareto_mask = self._pareto_mask(Y[feas])
        df["pareto"] = False
        df.loc[feas.index[feas].tolist(), "pareto"] = pareto_mask.values

        # Store masks for diagnostics
        self.pareto_mask = df["pareto"].copy()
        self.feasible_mask = df["feasible"].copy()

        # Normalize and compute max-min score (slack w.r.t. the ideal point).
        # BUG FIX: when no row is feasible, the ideal was all-NaN and every
        # score became NaN; fall back to the ideal over all rows.
        Z = self._normalize(Y, self.normalization)
        ref = Z[df["feasible"]] if df["feasible"].any() else Z
        ideal = ref.min(axis=0)
        slack = 1.0 - (Z - ideal.values)
        df["maxmin_score"] = slack.min(axis=1)

        # Select from Pareto front; fall back to feasible rows, then all rows
        front = df[(df["feasible"]) & (df["pareto"])]
        if len(front) == 0:
            front = df[df["feasible"]] if df["feasible"].any() else df

        # BUG FIX: tie-breakers previously re-sorted the whole front and
        # discarded the max-min score order; sort once, lexicographically
        # (stable mergesort), so tie-breakers only apply among equal scores.
        sort_cols = ["maxmin_score"]
        ascending = [False]
        for col, d in zip(self.tie_breakers, self.tie_dirs):
            sort_cols.append(col)
            ascending.append(d == "min")
        best = front.sort_values(sort_cols, ascending=ascending, kind="mergesort")

        return tuple(best.iloc[0]["slices"])

select_best

select_best(evaluations_df: DataFrame, objective_set: ObjectiveSet) -> tuple[Hashable, ...]

Select best solution using Pareto max-min approach.

Source code in energy_repset/selection_policies/pareto.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
def select_best(self, evaluations_df: pd.DataFrame, objective_set: ObjectiveSet) -> Tuple[Hashable, ...]:
    """Select best solution using Pareto max-min approach."""
    df = evaluations_df.copy()
    if df.empty:
        raise ValueError("evaluations_df is empty; nothing to select")
    dirs = self._resolve_objectives_from_meta(objective_set.component_meta(), df)
    feas = self._apply_constraints(df, self.fairness_constraints)
    df["feasible"] = feas
    Y = df[list(dirs.keys())].copy()

    # Orient all objectives for minimization (negate "max" objectives)
    for c, d in dirs.items():
        if d == "max":
            Y[c] = -Y[c]

    # Pareto front is computed over feasible rows only
    pareto_mask = self._pareto_mask(Y[feas])
    df["pareto"] = False
    df.loc[feas.index[feas].tolist(), "pareto"] = pareto_mask.values

    # Store masks for diagnostics
    self.pareto_mask = df["pareto"].copy()
    self.feasible_mask = df["feasible"].copy()

    # Normalize and compute max-min score (slack w.r.t. the ideal point).
    # BUG FIX: when no row is feasible, the ideal was all-NaN and every
    # score became NaN; fall back to the ideal over all rows.
    Z = self._normalize(Y, self.normalization)
    ref = Z[df["feasible"]] if df["feasible"].any() else Z
    ideal = ref.min(axis=0)
    slack = 1.0 - (Z - ideal.values)
    df["maxmin_score"] = slack.min(axis=1)

    # Select from Pareto front; fall back to feasible rows, then all rows
    front = df[(df["feasible"]) & (df["pareto"])]
    if len(front) == 0:
        front = df[df["feasible"]] if df["feasible"].any() else df

    # BUG FIX: tie-breakers previously re-sorted the whole front and
    # discarded the max-min score order; sort once, lexicographically
    # (stable mergesort), so tie-breakers only apply among equal scores.
    sort_cols = ["maxmin_score"]
    ascending = [False]
    for col, d in zip(self.tie_breakers, self.tie_dirs):
        sort_cols.append(col)
        ascending.append(d == "min")
    best = front.sort_values(sort_cols, ascending=ascending, kind="mergesort")

    return tuple(best.iloc[0]["slices"])

ParetoUtopiaPolicy

Bases: SelectionPolicy

Source code in energy_repset/selection_policies/pareto.py
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
class ParetoUtopiaPolicy(SelectionPolicy):
    """Selects the feasible Pareto-optimal point closest to the utopia point.

    Candidates are filtered by optional fairness constraints, reduced to the
    Pareto front, normalized, and ranked by distance to the per-objective
    ideal ("utopia") point.
    """

    def __init__(
            self,
            objectives: Optional[Dict[str, ScoreComponentDirection]] = None,
            normalization: Normalization = "robust_minmax",
            fairness_constraints: Optional[Dict[str, float]] = None,
            distance: Literal["chebyshev", "euclidean"] = "chebyshev",
            tie_breakers: Tuple[str, ...] = (),
            tie_dirs: Tuple[ScoreComponentDirection, ...] = (),
            eps: float = 1e-9,
    ) -> None:
        """Initialize the Pareto utopia policy.

        Args:
            objectives: Optional explicit mapping of objective name to
                direction ("min"/"max"); when None, directions are taken
                from the ObjectiveSet at selection time.
            normalization: Normalization mode applied before distances.
            fairness_constraints: Optional mapping of column name to upper
                bound; rows exceeding any bound are marked infeasible.
            distance: Distance metric to the utopia point.
            tie_breakers: Objective names used to break distance ties.
            tie_dirs: Direction ("min"/"max") matching each tie-breaker.
            eps: Tolerance used in dominance comparisons.
        """
        self.objectives = objectives
        self.normalization = normalization
        self.fairness_constraints = fairness_constraints or {}
        self.distance = distance
        self.tie_breakers = tie_breakers
        self.tie_dirs = tie_dirs
        self.eps = eps
        # Diagnostic masks populated by select_best
        self.pareto_mask: pd.Series | None = None
        self.feasible_mask: pd.Series | None = None

    def select_best(self, evaluations_df: pd.DataFrame, objective_set: ObjectiveSet) -> Tuple[Hashable, ...]:
        """Select best solution using Pareto utopia approach."""
        df = evaluations_df.copy()
        if df.empty:
            raise ValueError("evaluations_df is empty; nothing to select")
        dirs = self._resolve_objectives_from_meta(objective_set.component_meta(), df)
        feas = self._apply_constraints(df, self.fairness_constraints)
        df["feasible"] = feas
        Y = df[list(dirs.keys())].copy()

        # Orient all objectives for minimization (negate "max" objectives)
        for c, d in dirs.items():
            if d == "max":
                Y[c] = -Y[c]

        # Pareto front is computed over feasible rows only
        pareto_mask = self._pareto_mask(Y[feas])
        df["pareto"] = False
        df.loc[feas.index[feas].tolist(), "pareto"] = pareto_mask.values

        # Store masks for diagnostics
        self.pareto_mask = df["pareto"].copy()
        self.feasible_mask = df["feasible"].copy()

        # Normalize and compute distance to the utopia (ideal) point.
        # BUG FIX: when no row is feasible, the ideal was all-NaN and every
        # distance became NaN; fall back to the ideal over all rows.
        Z = self._normalize(Y, self.normalization)
        ref = Z[df["feasible"]] if df["feasible"].any() else Z
        ideal = ref.min(axis=0)
        df["utopia_distance"] = self._dist(Z, ideal, self.distance)

        # Select from Pareto front; fall back to feasible rows, then all rows
        front = df[(df["feasible"]) & (df["pareto"])]
        if len(front) == 0:
            front = df[df["feasible"]] if df["feasible"].any() else df

        # BUG FIX: tie-breakers previously re-sorted the whole front and
        # discarded the distance order; sort once, lexicographically
        # (stable mergesort), so tie-breakers only apply among equal scores.
        sort_cols = ["utopia_distance"]
        ascending = [True]
        for col, d in zip(self.tie_breakers, self.tie_dirs):
            sort_cols.append(col)
            ascending.append(d == "min")
        best = front.sort_values(sort_cols, ascending=ascending, kind="mergesort")

        return tuple(best.iloc[0]["slices"])

    def _resolve_objectives(self, objective_set: ObjectiveSet, df: pd.DataFrame) -> Dict[str, ScoreComponentDirection]:
        """Legacy method for backward compatibility."""
        if self.objectives is not None:
            return self.objectives
        meta = objective_set.component_meta()
        return {name: info["direction"] for name, info in meta.items() if name in df.columns}

    def _resolve_objectives_from_meta(self, meta: Dict[str, Dict[str, object]], df: pd.DataFrame) -> Dict[str, ScoreComponentDirection]:
        """Resolve objectives from component metadata.

        Explicit `self.objectives` wins; otherwise use directions from
        `meta`, restricted to columns actually present in `df`.
        (FIX: annotation previously used the builtin `any` instead of a type.)
        """
        if self.objectives is not None:
            return self.objectives
        return {name: info["direction"] for name, info in meta.items() if name in df.columns}

    def _apply_constraints(self, df: pd.DataFrame, cons: Dict[str, float]) -> pd.Series:
        """Return a boolean mask of rows satisfying all upper-bound constraints."""
        if not cons:
            return pd.Series(True, index=df.index)
        mask = pd.Series(True, index=df.index)
        for col, thr in cons.items():
            if col not in df.columns:
                raise ValueError(f"Unknown constraint metric: {col}")
            mask &= df[col] <= thr
        return mask

    def _normalize(self, Y: pd.DataFrame, mode: Normalization) -> pd.DataFrame:
        """Normalize oriented objective columns according to *mode*."""
        if mode == "robust_minmax":
            # Scale with 5th-95th percentiles to dampen outliers; a zero
            # spread falls back to a unit denominator to avoid div-by-zero.
            q_lo = Y.quantile(0.05)
            q_hi = Y.quantile(0.95)
            denom = (q_hi - q_lo).replace(0, 1.0)
            return ((Y - q_lo) / denom).clip(lower=0.0)
        if mode == "zscore_iqr":
            med = Y.median()
            iqr = (Y.quantile(0.75) - Y.quantile(0.25)).replace(0, 1.0)
            return (Y - med) / iqr
        # "none" (or unrecognized): leave unchanged
        return Y

    def _dist(self, Z: pd.DataFrame, ideal: pd.Series, kind: str) -> pd.Series:
        """Per-row distance from `Z` to `ideal` (Chebyshev or Euclidean)."""
        D = (Z - ideal.values)
        if kind == "chebyshev":
            return D.abs().max(axis=1)
        return np.sqrt((D.pow(2)).sum(axis=1))

    def _pareto_mask(self, Y: pd.DataFrame) -> pd.Series:
        """Boolean mask of non-dominated rows (O(n^2) pairwise check)."""
        A = Y.values
        n = A.shape[0]
        mask = np.ones(n, dtype=bool)
        for i in range(n):
            if not mask[i]:
                continue
            for j in range(n):
                if i == j:
                    continue
                if self._dominates(A[j], A[i]):
                    mask[i] = False
                    break
        return pd.Series(mask, index=Y.index)

    def _dominates(self, a: np.ndarray, b: np.ndarray) -> bool:
        """True if `a` dominates `b`: no worse everywhere, strictly better somewhere
        (both with tolerance `eps`; objectives are min-oriented here)."""
        return np.all(a <= b + self.eps) and np.any(a < b - self.eps)

select_best

select_best(evaluations_df: DataFrame, objective_set: ObjectiveSet) -> tuple[Hashable, ...]

Select best solution using Pareto utopia approach.

Source code in energy_repset/selection_policies/pareto.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
def select_best(self, evaluations_df: pd.DataFrame, objective_set: ObjectiveSet) -> Tuple[Hashable, ...]:
    """Select best solution using Pareto utopia approach."""
    df = evaluations_df.copy()
    if df.empty:
        raise ValueError("evaluations_df is empty; nothing to select")
    dirs = self._resolve_objectives_from_meta(objective_set.component_meta(), df)
    feas = self._apply_constraints(df, self.fairness_constraints)
    df["feasible"] = feas
    Y = df[list(dirs.keys())].copy()

    # Orient all objectives for minimization (negate "max" objectives)
    for c, d in dirs.items():
        if d == "max":
            Y[c] = -Y[c]

    # Pareto front is computed over feasible rows only
    pareto_mask = self._pareto_mask(Y[feas])
    df["pareto"] = False
    df.loc[feas.index[feas].tolist(), "pareto"] = pareto_mask.values

    # Store masks for diagnostics
    self.pareto_mask = df["pareto"].copy()
    self.feasible_mask = df["feasible"].copy()

    # Normalize and compute distance to the utopia (ideal) point.
    # BUG FIX: when no row is feasible, the ideal was all-NaN and every
    # distance became NaN; fall back to the ideal over all rows.
    Z = self._normalize(Y, self.normalization)
    ref = Z[df["feasible"]] if df["feasible"].any() else Z
    ideal = ref.min(axis=0)
    df["utopia_distance"] = self._dist(Z, ideal, self.distance)

    # Select from Pareto front; fall back to feasible rows, then all rows
    front = df[(df["feasible"]) & (df["pareto"])]
    if len(front) == 0:
        front = df[df["feasible"]] if df["feasible"].any() else df

    # BUG FIX: tie-breakers previously re-sorted the whole front and
    # discarded the distance order; sort once, lexicographically
    # (stable mergesort), so tie-breakers only apply among equal scores.
    sort_cols = ["utopia_distance"]
    ascending = [True]
    for col, d in zip(self.tie_breakers, self.tie_dirs):
        sort_cols.append(col)
        ascending.append(d == "min")
    best = front.sort_values(sort_cols, ascending=ascending, kind="mergesort")

    return tuple(best.iloc[0]["slices"])

ParetoOutcome dataclass

Bases: PolicyOutcome

Source code in energy_repset/selection_policies/pareto.py
19
20
21
22
23
24
@dataclass(frozen=True)
class ParetoOutcome(PolicyOutcome):
    """PolicyOutcome extended with Pareto-selection metadata."""

    # Objective name -> optimization direction ("min"/"max").
    objectives: Dict[str, ScoreComponentDirection]
    # Name of the column holding the feasibility mask
    # (presumably in scores_annotated — confirm against the producing policy).
    feasible_mask_col: str
    # Name of the column holding Pareto-front membership.
    pareto_mask_col: str
    # Name of the column holding the policy's scalar score.
    score_col: str