Skip to content

Score Components

WassersteinFidelity

Bases: ScoreComponent

Measures distribution similarity using 1D Wasserstein distance per variable.

Computes the Earth Mover's Distance between the full dataset's distribution and the selected subset's distribution for each variable. Distances are normalized by the interquartile range (IQR) to make them scale-invariant and comparable across variables.

Lower scores indicate better distribution matching. This component is particularly effective for preserving statistical properties of the data.

Parameters:

Name Type Description Default
variable_weights Dict[str, float] | None

Optional per-variable weights for prioritizing certain variables in the score. If None, all variables weighted equally (1.0). If specified, missing variables get weight 0.0.

None

Examples:

>>> # Equal weights (default)
>>> component = WassersteinFidelity()
>>> component.prepare(context)
>>> score = component.score((0, 3, 6, 9))
>>> print(f"Wasserstein distance: {score:.3f}")
>>> # With variable-specific weights
>>> component = WassersteinFidelity(
...     variable_weights={'demand': 2.0, 'solar': 1.0, 'wind': 0.5}
... )
>>> component.prepare(context)
>>> score = component.score((0, 3, 6, 9))
>>> # demand has 2x impact, solar 1x, wind 0.5x, other variables 0x
Source code in energy_repset/score_components/wasserstein_fidelity.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
class WassersteinFidelity(ScoreComponent):
    """Measures distribution similarity using 1D Wasserstein distance per variable.

    Computes the Earth Mover's Distance between the full dataset's distribution
    and the selected subset's distribution for each variable. Distances are
    normalized by the interquartile range (IQR) to make them scale-invariant
    and comparable across variables.

    Lower scores indicate better distribution matching. This component is
    particularly effective for preserving statistical properties of the data.

    Args:
        variable_weights: Optional per-variable weights for prioritizing certain
            variables in the score. If None, all variables weighted equally (1.0).
            If specified, missing variables get weight 0.0.

    Examples:
        >>> # Equal weights (default)
        >>> component = WassersteinFidelity()
        >>> component.prepare(context)
        >>> score = component.score((0, 3, 6, 9))
        >>> print(f"Wasserstein distance: {score:.3f}")

        >>> # With variable-specific weights
        >>> component = WassersteinFidelity(
        ...     variable_weights={'demand': 2.0, 'solar': 1.0, 'wind': 0.5}
        ... )
        >>> component.prepare(context)
        >>> score = component.score((0, 3, 6, 9))
        >>> # demand has 2x impact, solar 1x, wind 0.5x, other variables 0x
    """

    def __init__(self, variable_weights: Dict[str, float] | None = None) -> None:
        """Initialize Wasserstein fidelity component.

        Args:
            variable_weights: Optional per-variable weights. If None, all
                variables weighted equally (1.0). If specified, missing
                variables get weight 0.0.
        """
        self.name = "wasserstein"
        self.direction = "min"
        self._requested_weights = variable_weights

        # State below is populated by prepare(); annotated Optional because
        # the component cannot score until prepare() has run.
        self.df: pd.DataFrame | None = None
        self.labels = None
        self.vars = None
        self.iqr = None
        self.variable_weights: Dict[str, float] | None = None

    def prepare(self, context: ProblemContext) -> None:
        """Precompute reference distributions and normalization factors.

        Args:
            context: Problem context with raw time-series data.
        """
        df = context.df_raw.copy()

        self.df = df
        self.labels = context.slicer.labels_for_index(df.index)
        self.vars = list(df.columns)
        # Zero IQR (constant column) is replaced by 1.0 to avoid division by zero.
        self.iqr = (df.quantile(0.75) - df.quantile(0.25)).replace(0, 1.0)

        self.variable_weights = self._default_weight_normalization(self._requested_weights, self.vars)

    def score(self, combination: SliceCombination) -> float:
        """Compute normalized Wasserstein distance between full and selection.

        Args:
            combination: Slice identifiers forming the selection.

        Returns:
            Sum of per-variable Wasserstein distances, each normalized by IQR
            and weighted according to variable_weights. Lower is better.
            Returns infinity if the selection is empty.
        """
        sel_mask = pd.Index(self.labels).isin(combination)
        if not sel_mask.any():
            # An empty selection has no distribution to compare; treat it as
            # infinitely bad, mirroring DurationCurveFidelity's behavior
            # (wasserstein_distance would raise on an empty array otherwise).
            return float("inf")
        sel = self.df.loc[sel_mask]
        total = 0.0
        for v in self.vars:
            dist = wasserstein_distance(self.df[v].values, sel[v].values)
            total += self.variable_weights[v] * (dist / float(self.iqr[v]))
        return float(total)

__init__

__init__(variable_weights: Dict[str, float] | None = None) -> None

Initialize Wasserstein fidelity component.

Parameters:

Name Type Description Default
variable_weights Dict[str, float] | None

Optional per-variable weights. If None, all variables weighted equally (1.0). If specified, missing variables get weight 0.0.

None
Source code in energy_repset/score_components/wasserstein_fidelity.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def __init__(self, variable_weights: Dict[str, float] | None = None) -> None:
    """Initialize Wasserstein fidelity component.

    Args:
        variable_weights: Optional per-variable weights. If None, all
            variables weighted equally (1.0). If specified, missing
            variables get weight 0.0.
    """
    self.name = "wasserstein"
    self.direction = "min"
    self._requested_weights = variable_weights

    # Attributes below are populated by prepare(); they stay None until then,
    # hence the Optional annotations.
    self.df: pd.DataFrame | None = None
    self.labels = None
    self.vars = None
    self.iqr = None
    self.variable_weights: Dict[str, float] | None = None

prepare

prepare(context: ProblemContext) -> None

Precompute reference distributions and normalization factors.

Parameters:

Name Type Description Default
context ProblemContext

Problem context with raw time-series data.

required
Source code in energy_repset/score_components/wasserstein_fidelity.py
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def prepare(self, context: ProblemContext) -> None:
    """Cache the reference data and per-variable normalization factors.

    Args:
        context: Problem context with raw time-series data.
    """
    data = context.df_raw.copy()
    self.df = data
    self.labels = context.slicer.labels_for_index(data.index)
    self.vars = list(data.columns)
    # A zero IQR (constant column) would divide by zero later; use 1.0 instead.
    q75, q25 = data.quantile(0.75), data.quantile(0.25)
    self.iqr = (q75 - q25).replace(0, 1.0)
    self.variable_weights = self._default_weight_normalization(
        self._requested_weights, self.vars
    )

score

score(combination: SliceCombination) -> float

Compute normalized Wasserstein distance between full and selection.

Parameters:

Name Type Description Default
combination SliceCombination

Slice identifiers forming the selection.

required

Returns:

Type Description
float

Sum of per-variable Wasserstein distances, each normalized by IQR

float

and weighted according to variable_weights. Lower is better.

Source code in energy_repset/score_components/wasserstein_fidelity.py
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def score(self, combination: SliceCombination) -> float:
    """Compute normalized Wasserstein distance between full and selection.

    Args:
        combination: Slice identifiers forming the selection.

    Returns:
        Sum of per-variable Wasserstein distances, each normalized by IQR
        and weighted according to variable_weights. Lower is better.
        Returns infinity if the selection is empty.
    """
    sel_mask = pd.Index(self.labels).isin(combination)
    if not sel_mask.any():
        # An empty selection has no distribution to compare; treat it as
        # infinitely bad (wasserstein_distance raises on empty arrays).
        return float("inf")
    sel = self.df.loc[sel_mask]
    s = 0.0
    for v in self.vars:
        s += self.variable_weights[v] * (wasserstein_distance(self.df[v].values, sel[v].values) / float(self.iqr[v]))
    return float(s)

CorrelationFidelity

Bases: ScoreComponent

Preserves cross-variable correlation structure using Frobenius norm.

Measures how well the selection preserves the correlation structure between variables by comparing the full dataset's correlation matrix with the selection's correlation matrix. Uses relative Frobenius norm of the difference matrix.

Lower scores indicate better preservation of variable relationships. This component is important for downstream modeling tasks that depend on realistic co-occurrence patterns (e.g., solar and wind generation).

Examples:

>>> component = CorrelationFidelity()
>>> component.prepare(context)
>>> score = component.score((0, 3, 6, 9))
>>> print(f"Correlation mismatch: {score:.3f}")
# 0.0 would be perfect preservation, 1.0+ indicates poor preservation
>>> # Combine with Wasserstein in an ObjectiveSet
>>> from energy_repset import ObjectiveSet, ObjectiveSpec
>>> objectives = ObjectiveSet([
...     ObjectiveSpec('wasserstein', WassersteinFidelity(), weight=1.0),
...     ObjectiveSpec('correlation', CorrelationFidelity(), weight=1.0)
... ])
Source code in energy_repset/score_components/correlation_fidelity.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
class CorrelationFidelity(ScoreComponent):
    """Preserves cross-variable correlation structure using Frobenius norm.

    Compares the correlation matrix of the full dataset against that of the
    selected subset and reports the relative Frobenius norm of their
    difference. A score of 0 means the selection reproduces the full
    correlation structure exactly; larger values mean worse preservation.

    Keeping correlations intact matters for downstream modeling tasks that
    depend on realistic co-occurrence patterns (e.g., solar and wind
    generation).

    Examples:
        >>> component = CorrelationFidelity()
        >>> component.prepare(context)
        >>> score = component.score((0, 3, 6, 9))
        >>> print(f"Correlation mismatch: {score:.3f}")
    """

    def __init__(self) -> None:
        """Set up component identity: a minimized objective named 'correlation'."""
        self.direction = "min"
        self.name = "correlation"

    def prepare(self, context: ProblemContext) -> None:
        """Cache raw data, slice labels, and the reference correlation matrix.

        Args:
            context: Problem context with raw time-series data.
        """
        data = context.df_raw.copy()
        self.df = data
        self.labels = context.slicer.labels_for_index(data.index)
        self.full_corr = data.corr()

    def score(self, combination: SliceCombination) -> float:
        """Compute relative Frobenius norm of the correlation-matrix difference.

        Args:
            combination: Slice identifiers forming the selection.

        Returns:
            ||C_full - C_sel||_F / ||C_full||_F where C denotes correlation
            matrices. Lower is better (0 = perfect match).
        """
        mask = pd.Index(self.labels).isin(combination)
        delta = self.full_corr - self.df.loc[mask].corr()
        frob_diff = float(np.linalg.norm(delta.values, ord="fro"))
        frob_full = float(np.linalg.norm(self.full_corr.values, ord="fro"))
        # Epsilon keeps the ratio finite for a degenerate reference matrix.
        return frob_diff / (frob_full + 1e-12)

__init__

__init__() -> None

Initialize correlation fidelity component.

Source code in energy_repset/score_components/correlation_fidelity.py
42
43
44
45
def __init__(self) -> None:
    """Set up component identity: a minimized objective named 'correlation'."""
    self.direction = "min"
    self.name = "correlation"

prepare

prepare(context: ProblemContext) -> None

Precompute full dataset's correlation matrix.

Parameters:

Name Type Description Default
context ProblemContext

Problem context with raw time-series data.

required
Source code in energy_repset/score_components/correlation_fidelity.py
47
48
49
50
51
52
53
54
55
56
57
def prepare(self, context: ProblemContext) -> None:
    """Cache raw data, slice labels, and the reference correlation matrix.

    Args:
        context: Problem context with raw time-series data.
    """
    data = context.df_raw.copy()
    self.df = data
    self.labels = context.slicer.labels_for_index(data.index)
    # Correlation matrix of the complete dataset, compared against in score().
    self.full_corr = data.corr()

score

score(combination: SliceCombination) -> float

Compute relative Frobenius norm of correlation matrix difference.

Parameters:

Name Type Description Default
combination SliceCombination

Slice identifiers forming the selection.

required

Returns:

Type Description
float

Relative Frobenius norm ||C_full - C_sel||_F / ||C_full||_F where

float

C denotes correlation matrices. Lower is better (0 = perfect match).

Source code in energy_repset/score_components/correlation_fidelity.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def score(self, combination: SliceCombination) -> float:
    """Compute relative Frobenius norm of the correlation-matrix difference.

    Args:
        combination: Slice identifiers forming the selection.

    Returns:
        ||C_full - C_sel||_F / ||C_full||_F where C denotes correlation
        matrices. Lower is better (0 = perfect match).
    """
    mask = pd.Index(self.labels).isin(combination)
    delta = self.full_corr - self.df.loc[mask].corr()
    frob_diff = float(np.linalg.norm(delta.values, ord="fro"))
    frob_full = float(np.linalg.norm(self.full_corr.values, ord="fro"))
    # Epsilon keeps the ratio finite for a degenerate reference matrix.
    return frob_diff / (frob_full + 1e-12)

DiurnalFidelity

Bases: ScoreComponent

Measures how well the selection preserves hourly (diurnal) patterns.

Compares the mean hourly profiles between the full dataset and the selected subset. This is useful for applications where intraday patterns matter (e.g., electricity demand profiles, solar generation curves).

The score is the normalized mean squared error between the full and selected hour-of-day profiles, averaged across all variables and hours.

Examples:

>>> from energy_repset.score_components import DiurnalFidelity
>>> from energy_repset.objectives import ObjectiveSet
>>>
>>> # Add diurnal fidelity to your objective set
>>> objectives = ObjectiveSet({
...     'diurnal': (1.0, DiurnalFidelity())
... })
>>>
>>> # For hourly data, this ensures selected periods
>>> # preserve the typical daily load shape
Source code in energy_repset/score_components/diurnal_fidelity.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
class DiurnalFidelity(ScoreComponent):
    """Measures how well the selection preserves hourly (diurnal) patterns.

    The full dataset's mean hour-of-day profile is compared against the
    selected subset's profile; the score is the normalized mean squared
    error between the two, averaged across all variables and hours. Useful
    wherever intraday shape matters (e.g., electricity demand profiles,
    solar generation curves).

    Examples:
        >>> from energy_repset.score_components import DiurnalFidelity
        >>> from energy_repset.objectives import ObjectiveSet
        >>>
        >>> # Add diurnal fidelity to your objective set
        >>> objectives = ObjectiveSet({
        ...     'diurnal': (1.0, DiurnalFidelity())
        ... })
        >>>
        >>> # For hourly data, this ensures selected periods
        >>> # preserve the typical daily load shape
    """

    def __init__(self) -> None:
        """Set up component identity: a minimized objective named 'diurnal'."""
        self.direction = "min"
        self.name = "diurnal"

    def prepare(self, context: ProblemContext) -> None:
        """Cache data and the full dataset's mean hour-of-day profile.

        Args:
            context: Problem context containing raw time-series data.
        """
        data = context.df_raw.copy()
        self.df = data
        self.labels = context.slicer.labels_for_index(data.index)
        # Reference profile: per-variable mean for each hour of day.
        self.full = data.groupby(data.index.hour).mean(numeric_only=True)

    def score(self, combination: SliceCombination) -> float:
        """Compute normalized MSE between full and selection diurnal profiles.

        Args:
            combination: Tuple of slice identifiers forming the selection.

        Returns:
            Normalized mean squared error across all variables and hours;
            lower values indicate better preservation of diurnal patterns.
        """
        mask = pd.Index(self.labels).isin(combination)
        subset = self.df.loc[mask]
        profile = subset.groupby(subset.index.hour).mean(numeric_only=True)
        # Compare only the hours present in both profiles.
        ref, cand = self.full.align(profile, join="inner", axis=0)
        mse = float((ref - cand).pow(2).mean().mean())
        norm = float(ref.pow(2).mean().mean()) + 1e-12
        return mse / norm

__init__

__init__() -> None

Initialize diurnal fidelity component.

Source code in energy_repset/score_components/diurnal_fidelity.py
37
38
39
40
def __init__(self) -> None:
    """Set up component identity: a minimized objective named 'diurnal'."""
    self.direction = "min"
    self.name = "diurnal"

prepare

prepare(context: ProblemContext) -> None

Precompute the full dataset's mean hourly profile.

Parameters:

Name Type Description Default
context ProblemContext

Problem context containing raw time-series data.

required
Source code in energy_repset/score_components/diurnal_fidelity.py
42
43
44
45
46
47
48
49
50
51
52
def prepare(self, context: ProblemContext) -> None:
    """Cache data and the full dataset's mean hour-of-day profile.

    Args:
        context: Problem context containing raw time-series data.
    """
    data = context.df_raw.copy()
    self.df = data
    self.labels = context.slicer.labels_for_index(data.index)
    # Reference profile: per-variable mean for each hour of day.
    self.full = data.groupby(data.index.hour).mean(numeric_only=True)

score

score(combination: SliceCombination) -> float

Compute normalized MSE between full and selection diurnal profiles.

Parameters:

Name Type Description Default
combination SliceCombination

Tuple of slice identifiers forming the selection.

required

Returns:

Type Description
float

Normalized mean squared error across all variables and hours.

float

Lower values indicate better preservation of diurnal patterns.

Source code in energy_repset/score_components/diurnal_fidelity.py
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def score(self, combination: SliceCombination) -> float:
    """Compute normalized MSE between full and selection diurnal profiles.

    Args:
        combination: Tuple of slice identifiers forming the selection.

    Returns:
        Normalized mean squared error across all variables and hours;
        lower values indicate better preservation of diurnal patterns.
    """
    mask = pd.Index(self.labels).isin(combination)
    subset = self.df.loc[mask]
    profile = subset.groupby(subset.index.hour).mean(numeric_only=True)
    # Compare only the hours present in both profiles.
    ref, cand = self.full.align(profile, join="inner", axis=0)
    mse = float((ref - cand).pow(2).mean().mean())
    norm = float(ref.pow(2).mean().mean()) + 1e-12
    return mse / norm

DurationCurveFidelity

Bases: ScoreComponent

Matches duration curves using quantile approximation and IQR normalization.

Measures how well the selection preserves the statistical distribution of each variable by comparing quantiles of the full and selected data. This is more computationally efficient than NRMSEFidelity for large datasets since it compares a fixed number of quantiles rather than full sorted arrays.

Uses IQR (interquartile range) normalization instead of mean normalization, making it more robust to outliers.

Parameters:

Name Type Description Default
n_quantiles int

Number of quantiles to compute for duration curve approximation. Default is 101 (0%, 1%, ..., 100%).

101
variable_weights Dict[str, float] | None

Optional per-variable weights for prioritizing certain variables in the score. If None, all variables weighted equally (1.0). If specified, missing variables get weight 0.0.

None

Examples:

>>> from energy_repset.score_components import DurationCurveFidelity
>>> from energy_repset.objectives import ObjectiveSet
>>>
>>> # Default: 101 quantiles (0%, 1%, ..., 100%)
>>> objectives = ObjectiveSet({
...     'duration': (1.0, DurationCurveFidelity())
... })
>>>
>>> # Coarser approximation for faster computation
>>> objectives = ObjectiveSet({
...     'duration': (1.0, DurationCurveFidelity(n_quantiles=21))
... })
>>>
>>> # With variable weights for prioritizing specific variables
>>> objectives = ObjectiveSet({
...     'duration': (1.0, DurationCurveFidelity(
...         n_quantiles=101,
...         variable_weights={'demand': 2.0, 'solar': 1.0, 'wind': 0.5}
...     ))
... })
>>> # demand has 2x impact, solar 1x, wind 0.5x, other variables 0x
Source code in energy_repset/score_components/duration_curve_fidelity.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
class DurationCurveFidelity(ScoreComponent):
    """Matches duration curves using quantile approximation and IQR normalization.

    Measures how well the selection preserves the statistical distribution
    of each variable by comparing quantiles of the full and selected data.
    This is more computationally efficient than NRMSEFidelity for large
    datasets since it compares a fixed number of quantiles rather than
    full sorted arrays.

    Uses IQR (interquartile range) normalization instead of mean normalization,
    making it more robust to outliers.

    Args:
        n_quantiles: Number of quantiles to compute for duration curve
            approximation. Default is 101 (0%, 1%, ..., 100%).
        variable_weights: Optional per-variable weights for prioritizing certain
            variables in the score. If None, all variables weighted equally (1.0).
            If specified, missing variables get weight 0.0.

    Examples:
        >>> from energy_repset.score_components import DurationCurveFidelity
        >>> from energy_repset.objectives import ObjectiveSet
        >>>
        >>> # Default: 101 quantiles (0%, 1%, ..., 100%)
        >>> objectives = ObjectiveSet({
        ...     'duration': (1.0, DurationCurveFidelity())
        ... })
        >>>
        >>> # Coarser approximation for faster computation
        >>> objectives = ObjectiveSet({
        ...     'duration': (1.0, DurationCurveFidelity(n_quantiles=21))
        ... })
        >>>
        >>> # With variable weights for prioritizing specific variables
        >>> objectives = ObjectiveSet({
        ...     'duration': (1.0, DurationCurveFidelity(
        ...         n_quantiles=101,
        ...         variable_weights={'demand': 2.0, 'solar': 1.0, 'wind': 0.5}
        ...     ))
        ... })
        >>> # demand has 2x impact, solar 1x, wind 0.5x, other variables 0x
    """

    def __init__(
        self,
        n_quantiles: int = 101,
        variable_weights: Dict[str, float] | None = None
    ) -> None:
        """Initialize duration curve fidelity component.

        Args:
            n_quantiles: Number of quantiles for duration curve approximation.
            variable_weights: Optional per-variable weights. If None, all
                variables weighted equally (1.0). If specified, missing
                variables get weight 0.0.
        """
        self.name = "nrmse_duration_curve"
        self.direction = "min"
        self.n_quantiles = n_quantiles
        self._requested_weights = variable_weights

        # State below is populated by prepare(); initialized to None here so
        # the attribute set is explicit (same pattern as WassersteinFidelity).
        self.df: pd.DataFrame | None = None
        self.labels = None
        self.vars = None
        self.quantiles = None
        self.full_quantiles = None
        self.iqr = None
        self.variable_weights: Dict[str, float] | None = None

    def prepare(self, context: ProblemContext) -> None:
        """Precompute quantiles and normalization factors for full dataset.

        Args:
            context: Problem context containing raw time-series data.
        """
        # Defensive copy so later mutation of the caller's frame cannot skew
        # the cached quantiles (consistent with the other fidelity components).
        df = context.df_raw.copy()
        self.df = df
        self.labels = context.slicer.labels_for_index(df.index)
        self.vars = list(df.columns)

        self.variable_weights = self._default_weight_normalization(self._requested_weights, self.vars)

        # Evenly spaced probabilities approximating each duration curve.
        self.quantiles = np.linspace(0, 1, self.n_quantiles)
        self.full_quantiles = self.df.quantile(self.quantiles)
        # Zero IQR (constant column) is replaced by 1.0 to avoid division by zero.
        self.iqr = (df.quantile(0.75) - df.quantile(0.25)).replace(0, 1.0)

    def score(self, combination: SliceCombination) -> float:
        """Compute sum of per-variable NRMSE for quantile-based duration curves.

        Args:
            combination: Tuple of slice identifiers forming the selection.

        Returns:
            Weighted sum of per-variable NRMSE values using IQR normalization.
            Returns infinity if the selection is empty.
        """
        sel_mask = pd.Index(self.labels).isin(combination)
        if not sel_mask.any():
            # No rows selected: nothing to compare against.
            return np.inf
        sel = self.df.loc[sel_mask]

        sel_quantiles = sel.quantile(self.quantiles)

        total_nrmse = 0.0
        for v in self.vars:
            squared_errors = (self.full_quantiles[v].values - sel_quantiles[v].values) ** 2
            rmse = np.sqrt(squared_errors.mean())
            total_nrmse += self.variable_weights[v] * (rmse / float(self.iqr[v]))

        return float(total_nrmse)

__init__

__init__(n_quantiles: int = 101, variable_weights: Dict[str, float] | None = None) -> None

Initialize duration curve fidelity component.

Parameters:

Name Type Description Default
n_quantiles int

Number of quantiles for duration curve approximation.

101
variable_weights Dict[str, float] | None

Optional per-variable weights. If None, all variables weighted equally (1.0). If specified, missing variables get weight 0.0.

None
Source code in energy_repset/score_components/duration_curve_fidelity.py
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def __init__(
    self,
    n_quantiles: int = 101,
    variable_weights: Dict[str, float] | None = None
) -> None:
    """Initialize duration curve fidelity component.

    Args:
        n_quantiles: Number of quantiles for duration curve approximation.
        variable_weights: Optional per-variable weights. If None, all
            variables weighted equally (1.0). If specified, missing
            variables get weight 0.0.
    """
    self.name = "nrmse_duration_curve"
    self.direction = "min"
    self.n_quantiles = n_quantiles
    self._requested_weights = variable_weights
    # Populated by prepare(); None until then, hence the Optional annotation.
    self.variable_weights: Dict[str, float] | None = None

prepare

prepare(context: ProblemContext) -> None

Precompute quantiles and normalization factors for full dataset.

Parameters:

Name Type Description Default
context ProblemContext

Problem context containing raw time-series data.

required
Source code in energy_repset/score_components/duration_curve_fidelity.py
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def prepare(self, context: ProblemContext) -> None:
    """Precompute quantiles and normalization factors for full dataset.

    Args:
        context: Problem context containing raw time-series data.
    """
    # Defensive copy so later mutation of the caller's frame cannot skew
    # the cached quantiles (consistent with the other fidelity components).
    df = context.df_raw.copy()
    self.df = df
    self.labels = context.slicer.labels_for_index(df.index)
    self.vars = list(df.columns)

    self.variable_weights = self._default_weight_normalization(self._requested_weights, self.vars)

    # Evenly spaced probabilities approximating each duration curve.
    self.quantiles = np.linspace(0, 1, self.n_quantiles)
    self.full_quantiles = self.df.quantile(self.quantiles)
    # Zero IQR (constant column) is replaced by 1.0 to avoid division by zero.
    self.iqr = (df.quantile(0.75) - df.quantile(0.25)).replace(0, 1.0)

score

score(combination: SliceCombination) -> float

Compute sum of per-variable NRMSE for quantile-based duration curves.

Parameters:

Name Type Description Default
combination SliceCombination

Tuple of slice identifiers forming the selection.

required

Returns:

Type Description
float

Weighted sum of per-variable NRMSE values using IQR normalization.

float

Returns infinity if the selection is empty.

Source code in energy_repset/score_components/duration_curve_fidelity.py
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def score(self, combination: SliceCombination) -> float:
    """Compute sum of per-variable NRMSE for quantile-based duration curves.

    Args:
        combination: Tuple of slice identifiers forming the selection.

    Returns:
        Weighted sum of per-variable NRMSE values using IQR normalization.
        Returns infinity if the selection is empty.
    """
    mask = pd.Index(self.labels).isin(combination)
    if not mask.any():
        # No rows selected: nothing to compare against.
        return np.inf
    subset_quantiles = self.df.loc[mask].quantile(self.quantiles)

    total = 0.0
    for var in self.vars:
        errors = self.full_quantiles[var].values - subset_quantiles[var].values
        rmse = np.sqrt((errors ** 2).mean())
        total += self.variable_weights[var] * (rmse / float(self.iqr[var]))
    return float(total)

NRMSEFidelity

Bases: ScoreComponent

Matches duration curves using interpolation and NRMSE.

Measures how well the selection preserves the statistical distribution of each variable by comparing full and selected duration curves (sorted value profiles). The selection's duration curve is interpolated to match the full curve's length, then NRMSE is computed.

This approach uses the full sorted arrays and is accurate but can be computationally expensive for very large datasets. For efficiency with large data, consider DurationCurveFidelity which uses quantiles.

Parameters:

Name Type Description Default
variable_weights Dict[str, float] | None

Optional per-variable weights for prioritizing certain variables in the score. If None, all variables weighted equally (1.0). If specified, missing variables get weight 0.0.

None

Examples:

>>> from energy_repset.score_components import NRMSEFidelity
>>> from energy_repset.objectives import ObjectiveSet
>>>
>>> # Basic usage with equal variable weights
>>> objectives = ObjectiveSet({
...     'nrmse': (1.0, NRMSEFidelity())
... })
>>>
>>> # Prioritize specific variables
>>> objectives = ObjectiveSet({
...     'nrmse': (1.0, NRMSEFidelity(
...         variable_weights={'demand': 2.0, 'solar': 1.0, 'wind': 0.5}
...     ))
... })
>>> # demand has 2x impact, solar 1x, wind 0.5x, other variables 0x
Source code in energy_repset/score_components/nrmse_fidelity.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
class NRMSEFidelity(ScoreComponent):
    """Matches duration curves using interpolation and NRMSE.

    Measures how well the selection preserves the statistical distribution
    of each variable by comparing full and selected duration curves (sorted
    value profiles). The selection's duration curve is interpolated to match
    the full curve's length, then NRMSE is computed.

    This approach uses the full sorted arrays and is accurate but can be
    computationally expensive for very large datasets. For efficiency with
    large data, consider DurationCurveFidelity which uses quantiles.

    Args:
        variable_weights: Optional per-variable weights for prioritizing certain
            variables in the score. If None, all variables weighted equally (1.0).
            If specified, missing variables get weight 0.0.

    Examples:
        >>> from energy_repset.score_components import NRMSEFidelity
        >>> from energy_repset.objectives import ObjectiveSet
        >>>
        >>> # Basic usage with equal variable weights
        >>> objectives = ObjectiveSet({
        ...     'nrmse': (1.0, NRMSEFidelity())
        ... })
        >>>
        >>> # Prioritize specific variables
        >>> objectives = ObjectiveSet({
        ...     'nrmse': (1.0, NRMSEFidelity(
        ...         variable_weights={'demand': 2.0, 'solar': 1.0, 'wind': 0.5}
        ...     ))
        ... })
        >>> # demand has 2x impact, solar 1x, wind 0.5x, other variables 0x
    """

    def __init__(self, variable_weights: Dict[str, float] | None = None) -> None:
        """Initialize NRMSE fidelity component.

        Args:
            variable_weights: Optional per-variable weights. If None, all
                variables weighted equally (1.0). If specified, missing
                variables get weight 0.0.
        """
        self.name = "nrmse"
        self.direction = "min"
        self._requested_weights = variable_weights
        # Fix: annotation must admit None — the attribute stays None until
        # prepare() resolves the requested weights against the actual columns.
        self.variable_weights: Dict[str, float] | None = None

    def prepare(self, context: ProblemContext) -> None:
        """Precompute full duration curves and normalization factors.

        Args:
            context: Problem context containing raw time-series data.
        """
        df = context.df_raw
        self.df = df
        self.labels = context.slicer.labels_for_index(df.index)
        self.vars = list(df.columns)

        self.variable_weights = self._default_weight_normalization(self._requested_weights, self.vars)

        # Descending-sorted values form the duration curve for each variable.
        self.full_curves = {
            v: np.sort(df[v].values)[::-1] for v in self.vars
        }
        self.full_means = {
            v: np.mean(df[v].values) for v in self.vars
        }

    def score(self, combination: SliceCombination) -> float:
        """Compute sum of per-variable NRMSE for duration curves.

        Args:
            combination: Tuple of slice identifiers forming the selection.

        Returns:
            Weighted sum of per-variable NRMSE values. Returns infinity
            if the selection is empty.
        """
        sel_mask = pd.Index(self.labels).isin(combination)
        if not sel_mask.any():
            return np.inf

        sel = self.df.loc[sel_mask]
        s = 0.0

        for v in self.vars:
            full_curve = self.full_curves[v]
            sel_curve = np.sort(sel[v].values)[::-1]

            if len(sel_curve) == 0:
                continue

            # Interpolate selection's duration curve to match full length
            x_full = np.linspace(0, 1, len(full_curve))
            x_sel = np.linspace(0, 1, len(sel_curve))
            resampled_sel_curve = np.interp(x_full, x_sel, sel_curve)

            # Calculate RMSE
            mse = np.mean((full_curve - resampled_sel_curve) ** 2)
            rmse = np.sqrt(mse)

            # Normalize by mean (epsilon guards against zero-mean variables)
            mean_val = self.full_means[v]
            nrmse = rmse / (mean_val + 1e-12)

            s += self.variable_weights[v] * nrmse

        return float(s)

__init__

__init__(variable_weights: Dict[str, float] | None = None) -> None

Initialize NRMSE fidelity component.

Parameters:

Name Type Description Default
variable_weights Dict[str, float] | None

Optional per-variable weights. If None, all variables weighted equally (1.0). If specified, missing variables get weight 0.0.

None
Source code in energy_repset/score_components/nrmse_fidelity.py
50
51
52
53
54
55
56
57
58
59
60
61
def __init__(self, variable_weights: Dict[str, float] | None = None) -> None:
    """Initialize NRMSE fidelity component.

    Args:
        variable_weights: Optional per-variable weights. If None, all
            variables weighted equally (1.0). If specified, missing
            variables get weight 0.0.
    """
    self.name = "nrmse"
    self.direction = "min"
    self._requested_weights = variable_weights
    # Fix: the attribute is None until prepare() runs, so the annotation
    # must be Optional rather than a bare Dict.
    self.variable_weights: Dict[str, float] | None = None

prepare

prepare(context: ProblemContext) -> None

Precompute full duration curves and normalization factors.

Parameters:

Name Type Description Default
context ProblemContext

Problem context containing raw time-series data.

required
Source code in energy_repset/score_components/nrmse_fidelity.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
def prepare(self, context: ProblemContext) -> None:
    """Cache raw data, resolve weights, and precompute duration curves.

    Args:
        context: Problem context containing raw time-series data.
    """
    data = context.df_raw
    self.df = data
    self.labels = context.slicer.labels_for_index(data.index)
    self.vars = list(data.columns)

    self.variable_weights = self._default_weight_normalization(
        self._requested_weights, self.vars
    )

    # Duration curve = values sorted descending; mean is the NRMSE scale.
    self.full_curves = {}
    self.full_means = {}
    for col in self.vars:
        values = data[col].values
        self.full_curves[col] = np.sort(values)[::-1]
        self.full_means[col] = np.mean(values)

score

score(combination: SliceCombination) -> float

Compute sum of per-variable NRMSE for duration curves.

Parameters:

Name Type Description Default
combination SliceCombination

Tuple of slice identifiers forming the selection.

required

Returns:

Type Description
float

Weighted sum of per-variable NRMSE values. Returns infinity

float

if the selection is empty.

Source code in energy_repset/score_components/nrmse_fidelity.py
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
def score(self, combination: SliceCombination) -> float:
    """Compute sum of per-variable NRMSE for duration curves.

    Args:
        combination: Tuple of slice identifiers forming the selection.

    Returns:
        Weighted sum of per-variable NRMSE values. Returns infinity
        if the selection is empty.
    """
    sel_mask = pd.Index(self.labels).isin(combination)
    if not sel_mask.any():
        return np.inf

    sel = self.df.loc[sel_mask]
    s = 0.0

    for v in self.vars:
        full_curve = self.full_curves[v]
        sel_curve = np.sort(sel[v].values)[::-1]

        if len(sel_curve) == 0:
            continue

        # Interpolate selection's duration curve to match full length
        x_full = np.linspace(0, 1, len(full_curve))
        x_sel = np.linspace(0, 1, len(sel_curve))
        resampled_sel_curve = np.interp(x_full, x_sel, sel_curve)

        # Calculate RMSE
        mse = np.mean((full_curve - resampled_sel_curve) ** 2)
        rmse = np.sqrt(mse)

        # Normalize by mean
        mean_val = self.full_means[v]
        nrmse = rmse / (mean_val + 1e-12)

        s += self.variable_weights[v] * nrmse

    return float(s)

DTWFidelity

Bases: ScoreComponent

Measures representation quality using Dynamic Time Warping distance.

Computes the average DTW distance from each unselected slice to its nearest representative in the selection. This is analogous to inertia in k-medoids clustering but uses DTW instead of Euclidean distance.

DTW allows temporal alignment, making it suitable for time-series where similar patterns may be shifted in time (e.g., seasonal load profiles with varying peak times).

Requires the tslearn package for DTW computation.

Examples:

>>> from energy_repset.score_components import DTWFidelity
>>> from energy_repset.objectives import ObjectiveSet
>>>
>>> # Use DTW for time-series with temporal shifts
>>> objectives = ObjectiveSet({
...     'dtw': (1.0, DTWFidelity())
... })
>>>
>>> # Good for multi-day periods with similar but shifted patterns
>>> # e.g., weeks with similar load but peak occurring at different times
Note

This requires tslearn to be installed: pip install tslearn

Source code in energy_repset/score_components/dtw_fidelity.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
class DTWFidelity(ScoreComponent):
    """Scores representation quality via Dynamic Time Warping distance.

    For every slice not in the selection, finds the DTW distance to the
    closest selected slice and averages these distances. Conceptually this
    is k-medoids inertia with DTW replacing Euclidean distance.

    Because DTW aligns series in time, it suits data where similar shapes
    occur with temporal offsets (e.g. seasonal load profiles whose peaks
    shift between periods).

    Requires the `tslearn` package for DTW computation.

    Examples:
        >>> from energy_repset.score_components import DTWFidelity
        >>> from energy_repset.objectives import ObjectiveSet
        >>>
        >>> # Use DTW for time-series with temporal shifts
        >>> objectives = ObjectiveSet({
        ...     'dtw': (1.0, DTWFidelity())
        ... })
        >>>
        >>> # Good for multi-day periods with similar but shifted patterns
        >>> # e.g., weeks with similar load but peak occurring at different times

    Note:
        This requires `tslearn` to be installed:
            pip install tslearn
    """

    def __init__(self) -> None:
        """Initialize DTW fidelity component."""
        self.name = "dtw"
        self.direction = "min"

    def prepare(self, context: ProblemContext) -> None:
        """Split the raw data into per-slice value arrays.

        Args:
            context: Problem context containing raw time-series data.
        """
        data = context.df_raw
        slice_labels = context.slicer.labels_for_index(data.index)
        self.slices = {
            label: group.values for label, group in data.groupby(slice_labels)
        }
        self.all_labels = set(self.slices)

    def score(self, combination: SliceCombination) -> float:
        """Average DTW distance from each unselected slice to its best representative.

        Args:
            combination: Tuple of slice identifiers forming the selection.

        Returns:
            Average DTW distance from each unselected slice to its nearest
            representative. Returns 0.0 if all slices are selected or
            none are selected.

        Raises:
            ImportError: If tslearn is not installed.
        """
        # Imported lazily so the dependency is only needed when scoring.
        from tslearn.metrics import dtw

        chosen = set(combination)
        remaining = self.all_labels - chosen

        if not chosen or not remaining:
            return 0.0

        reps = [self.slices[lbl] for lbl in chosen]

        total = 0.0
        for lbl in remaining:
            total += min(dtw(self.slices[lbl], rep) for rep in reps)

        return total / len(remaining)

__init__

__init__() -> None

Initialize DTW fidelity component.

Source code in energy_repset/score_components/dtw_fidelity.py
44
45
46
47
def __init__(self) -> None:
    """Initialize DTW fidelity component (minimized objective)."""
    self.name, self.direction = "dtw", "min"

prepare

prepare(context: ProblemContext) -> None

Precompute per-slice time-series data.

Parameters:

Name Type Description Default
context ProblemContext

Problem context containing raw time-series data.

required
Source code in energy_repset/score_components/dtw_fidelity.py
49
50
51
52
53
54
55
56
57
58
59
60
def prepare(self, context: ProblemContext) -> None:
    """Split the raw data into per-slice value arrays.

    Args:
        context: Problem context containing raw time-series data.
    """
    data = context.df_raw
    slice_labels = context.slicer.labels_for_index(data.index)
    self.slices = {}
    for label, group in data.groupby(slice_labels):
        self.slices[label] = group.values
    self.all_labels = set(self.slices)

score

score(combination: SliceCombination) -> float

Compute average DTW distance from unselected to selected slices.

Parameters:

Name Type Description Default
combination SliceCombination

Tuple of slice identifiers forming the selection.

required

Returns:

Type Description
float

Average DTW distance from each unselected slice to its nearest

float

representative. Returns 0.0 if all slices are selected or

float

none are selected.

Raises:

Type Description
ImportError

If tslearn is not installed.

Source code in energy_repset/score_components/dtw_fidelity.py
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def score(self, combination: SliceCombination) -> float:
    """Average DTW distance from each unselected slice to its best representative.

    Args:
        combination: Tuple of slice identifiers forming the selection.

    Returns:
        Average DTW distance from each unselected slice to its nearest
        representative. Returns 0.0 if all slices are selected or
        none are selected.

    Raises:
        ImportError: If tslearn is not installed.
    """
    # Imported lazily so the dependency is only needed when scoring.
    from tslearn.metrics import dtw

    chosen = set(combination)
    remaining = self.all_labels - chosen

    if not chosen or not remaining:
        return 0.0

    reps = [self.slices[lbl] for lbl in chosen]

    total = 0.0
    for lbl in remaining:
        total += min(dtw(self.slices[lbl], rep) for rep in reps)

    return total / len(remaining)

DiurnalDTWFidelity

Bases: ScoreComponent

Preserves hourly patterns using Dynamic Time Warping on diurnal profiles.

Combines the concepts of DiurnalFidelity and DTWFidelity: compares hour-of-day aggregated profiles between full and selected data, but uses DTW distance instead of MSE to allow for temporal flexibility.

This is useful when you want to preserve the general shape of hourly patterns but allow for some temporal shifting (e.g., load profiles with similar shapes but shifted peak hours).

Uses a custom DTW implementation (no external dependencies), normalized by the standard deviation of the full diurnal profile.

Examples:

>>> from energy_repset.score_components import DiurnalDTWFidelity
>>> from energy_repset.objectives import ObjectiveSet
>>>
>>> # Preserve diurnal patterns with temporal flexibility
>>> objectives = ObjectiveSet({
...     'diurnal_dtw': (1.0, DiurnalDTWFidelity())
... })
>>>
>>> # Useful when hourly patterns are important but exact timing
>>> # alignment is not critical (e.g., shifted daily load curves)
Note

Unlike DTWFidelity, this does not require tslearn since it uses a custom DTW implementation on aggregated hourly profiles.

Source code in energy_repset/score_components/diurnal_dtw_fidelity.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
class DiurnalDTWFidelity(ScoreComponent):
    """Preserves hour-of-day patterns via DTW on aggregated diurnal profiles.

    Blends DiurnalFidelity and DTWFidelity: full and selected data are each
    reduced to hour-of-day mean profiles, which are then compared with DTW
    distance rather than MSE, tolerating modest temporal shifts.

    Useful when the overall shape of the daily pattern matters more than
    exact peak timing (e.g. load curves whose peaks drift by an hour).

    Ships its own DTW implementation (no external dependencies); distances
    are normalized by the standard deviation of the full diurnal profile.

    Examples:
        >>> from energy_repset.score_components import DiurnalDTWFidelity
        >>> from energy_repset.objectives import ObjectiveSet
        >>>
        >>> # Preserve diurnal patterns with temporal flexibility
        >>> objectives = ObjectiveSet({
        ...     'diurnal_dtw': (1.0, DiurnalDTWFidelity())
        ... })
        >>>
        >>> # Useful when hourly patterns are important but exact timing
        >>> # alignment is not critical (e.g., shifted daily load curves)

    Note:
        Unlike DTWFidelity, this does not require tslearn since it uses
        a custom DTW implementation on aggregated hourly profiles.
    """

    def __init__(self) -> None:
        """Initialize diurnal DTW fidelity component."""
        self.name = "diurnal_dtw"
        self.direction = "min"

    def _dtw_distance(self, s1: np.ndarray, s2: np.ndarray) -> float:
        """Compute Dynamic Time Warping distance between two 1D arrays.

        Args:
            s1: First time series.
            s2: Second time series.

        Returns:
            DTW distance between s1 and s2.
        """
        n, m = len(s1), len(s2)
        # Classic O(n*m) accumulated-cost matrix with an inf border row/column.
        acc = np.full((n + 1, m + 1), np.inf)
        acc[0, 0] = 0.0

        for i in range(1, n + 1):
            left_val = s1[i - 1]
            for j in range(1, m + 1):
                step_cost = abs(left_val - s2[j - 1])
                acc[i, j] = step_cost + min(
                    acc[i - 1, j], acc[i, j - 1], acc[i - 1, j - 1]
                )

        return acc[n, m]

    def prepare(self, context: ProblemContext) -> None:
        """Precompute the full hour-of-day profile and its normalization.

        Args:
            context: Problem context containing raw time-series data.
        """
        data = context.df_raw
        self.df = data
        self.labels = context.slicer.labels_for_index(data.index)
        self.vars = list(data.columns)
        self.full_diurnal = data.groupby(data.index.hour).mean(numeric_only=True)
        # Zero stds (constant profiles) are mapped to 1.0 to avoid div-by-zero.
        self.norm_factor = self.full_diurnal.std().replace(0, 1.0)

    def score(self, combination: SliceCombination) -> float:
        """Sum of per-variable normalized DTW distances between diurnal profiles.

        Args:
            combination: Tuple of slice identifiers forming the selection.

        Returns:
            Sum of per-variable DTW distances between full and selected
            diurnal profiles, normalized by standard deviation. Returns
            infinity if the selection is empty.
        """
        mask = pd.Index(self.labels).isin(combination)
        if not mask.any():
            return np.inf

        subset = self.df.loc[mask]
        subset_diurnal = subset.groupby(subset.index.hour).mean(numeric_only=True)

        # Compare only the hours present in both profiles.
        full_aligned, sel_aligned = self.full_diurnal.align(
            subset_diurnal, join="inner", axis=0
        )
        if full_aligned.empty:
            return np.inf

        total = 0.0
        for var in self.vars:
            if var in full_aligned and var in sel_aligned:
                dist = self._dtw_distance(
                    full_aligned[var].values, sel_aligned[var].values
                )
                total += dist / float(self.norm_factor[var])

        return float(total)

__init__

__init__() -> None

Initialize diurnal DTW fidelity component.

Source code in energy_repset/score_components/diurnal_dtw_fidelity.py
46
47
48
49
def __init__(self) -> None:
    """Initialize diurnal DTW fidelity component (minimized objective)."""
    self.name, self.direction = "diurnal_dtw", "min"

prepare

prepare(context: ProblemContext) -> None

Precompute full dataset's diurnal profile and normalization factors.

Parameters:

Name Type Description Default
context ProblemContext

Problem context containing raw time-series data.

required
Source code in energy_repset/score_components/diurnal_dtw_fidelity.py
73
74
75
76
77
78
79
80
81
82
83
84
def prepare(self, context: ProblemContext) -> None:
    """Precompute the full hour-of-day profile and its normalization.

    Args:
        context: Problem context containing raw time-series data.
    """
    data = context.df_raw
    self.df = data
    self.labels = context.slicer.labels_for_index(data.index)
    self.vars = list(data.columns)
    # Hour-of-day mean profile across the entire dataset.
    self.full_diurnal = data.groupby(data.index.hour).mean(numeric_only=True)
    # Zero stds (constant profiles) become 1.0 so division stays safe.
    self.norm_factor = self.full_diurnal.std().replace(0, 1.0)

score

score(combination: SliceCombination) -> float

Compute sum of per-variable normalized DTW distances for diurnal profiles.

Parameters:

Name Type Description Default
combination SliceCombination

Tuple of slice identifiers forming the selection.

required

Returns:

Type Description
float

Sum of per-variable DTW distances between full and selected

float

diurnal profiles, normalized by standard deviation. Returns

float

infinity if the selection is empty.

Source code in energy_repset/score_components/diurnal_dtw_fidelity.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
def score(self, combination: SliceCombination) -> float:
    """Sum of per-variable normalized DTW distances between diurnal profiles.

    Args:
        combination: Tuple of slice identifiers forming the selection.

    Returns:
        Sum of per-variable DTW distances between full and selected
        diurnal profiles, normalized by standard deviation. Returns
        infinity if the selection is empty.
    """
    mask = pd.Index(self.labels).isin(combination)
    if not mask.any():
        return np.inf

    subset = self.df.loc[mask]
    subset_diurnal = subset.groupby(subset.index.hour).mean(numeric_only=True)

    # Compare only hours present in both profiles.
    full_aligned, sel_aligned = self.full_diurnal.align(
        subset_diurnal, join="inner", axis=0
    )
    if full_aligned.empty:
        return np.inf

    total = 0.0
    for var in self.vars:
        if var in full_aligned and var in sel_aligned:
            dist = self._dtw_distance(
                full_aligned[var].values, sel_aligned[var].values
            )
            total += dist / float(self.norm_factor[var])

    return float(total)

DiversityReward

Bases: ScoreComponent

Rewards selections with diverse, mutually distant representative periods.

Computes the average pairwise Euclidean distance between selected slice features in feature space. Higher diversity can help ensure the selection covers a wider range of conditions, avoiding redundant representatives.

This is particularly useful when combined with fidelity objectives to balance accuracy with coverage.

Examples:

>>> from energy_repset.score_components import DiversityReward
>>> from energy_repset.objectives import ObjectiveSet
>>>
>>> # Encourage diverse representatives
>>> objectives = ObjectiveSet({
...     'diversity': (0.3, DiversityReward())
... })
>>>
>>> # Combine with fidelity for balanced selection
>>> from energy_repset.score_components import WassersteinFidelity
>>> objectives = ObjectiveSet({
...     'fidelity': (1.0, WassersteinFidelity()),
...     'diversity': (0.2, DiversityReward())
... })
Source code in energy_repset/score_components/diversity_reward.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
class DiversityReward(ScoreComponent):
    """Rewards selections with diverse, mutually distant representative periods.

    Computes the average pairwise Euclidean distance between selected slice
    features in feature space. Higher diversity can help ensure the selection
    covers a wider range of conditions, avoiding redundant representatives.

    This is particularly useful when combined with fidelity objectives to
    balance accuracy with coverage.

    Examples:
        >>> from energy_repset.score_components import DiversityReward
        >>> from energy_repset.objectives import ObjectiveSet
        >>>
        >>> # Encourage diverse representatives
        >>> objectives = ObjectiveSet({
        ...     'diversity': (0.3, DiversityReward())
        ... })
        >>>
        >>> # Combine with fidelity for balanced selection
        >>> from energy_repset.score_components import WassersteinFidelity
        >>> objectives = ObjectiveSet({
        ...     'fidelity': (1.0, WassersteinFidelity()),
        ...     'diversity': (0.2, DiversityReward())
        ... })
    """

    def __init__(self) -> None:
        """Initialize diversity reward component."""
        self.name = "diversity"
        self.direction = "max"

    def prepare(self, context: ProblemContext) -> None:
        """Store the feature matrix for pairwise distance computation.

        Args:
            context: Problem context with computed features.
        """
        self.features = context.df_features.copy()

    def score(self, combination: SliceCombination) -> float:
        """Compute mean pairwise Euclidean distance among selected features.

        Args:
            combination: Tuple of slice identifiers forming the selection.

        Returns:
            Average pairwise distance in feature space. Returns 0.0 if
            fewer than two slices are selected.
        """
        # Local import keeps the module's top-level imports untouched.
        from itertools import combinations

        X = self.features.loc[list(combination)].values
        if X.shape[0] < 2:
            return 0.0
        # Idiomatic pairwise enumeration instead of a manual index double loop.
        dists = [float(np.linalg.norm(a - b)) for a, b in combinations(X, 2)]
        return sum(dists) / len(dists)

__init__

__init__() -> None

Initialize diversity reward component.

Source code in energy_repset/score_components/diversity_reward.py
41
42
43
44
def __init__(self) -> None:
    """Initialize diversity reward component (maximized objective)."""
    self.name, self.direction = "diversity", "max"

prepare

prepare(context: ProblemContext) -> None

Store the feature matrix for pairwise distance computation.

Parameters:

Name Type Description Default
context ProblemContext

Problem context with computed features.

required
Source code in energy_repset/score_components/diversity_reward.py
46
47
48
49
50
51
52
def prepare(self, context: ProblemContext) -> None:
    """Store the feature matrix for pairwise distance computation.

    Args:
        context: Problem context with computed features.
    """
    # Defensive copy: later mutation of the context's feature frame
    # cannot affect scoring.
    self.features = context.df_features.copy()

score

score(combination: SliceCombination) -> float

Compute mean pairwise Euclidean distance among selected features.

Parameters:

Name Type Description Default
combination SliceCombination

Tuple of slice identifiers forming the selection.

required

Returns:

Type Description
float

Average pairwise distance in feature space. Returns 0.0 if

float

fewer than two slices are selected.

Source code in energy_repset/score_components/diversity_reward.py
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def score(self, combination: SliceCombination) -> float:
    """Compute mean pairwise Euclidean distance among selected features.

    Args:
        combination: Tuple of slice identifiers forming the selection.

    Returns:
        Average pairwise distance in feature space. Returns 0.0 if
        fewer than two slices are selected.
    """
    X = self.features.loc[list(combination)].values
    if X.shape[0] < 2:
        return 0.0
    n = X.shape[0]
    dsum = 0.0
    cnt = 0
    for i in range(n):
        for j in range(i + 1, n):
            dsum += float(np.linalg.norm(X[i] - X[j]))
            cnt += 1
    return dsum / cnt

CentroidBalance

Bases: ScoreComponent

Penalizes selections whose centroid deviates from the global center.

Computes the Euclidean distance between the centroid of selected slice features and the origin (global center in standardized feature space).

This objective ensures the selection doesn't systematically bias toward extreme conditions, maintaining balance around typical conditions.

Examples:

>>> from energy_repset.score_components import CentroidBalance
>>> from energy_repset.objectives import ObjectiveSet
>>>
>>> # Penalize selections biased toward extreme periods
>>> objectives = ObjectiveSet({
...     'balance': (0.5, CentroidBalance())
... })
>>>
>>> # Used in examples/ex2_feature_space.py to maintain balanced selections
>>> from energy_repset.score_components import WassersteinFidelity
>>> objectives = ObjectiveSet({
...     'fidelity': (1.0, WassersteinFidelity()),
...     'balance': (0.3, CentroidBalance())
... })
Source code in energy_repset/score_components/centroid_balance.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
class CentroidBalance(ScoreComponent):
    """Penalizes selections whose centroid drifts from the global center.

    Measures the Euclidean norm of the selected slices' feature centroid,
    i.e. its distance from the origin (the global center in standardized
    feature space).

    Keeping this small prevents the selection from systematically leaning
    toward extreme conditions, so typical conditions stay represented.

    Examples:
        >>> from energy_repset.score_components import CentroidBalance
        >>> from energy_repset.objectives import ObjectiveSet
        >>>
        >>> # Penalize selections biased toward extreme periods
        >>> objectives = ObjectiveSet({
        ...     'balance': (0.5, CentroidBalance())
        ... })
        >>>
        >>> # Used in examples/ex2_feature_space.py to maintain balanced selections
        >>> from energy_repset.score_components import WassersteinFidelity
        >>> objectives = ObjectiveSet({
        ...     'fidelity': (1.0, WassersteinFidelity()),
        ...     'balance': (0.3, CentroidBalance())
        ... })
    """

    def __init__(self) -> None:
        """Initialize centroid balance component."""
        self.name = "centroid_balance"
        self.direction = "min"

    def prepare(self, context: ProblemContext) -> None:
        """Store the feature matrix for centroid computation.

        Args:
            context: Problem context with computed features (should be
                standardized for meaningful centroid distances).
        """
        self.features = context.df_features.copy()

    def score(self, combination: SliceCombination) -> float:
        """Compute distance from selection centroid to global center.

        Args:
            combination: Tuple of slice identifiers forming the selection.

        Returns:
            Euclidean distance from the selection's feature centroid to
            the origin. Lower values indicate more balanced selections.
        """
        selected = self.features.loc[list(combination)].values
        centroid = selected.mean(axis=0)
        return float(np.linalg.norm(centroid))

__init__

__init__() -> None

Initialize centroid balance component.

Source code in energy_repset/score_components/centroid_balance.py
40
41
42
43
def __init__(self) -> None:
    """Initialize centroid balance component."""
    self.name = "centroid_balance"
    self.direction = "min"

prepare

prepare(context: ProblemContext) -> None

Store the feature matrix for centroid computation.

Parameters:

Name Type Description Default
context ProblemContext

Problem context with computed features (should be standardized for meaningful centroid distances).

required
Source code in energy_repset/score_components/centroid_balance.py
45
46
47
48
49
50
51
52
def prepare(self, context: ProblemContext) -> None:
    """Store the feature matrix for centroid computation.

    Args:
        context: Problem context with computed features (should be
            standardized for meaningful centroid distances).
    """
    self.features = context.df_features.copy()

score

score(combination: SliceCombination) -> float

Compute distance from selection centroid to global center.

Parameters:

Name Type Description Default
combination SliceCombination

Tuple of slice identifiers forming the selection.

required

Returns:

Type Description
float

Euclidean distance from the selection's feature centroid to

float

the origin. Lower values indicate more balanced selections.

Source code in energy_repset/score_components/centroid_balance.py
54
55
56
57
58
59
60
61
62
63
64
65
66
def score(self, combination: SliceCombination) -> float:
    """Compute distance from selection centroid to global center.

    Args:
        combination: Tuple of slice identifiers forming the selection.

    Returns:
        Euclidean distance from the selection's feature centroid to
        the origin. Lower values indicate more balanced selections.
    """
    X = self.features.loc[list(combination)].values
    mu = X.mean(axis=0)
    return float(np.linalg.norm(mu))

CoverageBalance

Bases: ScoreComponent

Promotes balanced coverage by encouraging uniform responsibility.

Uses RBF (Radial Basis Function) kernel-based soft assignment to compute how much "responsibility" each selected representative has for covering all candidate slices. Penalizes selections where some representatives cover many slices while others cover few.

This is conceptually similar to cluster balance in k-medoids, ensuring no representative is over- or under-utilized.

Parameters:

Name Type Description Default
gamma float

RBF kernel sharpness parameter (higher = sharper assignments). Default is 1.0.

1.0

Examples:

>>> from energy_repset.score_components import CoverageBalance
>>> from energy_repset.objectives import ObjectiveSet
>>>
>>> # Ensure balanced coverage with default sharpness
>>> objectives = ObjectiveSet({
...     'coverage': (0.5, CoverageBalance())
... })
>>>
>>> # Sharper assignments (more cluster-like behavior)
>>> objectives = ObjectiveSet({
...     'coverage': (0.5, CoverageBalance(gamma=2.0))
... })
>>>
>>> # Softer assignments (smoother transitions)
>>> objectives = ObjectiveSet({
...     'coverage': (0.5, CoverageBalance(gamma=0.5))
... })
Source code in energy_repset/score_components/coverage_balance.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
class CoverageBalance(ScoreComponent):
    """Promotes balanced coverage by encouraging uniform responsibility.

    Uses RBF (Radial Basis Function) kernel-based soft assignment to compute
    how much "responsibility" each selected representative has for covering
    all candidate slices. Penalizes selections where some representatives
    cover many slices while others cover few.

    This is conceptually similar to cluster balance in k-medoids, ensuring
    no representative is over- or under-utilized.

    Args:
        gamma: RBF kernel sharpness parameter (higher = sharper assignments).
            Default is 1.0.

    Examples:
        >>> from energy_repset.score_components import CoverageBalance
        >>> from energy_repset.objectives import ObjectiveSet
        >>>
        >>> # Ensure balanced coverage with default sharpness
        >>> objectives = ObjectiveSet({
        ...     'coverage': (0.5, CoverageBalance())
        ... })
        >>>
        >>> # Sharper assignments (more cluster-like behavior)
        >>> objectives = ObjectiveSet({
        ...     'coverage': (0.5, CoverageBalance(gamma=2.0))
        ... })
        >>>
        >>> # Softer assignments (smoother transitions)
        >>> objectives = ObjectiveSet({
        ...     'coverage': (0.5, CoverageBalance(gamma=0.5))
        ... })
    """

    def __init__(self, gamma: float = 1.0) -> None:
        """Initialize coverage balance component.

        Args:
            gamma: RBF kernel sharpness. Higher values create sharper
                cluster-like assignments.
        """
        self.name = "coverage_balance"
        self.direction = "min"
        self.gamma = gamma

    def prepare(self, context: ProblemContext) -> None:
        """Store feature matrix for responsibility computation.

        Args:
            context: Problem context with computed features.
        """
        self.features = context.df_features.copy()
        # Sanitize NaNs once up front so distance computations stay finite.
        self.all_X = np.nan_to_num(self.features.values, nan=0.0)

    def _responsibilities(self, combination: SliceCombination) -> np.ndarray:
        """Compute soft assignment responsibilities using RBF kernel.

        Args:
            combination: Tuple of slice identifiers.

        Returns:
            Array of responsibility weights for each selected slice,
            summing to 1.0.
        """
        # FIX: sanitize the selected rows the same way `prepare` sanitizes
        # all_X. Previously a NaN in a selected slice's features propagated
        # through d2/K into `mass`, and the `mass.sum() <= 0` guard below
        # does not catch NaN (NaN comparisons are False), so the score
        # itself became NaN.
        sel_X = np.nan_to_num(
            self.features.loc[list(combination)].values, nan=0.0
        )
        # Compute squared distances: (n_all, n_sel)
        d2 = ((self.all_X[:, None, :] - sel_X[None, :, :]) ** 2).sum(axis=2)
        # RBF kernel weights
        K = np.exp(-self.gamma * d2)
        # Responsibility = sum of weights across all slices
        mass = K.sum(axis=0)
        if mass.sum() <= 0:
            # Degenerate case (all kernel weights underflowed to zero):
            # fall back to a perfectly uniform assignment.
            return np.ones(len(combination)) / len(combination)
        return mass / mass.sum()

    def score(self, combination: SliceCombination) -> float:
        """Compute L2 deviation of responsibilities from uniform distribution.

        Args:
            combination: Tuple of slice identifiers forming the selection.

        Returns:
            L2 norm of (responsibilities - uniform). Zero indicates perfectly
            balanced coverage; higher values indicate imbalance.
        """
        r = self._responsibilities(combination)
        u = np.ones_like(r) / len(r)
        return float(np.linalg.norm(r - u))

__init__

__init__(gamma: float = 1.0) -> None

Initialize coverage balance component.

Parameters:

Name Type Description Default
gamma float

RBF kernel sharpness. Higher values create sharper cluster-like assignments.

1.0
Source code in energy_repset/score_components/coverage_balance.py
49
50
51
52
53
54
55
56
57
58
def __init__(self, gamma: float = 1.0) -> None:
    """Initialize coverage balance component.

    Args:
        gamma: RBF kernel sharpness. Higher values create sharper
            cluster-like assignments.
    """
    self.name = "coverage_balance"
    self.direction = "min"
    self.gamma = gamma

prepare

prepare(context: ProblemContext) -> None

Store feature matrix for responsibility computation.

Parameters:

Name Type Description Default
context ProblemContext

Problem context with computed features.

required
Source code in energy_repset/score_components/coverage_balance.py
60
61
62
63
64
65
66
67
def prepare(self, context: ProblemContext) -> None:
    """Store feature matrix for responsibility computation.

    Args:
        context: Problem context with computed features.
    """
    self.features = context.df_features.copy()
    self.all_X = np.nan_to_num(self.features.values, nan=0.0)

score

score(combination: SliceCombination) -> float

Compute L2 deviation of responsibilities from uniform distribution.

Parameters:

Name Type Description Default
combination SliceCombination

Tuple of slice identifiers forming the selection.

required

Returns:

Type Description
float

L2 norm of (responsibilities - uniform). Zero indicates perfectly

float

balanced coverage; higher values indicate imbalance.

Source code in energy_repset/score_components/coverage_balance.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def score(self, combination: SliceCombination) -> float:
    """Compute L2 deviation of responsibilities from uniform distribution.

    Args:
        combination: Tuple of slice identifiers forming the selection.

    Returns:
        L2 norm of (responsibilities - uniform). Zero indicates perfectly
        balanced coverage; higher values indicate imbalance.
    """
    r = self._responsibilities(combination)
    u = np.ones_like(r) / len(r)
    return float(np.linalg.norm(r - u))