Skip to content

Diagnostics

Feature Space

FeatureSpaceScatter2D

2D scatter plot for visualizing feature space.

Creates an interactive scatter plot of any two features from df_features. Can highlight a specific selection of slices. Works with any feature columns including PCA components ('pc_0', 'pc_1'), statistical features ('mean__wind'), or mixed features.

Examples:

>>> # Visualize PCA space
>>> scatter = FeatureSpaceScatter2D()
>>> fig = scatter.plot(context.df_features, x='pc_0', y='pc_1')
>>> fig.update_layout(title='PCA Feature Space')
>>> fig.show()

>>> # Visualize with selection highlighted
>>> fig = scatter.plot(
...     context.df_features,
...     x='mean__demand',
...     y='pc_0',
...     selection=('2024-01', '2024-04', '2024-07')
... )

>>> # Color by another feature
>>> fig = scatter.plot(
...     context.df_features,
...     x='pc_0',
...     y='pc_1',
...     color='std__wind'
... )
Source code in energy_repset/diagnostics/feature_space/feature_space_scatter.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
class FeatureSpaceScatter2D:
    """2D scatter plot for visualizing feature space.

    Creates an interactive scatter plot of any two features from df_features.
    Can highlight a specific selection of slices. Works with any feature columns
    including PCA components ('pc_0', 'pc_1'), statistical features ('mean__wind'),
    or mixed features.

    Examples:

        >>> # Visualize PCA space
        >>> scatter = FeatureSpaceScatter2D()
        >>> fig = scatter.plot(context.df_features, x='pc_0', y='pc_1')
        >>> fig.update_layout(title='PCA Feature Space')
        >>> fig.show()

        >>> # Visualize with selection highlighted
        >>> fig = scatter.plot(
        ...     context.df_features,
        ...     x='mean__demand',
        ...     y='pc_0',
        ...     selection=('2024-01', '2024-04', '2024-07')
        ... )

        >>> # Color by another feature
        >>> fig = scatter.plot(
        ...     context.df_features,
        ...     x='pc_0',
        ...     y='pc_1',
        ...     color='std__wind'
        ... )
    """

    def __init__(self):
        """Initialize the scatter plot diagnostic.

        The constructor takes no arguments and sets no attributes.
        """
        pass

    def plot(
        self,
        df_features: pd.DataFrame,
        x: str,
        y: str,
        selection: SliceCombination = None,
        color: str = None,
    ) -> go.Figure:
        """Create a 2D scatter plot of feature space.

        The input frame is copied before the helper columns ``slice_label``
        and ``is_selected`` are added, so ``df_features`` itself is not
        modified. When a selection is given, the selected rows are moved to
        the end of the plotting frame so that their trace is created after
        the unselected one and is not hidden behind it.

        Args:
            df_features: Feature matrix with slices as rows, features as columns.
            x: Column name for x-axis.
            y: Column name for y-axis.
            selection: Optional tuple of slice identifiers to highlight.
                Identifiers are matched against the index of df_features.
            color: Optional column name to use for color mapping. When given
                together with a selection, selected slices are marked with a
                star symbol instead of a distinct color.

        Returns:
            Plotly figure object ready for display or further customization.

        Raises:
            KeyError: If x, y, or color columns are not in df_features.
        """
        # Validate columns (x is checked before y, then color)
        for axis_column in (x, y):
            if axis_column not in df_features.columns:
                raise KeyError(f"Column '{axis_column}' not found in df_features")
        if color is not None and color not in df_features.columns:
            raise KeyError(f"Column '{color}' not found in df_features")

        # Prepare data on a copy so the caller's frame stays untouched
        plot_df = df_features.copy()
        plot_df['slice_label'] = plot_df.index.astype(str)

        # Add selection indicator
        highlight = selection is not None
        if highlight:
            plot_df['is_selected'] = plot_df.index.isin(set(selection))
            # Order so selected points are drawn on top: unselected rows
            # first, selected rows last (relative order within each group
            # is preserved).
            selected_mask = plot_df['is_selected']
            plot_df = pd.concat(
                [plot_df[~selected_mask], plot_df[selected_mask]],
                ignore_index=False,
            )
        else:
            plot_df['is_selected'] = False

        # Assemble the keyword arguments once instead of repeating the call
        scatter_kwargs = dict(x=x, y=y, hover_data=['slice_label'])
        if color is not None:
            # Color by feature value; selection (if any) is shown by symbol
            scatter_kwargs['color'] = color
            if highlight:
                scatter_kwargs['symbol'] = 'is_selected'
                scatter_kwargs['symbol_map'] = {True: 'star', False: 'circle'}
        elif highlight:
            # Color by selection status
            scatter_kwargs['color'] = 'is_selected'
            scatter_kwargs['color_discrete_map'] = {True: 'red', False: 'lightgray'}

        fig = px.scatter(plot_df, **scatter_kwargs)

        # Update layout for better readability
        fig.update_layout(
            xaxis_title=x,
            yaxis_title=y,
            hovermode='closest',
        )

        return fig

__init__

__init__()

Initialize the scatter plot diagnostic.

Source code in energy_repset/diagnostics/feature_space/feature_space_scatter.py
44
45
46
def __init__(self):
    """Initialize the scatter plot diagnostic.

    The constructor takes no arguments and sets no attributes.
    """
    pass

plot

plot(df_features: DataFrame, x: str, y: str, selection: SliceCombination = None, color: str = None) -> Figure

Create a 2D scatter plot of feature space.

Parameters:

Name Type Description Default
df_features DataFrame

Feature matrix with slices as rows, features as columns.

required
x str

Column name for x-axis.

required
y str

Column name for y-axis.

required
selection SliceCombination

Optional tuple of slice identifiers to highlight.

None
color str

Optional column name to use for color mapping.

None

Returns:

Type Description
Figure

Plotly figure object ready for display or further customization.

Raises:

Type Description
KeyError

If x, y, or color columns are not in df_features.

Source code in energy_repset/diagnostics/feature_space/feature_space_scatter.py
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
def plot(
    self,
    df_features: pd.DataFrame,
    x: str,
    y: str,
    selection: SliceCombination = None,
    color: str = None,
) -> go.Figure:
    """Create a 2D scatter plot of feature space.

    The input frame is copied before the helper columns ``slice_label``
    and ``is_selected`` are added, so ``df_features`` itself is not
    modified. When a selection is given, the selected rows are moved to
    the end of the plotting frame so that their trace is created after
    the unselected one and is not hidden behind it.

    Args:
        df_features: Feature matrix with slices as rows, features as columns.
        x: Column name for x-axis.
        y: Column name for y-axis.
        selection: Optional tuple of slice identifiers to highlight.
            Identifiers are matched against the index of df_features.
        color: Optional column name to use for color mapping. When given
            together with a selection, selected slices are marked with a
            star symbol instead of a distinct color.

    Returns:
        Plotly figure object ready for display or further customization.

    Raises:
        KeyError: If x, y, or color columns are not in df_features.
    """
    # Validate columns (x is checked before y, then color)
    for axis_column in (x, y):
        if axis_column not in df_features.columns:
            raise KeyError(f"Column '{axis_column}' not found in df_features")
    if color is not None and color not in df_features.columns:
        raise KeyError(f"Column '{color}' not found in df_features")

    # Prepare data on a copy so the caller's frame stays untouched
    plot_df = df_features.copy()
    plot_df['slice_label'] = plot_df.index.astype(str)

    # Add selection indicator
    highlight = selection is not None
    if highlight:
        plot_df['is_selected'] = plot_df.index.isin(set(selection))
        # Order so selected points are drawn on top: unselected rows first,
        # selected rows last (relative order within each group is preserved).
        selected_mask = plot_df['is_selected']
        plot_df = pd.concat(
            [plot_df[~selected_mask], plot_df[selected_mask]],
            ignore_index=False,
        )
    else:
        plot_df['is_selected'] = False

    # Assemble the keyword arguments once instead of repeating the call
    scatter_kwargs = dict(x=x, y=y, hover_data=['slice_label'])
    if color is not None:
        # Color by feature value; selection (if any) is shown by symbol
        scatter_kwargs['color'] = color
        if highlight:
            scatter_kwargs['symbol'] = 'is_selected'
            scatter_kwargs['symbol_map'] = {True: 'star', False: 'circle'}
    elif highlight:
        # Color by selection status
        scatter_kwargs['color'] = 'is_selected'
        scatter_kwargs['color_discrete_map'] = {True: 'red', False: 'lightgray'}

    fig = px.scatter(plot_df, **scatter_kwargs)

    # Update layout for better readability
    fig.update_layout(
        xaxis_title=x,
        yaxis_title=y,
        hovermode='closest',
    )

    return fig

FeatureSpaceScatter3D

3D scatter plot for visualizing feature space.

Creates an interactive 3D scatter plot of any three features from df_features. Can highlight a specific selection of slices. Works with any feature columns including PCA components or statistical features.

Examples:

>>> # Visualize 3D PCA space
>>> scatter = FeatureSpaceScatter3D()
>>> fig = scatter.plot(
...     context.df_features,
...     x='pc_0',
...     y='pc_1',
...     z='pc_2'
... )
>>> fig.update_layout(title='3D PCA Space')
>>> fig.show()

>>> # Highlight selection
>>> fig = scatter.plot(
...     context.df_features,
...     x='pc_0',
...     y='pc_1',
...     z='pc_2',
...     selection=('2024-01', '2024-04')
... )

>>> # Color by feature value
>>> fig = scatter.plot(
...     context.df_features,
...     x='pc_0',
...     y='pc_1',
...     z='pc_2',
...     color='mean__demand'
... )
Source code in energy_repset/diagnostics/feature_space/feature_space_scatter.py
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
class FeatureSpaceScatter3D:
    """3D scatter plot for visualizing feature space.

    Creates an interactive 3D scatter plot of any three features from df_features.
    Can highlight a specific selection of slices. Works with any feature columns
    including PCA components or statistical features.

    Examples:

        >>> # Visualize 3D PCA space
        >>> scatter = FeatureSpaceScatter3D()
        >>> fig = scatter.plot(
        ...     context.df_features,
        ...     x='pc_0',
        ...     y='pc_1',
        ...     z='pc_2'
        ... )
        >>> fig.update_layout(title='3D PCA Space')
        >>> fig.show()

        >>> # Highlight selection
        >>> fig = scatter.plot(
        ...     context.df_features,
        ...     x='pc_0',
        ...     y='pc_1',
        ...     z='pc_2',
        ...     selection=('2024-01', '2024-04')
        ... )

        >>> # Color by feature value
        >>> fig = scatter.plot(
        ...     context.df_features,
        ...     x='pc_0',
        ...     y='pc_1',
        ...     z='pc_2',
        ...     color='mean__demand'
        ... )
    """

    def __init__(self):
        """Initialize the 3D scatter plot diagnostic.

        The constructor takes no arguments and sets no attributes.
        """
        pass

    def plot(
        self,
        df_features: pd.DataFrame,
        x: str,
        y: str,
        z: str,
        selection: SliceCombination = None,
        color: str = None,
    ) -> go.Figure:
        """Create a 3D scatter plot of feature space.

        The input frame is copied before the helper columns ``slice_label``
        and ``is_selected`` are added, so ``df_features`` itself is not
        modified. When a selection is given, the selected rows are moved to
        the end of the plotting frame so that their trace is created after
        the unselected one.

        Args:
            df_features: Feature matrix with slices as rows, features as columns.
            x: Column name for x-axis.
            y: Column name for y-axis.
            z: Column name for z-axis.
            selection: Optional tuple of slice identifiers to highlight.
                Identifiers are matched against the index of df_features.
            color: Optional column name to use for color mapping. When given
                together with a selection, selected slices are marked with a
                diamond symbol instead of a distinct color.

        Returns:
            Plotly figure object ready for display or further customization.

        Raises:
            KeyError: If x, y, z, or color columns are not in df_features.
        """
        # Validate columns (checked in x, y, z order, then color)
        for axis_column in (x, y, z):
            if axis_column not in df_features.columns:
                raise KeyError(f"Column '{axis_column}' not found in df_features")
        if color is not None and color not in df_features.columns:
            raise KeyError(f"Column '{color}' not found in df_features")

        # Prepare data on a copy so the caller's frame stays untouched
        plot_df = df_features.copy()
        plot_df['slice_label'] = plot_df.index.astype(str)

        # Add selection indicator
        highlight = selection is not None
        if highlight:
            plot_df['is_selected'] = plot_df.index.isin(set(selection))
            # Order so the selected trace is created last: unselected rows
            # first, selected rows last (relative order within each group
            # is preserved).
            selected_mask = plot_df['is_selected']
            plot_df = pd.concat(
                [plot_df[~selected_mask], plot_df[selected_mask]],
                ignore_index=False,
            )
        else:
            plot_df['is_selected'] = False

        # Assemble the keyword arguments once instead of repeating the call
        scatter_kwargs = dict(x=x, y=y, z=z, hover_data=['slice_label'])
        if color is not None:
            # Color by feature value; selection (if any) is shown by symbol
            scatter_kwargs['color'] = color
            if highlight:
                scatter_kwargs['symbol'] = 'is_selected'
                scatter_kwargs['symbol_map'] = {True: 'diamond', False: 'circle'}
        elif highlight:
            # Color by selection status
            scatter_kwargs['color'] = 'is_selected'
            scatter_kwargs['color_discrete_map'] = {True: 'red', False: 'lightgray'}

        fig = px.scatter_3d(plot_df, **scatter_kwargs)

        # Update layout for better readability (3D axes live under `scene`)
        fig.update_layout(
            scene=dict(
                xaxis_title=x,
                yaxis_title=y,
                zaxis_title=z,
            ),
            hovermode='closest',
        )

        return fig

__init__

__init__()

Initialize the 3D scatter plot diagnostic.

Source code in energy_repset/diagnostics/feature_space/feature_space_scatter.py
170
171
172
def __init__(self):
    """Initialize the 3D scatter plot diagnostic.

    The constructor takes no arguments and sets no attributes.
    """
    pass

plot

plot(df_features: DataFrame, x: str, y: str, z: str, selection: SliceCombination = None, color: str = None) -> Figure

Create a 3D scatter plot of feature space.

Parameters:

Name Type Description Default
df_features DataFrame

Feature matrix with slices as rows, features as columns.

required
x str

Column name for x-axis.

required
y str

Column name for y-axis.

required
z str

Column name for z-axis.

required
selection SliceCombination

Optional tuple of slice identifiers to highlight.

None
color str

Optional column name to use for color mapping.

None

Returns:

Type Description
Figure

Plotly figure object ready for display or further customization.

Raises:

Type Description
KeyError

If x, y, z, or color columns are not in df_features.

Source code in energy_repset/diagnostics/feature_space/feature_space_scatter.py
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
def plot(
    self,
    df_features: pd.DataFrame,
    x: str,
    y: str,
    z: str,
    selection: SliceCombination = None,
    color: str = None,
) -> go.Figure:
    """Create a 3D scatter plot of feature space.

    The input frame is copied before the helper columns ``slice_label``
    and ``is_selected`` are added, so ``df_features`` itself is not
    modified. When a selection is given, the selected rows are moved to
    the end of the plotting frame so that their trace is created after
    the unselected one.

    Args:
        df_features: Feature matrix with slices as rows, features as columns.
        x: Column name for x-axis.
        y: Column name for y-axis.
        z: Column name for z-axis.
        selection: Optional tuple of slice identifiers to highlight.
            Identifiers are matched against the index of df_features.
        color: Optional column name to use for color mapping. When given
            together with a selection, selected slices are marked with a
            diamond symbol instead of a distinct color.

    Returns:
        Plotly figure object ready for display or further customization.

    Raises:
        KeyError: If x, y, z, or color columns are not in df_features.
    """
    # Validate columns (checked in x, y, z order, then color)
    for axis_column in (x, y, z):
        if axis_column not in df_features.columns:
            raise KeyError(f"Column '{axis_column}' not found in df_features")
    if color is not None and color not in df_features.columns:
        raise KeyError(f"Column '{color}' not found in df_features")

    # Prepare data on a copy so the caller's frame stays untouched
    plot_df = df_features.copy()
    plot_df['slice_label'] = plot_df.index.astype(str)

    # Add selection indicator
    highlight = selection is not None
    if highlight:
        plot_df['is_selected'] = plot_df.index.isin(set(selection))
        # Order so the selected trace is created last: unselected rows first,
        # selected rows last (relative order within each group is preserved).
        selected_mask = plot_df['is_selected']
        plot_df = pd.concat(
            [plot_df[~selected_mask], plot_df[selected_mask]],
            ignore_index=False,
        )
    else:
        plot_df['is_selected'] = False

    # Assemble the keyword arguments once instead of repeating the call
    scatter_kwargs = dict(x=x, y=y, z=z, hover_data=['slice_label'])
    if color is not None:
        # Color by feature value; selection (if any) is shown by symbol
        scatter_kwargs['color'] = color
        if highlight:
            scatter_kwargs['symbol'] = 'is_selected'
            scatter_kwargs['symbol_map'] = {True: 'diamond', False: 'circle'}
    elif highlight:
        # Color by selection status
        scatter_kwargs['color'] = 'is_selected'
        scatter_kwargs['color_discrete_map'] = {True: 'red', False: 'lightgray'}

    fig = px.scatter_3d(plot_df, **scatter_kwargs)

    # Update layout for better readability (3D axes live under `scene`)
    fig.update_layout(
        scene=dict(
            xaxis_title=x,
            yaxis_title=y,
            zaxis_title=z,
        ),
        hovermode='closest',
    )

    return fig

FeatureSpaceScatterMatrix

Scatter matrix (SPLOM) for visualizing relationships between multiple features.

Creates an interactive scatter plot matrix showing pairwise relationships between all specified features. Can highlight a specific selection of slices. Useful for exploring multi-dimensional feature spaces and identifying feature correlations.

Examples:

>>> # Visualize PCA components
>>> scatter_matrix = FeatureSpaceScatterMatrix()
>>> fig = scatter_matrix.plot(
...     context.df_features,
...     dimensions=['pc_0', 'pc_1', 'pc_2']
... )
>>> fig.update_layout(title='PCA Component Relationships')
>>> fig.show()

>>> # Visualize statistical features with selection
>>> fig = scatter_matrix.plot(
...     context.df_features,
...     dimensions=['mean__demand', 'std__demand', 'max__wind'],
...     selection=('2024-01', '2024-04', '2024-07')
... )

>>> # Color by a feature value
>>> fig = scatter_matrix.plot(
...     context.df_features,
...     dimensions=['pc_0', 'pc_1', 'pc_2', 'pc_3'],
...     color='mean__demand'
... )

>>> # All features
>>> fig = scatter_matrix.plot(context.df_features)
Source code in energy_repset/diagnostics/feature_space/feature_space_scatter_matrix.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
class FeatureSpaceScatterMatrix:
    """Scatter matrix (SPLOM) for visualizing relationships between multiple features.

    Creates an interactive scatter plot matrix showing pairwise relationships between
    all specified features. Can highlight a specific selection of slices. Useful for
    exploring multi-dimensional feature spaces and identifying feature correlations.

    Examples:

        >>> # Visualize PCA components
        >>> scatter_matrix = FeatureSpaceScatterMatrix()
        >>> fig = scatter_matrix.plot(
        ...     context.df_features,
        ...     dimensions=['pc_0', 'pc_1', 'pc_2']
        ... )
        >>> fig.update_layout(title='PCA Component Relationships')
        >>> fig.show()

        >>> # Visualize statistical features with selection
        >>> fig = scatter_matrix.plot(
        ...     context.df_features,
        ...     dimensions=['mean__demand', 'std__demand', 'max__wind'],
        ...     selection=('2024-01', '2024-04', '2024-07')
        ... )

        >>> # Color by a feature value
        >>> fig = scatter_matrix.plot(
        ...     context.df_features,
        ...     dimensions=['pc_0', 'pc_1', 'pc_2', 'pc_3'],
        ...     color='mean__demand'
        ... )

        >>> # All features
        >>> fig = scatter_matrix.plot(context.df_features)
    """

    def __init__(self):
        """Initialize the scatter matrix diagnostic.

        The constructor takes no arguments and sets no attributes.
        """
        pass

    def plot(
        self,
        df_features: pd.DataFrame,
        dimensions: list[str] = None,
        selection: SliceCombination = None,
        color: str = None,
    ) -> go.Figure:
        """Create a scatter plot matrix of feature space.

        Only the requested columns (plus the color column, if any) are copied
        into the plotting frame, so ``df_features`` itself is not modified.

        Args:
            df_features: Feature matrix with slices as rows, features as columns.
            dimensions: List of column names to include in the matrix. If None,
                uses all columns (may be slow for many features).
            selection: Optional tuple of slice identifiers to highlight.
                Identifiers are matched against the index of df_features.
            color: Optional column name to use for color mapping. It does not
                have to be one of ``dimensions``. If None and selection is
                provided, colors by selection status.

        Returns:
            Plotly figure object ready for display or further customization.

        Raises:
            KeyError: If any dimension or color column is not in df_features.
            ValueError: If dimensions list is empty.
        """
        # Handle dimensions default; normalise to a list so that a tuple is
        # not mistaken by pandas for a single (MultiIndex-style) column key.
        if dimensions is None:
            dimensions = list(df_features.columns)
        else:
            dimensions = list(dimensions)

        if len(dimensions) == 0:
            raise ValueError("dimensions list cannot be empty")

        # Validate columns
        for dim in dimensions:
            if dim not in df_features.columns:
                raise KeyError(f"Column '{dim}' not found in df_features")
        if color is not None and color not in df_features.columns:
            raise KeyError(f"Column '{color}' not found in df_features")

        # Prepare data. The color column must travel with the plotting frame
        # even when it is not one of the plotted dimensions; otherwise the
        # color lookup fails on a column that passed validation above.
        needed_columns = list(dimensions)
        if color is not None and color not in needed_columns:
            needed_columns.append(color)
        plot_df = df_features[needed_columns].copy()
        plot_df['slice_label'] = df_features.index.astype(str)

        # Add selection indicator
        highlight = selection is not None
        if highlight:
            plot_df['is_selected'] = df_features.index.isin(set(selection))
            # Order so selected points are drawn on top
            selected_mask = plot_df['is_selected']
            plot_df = pd.concat(
                [plot_df[~selected_mask], plot_df[selected_mask]],
                ignore_index=False,
            )
        else:
            plot_df['is_selected'] = False

        # Assemble the keyword arguments once instead of repeating the call
        matrix_kwargs = dict(dimensions=dimensions, hover_data=['slice_label'])
        if color is not None:
            # Color by feature value
            matrix_kwargs['color'] = color
        elif highlight:
            # Color by selection status
            matrix_kwargs['color'] = 'is_selected'
            matrix_kwargs['color_discrete_map'] = {True: 'red', False: 'lightgray'}
        if highlight:
            # Selected slices always get a distinct marker symbol
            matrix_kwargs['symbol'] = 'is_selected'
            matrix_kwargs['symbol_map'] = {True: 'star', False: 'circle'}

        fig = px.scatter_matrix(plot_df, **matrix_kwargs)

        # Show only the lower triangle with small markers for readability
        fig.update_traces(
            diagonal_visible=False,
            showupperhalf=False,
            marker=dict(size=4)
        )

        return fig

__init__

__init__()

Initialize the scatter matrix diagnostic.

Source code in energy_repset/diagnostics/feature_space/feature_space_scatter_matrix.py
47
48
49
def __init__(self):
    """Initialize the scatter matrix diagnostic.

    The constructor takes no arguments and sets no attributes.
    """
    pass

plot

plot(df_features: DataFrame, dimensions: list[str] = None, selection: SliceCombination = None, color: str = None) -> Figure

Create a scatter plot matrix of feature space.

Parameters:

Name Type Description Default
df_features DataFrame

Feature matrix with slices as rows, features as columns.

required
dimensions list[str]

List of column names to include in the matrix. If None, uses all columns (may be slow for many features).

None
selection SliceCombination

Optional tuple of slice identifiers to highlight.

None
color str

Optional column name to use for color mapping. If None and selection is provided, colors by selection status.

None

Returns:

Type Description
Figure

Plotly figure object ready for display or further customization.

Raises:

Type Description
KeyError

If any dimension or color column is not in df_features.

ValueError

If dimensions list is empty.

Source code in energy_repset/diagnostics/feature_space/feature_space_scatter_matrix.py
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
def plot(
    self,
    df_features: pd.DataFrame,
    dimensions: list[str] = None,
    selection: SliceCombination = None,
    color: str = None,
) -> go.Figure:
    """Create a scatter plot matrix of feature space.

    Only the requested columns (plus the color column, if any) are copied
    into the plotting frame, so ``df_features`` itself is not modified.

    Args:
        df_features: Feature matrix with slices as rows, features as columns.
        dimensions: List of column names to include in the matrix. If None,
            uses all columns (may be slow for many features).
        selection: Optional tuple of slice identifiers to highlight.
            Identifiers are matched against the index of df_features.
        color: Optional column name to use for color mapping. It does not
            have to be one of ``dimensions``. If None and selection is
            provided, colors by selection status.

    Returns:
        Plotly figure object ready for display or further customization.

    Raises:
        KeyError: If any dimension or color column is not in df_features.
        ValueError: If dimensions list is empty.
    """
    # Handle dimensions default; normalise to a list so that a tuple is not
    # mistaken by pandas for a single (MultiIndex-style) column key.
    if dimensions is None:
        dimensions = list(df_features.columns)
    else:
        dimensions = list(dimensions)

    if len(dimensions) == 0:
        raise ValueError("dimensions list cannot be empty")

    # Validate columns
    for dim in dimensions:
        if dim not in df_features.columns:
            raise KeyError(f"Column '{dim}' not found in df_features")
    if color is not None and color not in df_features.columns:
        raise KeyError(f"Column '{color}' not found in df_features")

    # Prepare data. The color column must travel with the plotting frame even
    # when it is not one of the plotted dimensions; otherwise the color lookup
    # fails on a column that passed validation above.
    needed_columns = list(dimensions)
    if color is not None and color not in needed_columns:
        needed_columns.append(color)
    plot_df = df_features[needed_columns].copy()
    plot_df['slice_label'] = df_features.index.astype(str)

    # Add selection indicator
    highlight = selection is not None
    if highlight:
        plot_df['is_selected'] = df_features.index.isin(set(selection))
        # Order so selected points are drawn on top
        selected_mask = plot_df['is_selected']
        plot_df = pd.concat(
            [plot_df[~selected_mask], plot_df[selected_mask]],
            ignore_index=False,
        )
    else:
        plot_df['is_selected'] = False

    # Assemble the keyword arguments once instead of repeating the call
    matrix_kwargs = dict(dimensions=dimensions, hover_data=['slice_label'])
    if color is not None:
        # Color by feature value
        matrix_kwargs['color'] = color
    elif highlight:
        # Color by selection status
        matrix_kwargs['color'] = 'is_selected'
        matrix_kwargs['color_discrete_map'] = {True: 'red', False: 'lightgray'}
    if highlight:
        # Selected slices always get a distinct marker symbol
        matrix_kwargs['symbol'] = 'is_selected'
        matrix_kwargs['symbol_map'] = {True: 'star', False: 'circle'}

    fig = px.scatter_matrix(plot_df, **matrix_kwargs)

    # Show only the lower triangle with small markers for readability
    fig.update_traces(
        diagonal_visible=False,
        showupperhalf=False,
        marker=dict(size=4)
    )

    return fig

PCAVarianceExplained

Visualize explained variance ratio for PCA components.

Creates a bar chart showing the proportion of variance explained by each principal component, along with cumulative variance. Helps determine how many components are needed to capture most of the data's variance.

This diagnostic requires the fitted PCAFeatureEngineer instance to access the explained_variance_ratio_ attribute.

Examples:

>>> # Get PCA engineer from pipeline
>>> pca_engineer = pipeline.engineers['pca']
>>> variance_plot = PCAVarianceExplained(pca_engineer)
>>> fig = variance_plot.plot()
>>> fig.update_layout(title='PCA Variance Explained')
>>> fig.show()

>>> # With custom number of components shown
>>> fig = variance_plot.plot(n_components=10)

>>> # After running workflow
>>> context_with_features = workflow.feature_engineer.run(context)
>>> pca_eng = workflow.feature_engineer.engineers['pca']
>>> variance_plot = PCAVarianceExplained(pca_eng)
>>> fig = variance_plot.plot()
Source code in energy_repset/diagnostics/feature_space/pca_variance_explained.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
class PCAVarianceExplained:
    """Visualize explained variance ratio for PCA components.

    Creates a bar chart showing the proportion of variance explained by each
    principal component, optionally overlaid with the cumulative variance on a
    secondary axis. Helps determine how many components are needed to capture
    most of the data's variance.

    This diagnostic requires the fitted PCAFeatureEngineer instance to access
    the explained_variance_ratio_ attribute.

    Examples:

        >>> # Get PCA engineer from pipeline
        >>> pca_engineer = pipeline.engineers['pca']
        >>> variance_plot = PCAVarianceExplained(pca_engineer)
        >>> fig = variance_plot.plot()
        >>> fig.update_layout(title='PCA Variance Explained')
        >>> fig.show()

        >>> # With custom number of components shown
        >>> fig = variance_plot.plot(n_components=10)

        >>> # After running workflow
        >>> context_with_features = workflow.feature_engineer.run(context)
        >>> pca_eng = workflow.feature_engineer.engineers['pca']
        >>> variance_plot = PCAVarianceExplained(pca_eng)
        >>> fig = variance_plot.plot()
    """

    def __init__(self, pca_engineer: PCAFeatureEngineer):
        """Initialize the PCA variance explained diagnostic.

        Args:
            pca_engineer: A fitted PCAFeatureEngineer instance. Must have been
                fitted on data (i.e., calc_and_get_features_df has been called).
                The fitted state is not checked here; it is checked when
                plot() is called.
        """
        self.pca_engineer = pca_engineer

    def plot(self, n_components: int = None, show_cumulative: bool = True) -> go.Figure:
        """Create a bar chart of explained variance ratios.

        Args:
            n_components: Number of leading components to show. If None, shows
                all components. Must be positive when given.
            show_cumulative: If True, adds a line showing cumulative variance
                explained, drawn against a secondary right-hand axis.

        Returns:
            Plotly figure object ready for display or further customization.

        Raises:
            AttributeError: If the PCA engineer has not been fitted yet.
            ValueError: If n_components is given but is not positive.
        """
        # Get variance ratios
        if not hasattr(self.pca_engineer, 'explained_variance_ratio_'):
            raise AttributeError(
                "PCA engineer has not been fitted. Call calc_and_get_features_df() first."
            )

        # A zero or negative count would otherwise be used as a slice bound and
        # silently yield an empty chart or drop components from the end.
        if n_components is not None and n_components <= 0:
            raise ValueError("n_components must be positive")

        variance_ratio = self.pca_engineer.explained_variance_ratio_

        # Limit to requested number of components
        if n_components is not None:
            variance_ratio = variance_ratio[:n_components]

        # Prepare data; labels are zero-based (PC0, PC1, ...)
        n = len(variance_ratio)
        component_labels = [f'PC{i}' for i in range(n)]
        cumulative_variance = variance_ratio.cumsum()

        # Create figure
        fig = go.Figure()

        # Add variance bars
        fig.add_trace(go.Bar(
            x=component_labels,
            y=variance_ratio,
            name='Individual',
            marker_color='lightblue',
            text=[f'{v:.1%}' for v in variance_ratio],
            textposition='outside',
        ))

        # Add cumulative line if requested
        if show_cumulative:
            fig.add_trace(go.Scatter(
                x=component_labels,
                y=cumulative_variance,
                name='Cumulative',
                mode='lines+markers',
                line=dict(color='red', width=2),
                yaxis='y2',
                text=[f'{v:.1%}' for v in cumulative_variance],
                textposition='top center',
            ))

        # Update layout
        layout_kwargs = dict(
            xaxis_title='Principal Component',
            yaxis_title='Explained Variance Ratio',
            hovermode='x unified',
            yaxis=dict(tickformat='.0%'),
        )

        # The secondary axis is only declared when the cumulative trace uses it.
        if show_cumulative:
            layout_kwargs['yaxis2'] = dict(
                title='Cumulative Variance',
                overlaying='y',
                side='right',
                tickformat='.0%',
                range=[0, 1.05],
            )

        fig.update_layout(**layout_kwargs)

        return fig

__init__

__init__(pca_engineer: PCAFeatureEngineer)

Initialize the PCA variance explained diagnostic.

Parameters:

Name Type Description Default
pca_engineer PCAFeatureEngineer

A fitted PCAFeatureEngineer instance. Must have been fitted on data (i.e., calc_and_get_features_df has been called).

required
Source code in energy_repset/diagnostics/feature_space/pca_variance_explained.py
41
42
43
44
45
46
47
48
def __init__(self, pca_engineer: PCAFeatureEngineer):
    """Initialize the PCA variance explained diagnostic.

    The engineer is stored as-is on the instance; this constructor performs
    no check of its fitted state.

    Args:
        pca_engineer: A fitted PCAFeatureEngineer instance. Must have been
            fitted on data (i.e., calc_and_get_features_df has been called).
    """
    self.pca_engineer = pca_engineer

plot

plot(n_components: int = None, show_cumulative: bool = True) -> Figure

Create a bar chart of explained variance ratios.

Parameters:

Name Type Description Default
n_components int

Number of components to show. If None, shows all components.

None
show_cumulative bool

If True, adds a line showing cumulative variance explained.

True

Returns:

Type Description
Figure

Plotly figure object ready for display or further customization.

Raises:

Type Description
AttributeError

If the PCA engineer has not been fitted yet.

Source code in energy_repset/diagnostics/feature_space/pca_variance_explained.py
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
def plot(self, n_components: int = None, show_cumulative: bool = True) -> go.Figure:
    """Create a bar chart of explained variance ratios.

    Args:
        n_components: Number of leading components to show. If None, shows
            all components. Must be positive when given.
        show_cumulative: If True, adds a line showing cumulative variance
            explained, drawn against a secondary right-hand axis.

    Returns:
        Plotly figure object ready for display or further customization.

    Raises:
        AttributeError: If the PCA engineer has not been fitted yet.
        ValueError: If n_components is given but is not positive.
    """
    # Get variance ratios
    if not hasattr(self.pca_engineer, 'explained_variance_ratio_'):
        raise AttributeError(
            "PCA engineer has not been fitted. Call calc_and_get_features_df() first."
        )

    # A zero or negative count would otherwise be used as a slice bound and
    # silently yield an empty chart or drop components from the end.
    if n_components is not None and n_components <= 0:
        raise ValueError("n_components must be positive")

    variance_ratio = self.pca_engineer.explained_variance_ratio_

    # Limit to requested number of components
    if n_components is not None:
        variance_ratio = variance_ratio[:n_components]

    # Prepare data; labels are zero-based (PC0, PC1, ...)
    n = len(variance_ratio)
    component_labels = [f'PC{i}' for i in range(n)]
    cumulative_variance = variance_ratio.cumsum()

    # Create figure
    fig = go.Figure()

    # Add variance bars
    fig.add_trace(go.Bar(
        x=component_labels,
        y=variance_ratio,
        name='Individual',
        marker_color='lightblue',
        text=[f'{v:.1%}' for v in variance_ratio],
        textposition='outside',
    ))

    # Add cumulative line if requested
    if show_cumulative:
        fig.add_trace(go.Scatter(
            x=component_labels,
            y=cumulative_variance,
            name='Cumulative',
            mode='lines+markers',
            line=dict(color='red', width=2),
            yaxis='y2',
            text=[f'{v:.1%}' for v in cumulative_variance],
            textposition='top center',
        ))

    # Update layout
    layout_kwargs = dict(
        xaxis_title='Principal Component',
        yaxis_title='Explained Variance Ratio',
        hovermode='x unified',
        yaxis=dict(tickformat='.0%'),
    )

    # The secondary axis is only declared when the cumulative trace uses it.
    if show_cumulative:
        layout_kwargs['yaxis2'] = dict(
            title='Cumulative Variance',
            overlaying='y',
            side='right',
            tickformat='.0%',
            range=[0, 1.05],
        )

    fig.update_layout(**layout_kwargs)

    return fig

FeatureCorrelationHeatmap

Visualize correlation matrix of features.

Creates an interactive heatmap showing pairwise correlations (Pearson by default; Spearman and Kendall are also supported) between all features in the feature matrix. Helps identify redundant features and understand feature relationships. Can optionally show only the lower triangle to avoid redundancy.

Examples:

>>> # Visualize all feature correlations
>>> heatmap = FeatureCorrelationHeatmap()
>>> fig = heatmap.plot(context.df_features)
>>> fig.update_layout(title='Feature Correlation Matrix')
>>> fig.show()

>>> # Show only lower triangle
>>> fig = heatmap.plot(context.df_features, show_lower_only=True)

>>> # Subset of features
>>> selected_features = context.df_features[['pc_0', 'pc_1', 'mean__demand']]
>>> fig = heatmap.plot(selected_features)
Source code in energy_repset/diagnostics/feature_space/feature_correlation_heatmap.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
class FeatureCorrelationHeatmap:
    """Visualize correlation matrix of features.

    Creates an interactive heatmap showing pairwise correlations (Pearson by
    default; Spearman and Kendall are also supported) between all features in
    the feature matrix. Helps identify redundant features and understand feature
    relationships. Can optionally show only the lower triangle to avoid redundancy.

    Examples:

        >>> # Visualize all feature correlations
        >>> heatmap = FeatureCorrelationHeatmap()
        >>> fig = heatmap.plot(context.df_features)
        >>> fig.update_layout(title='Feature Correlation Matrix')
        >>> fig.show()

        >>> # Show only lower triangle
        >>> fig = heatmap.plot(context.df_features, show_lower_only=True)

        >>> # Subset of features
        >>> selected_features = context.df_features[['pc_0', 'pc_1', 'mean__demand']]
        >>> fig = heatmap.plot(selected_features)
    """

    def __init__(self):
        """Initialize the feature correlation heatmap diagnostic."""
        pass

    def plot(
        self,
        df_features: pd.DataFrame,
        method: str = 'pearson',
        show_lower_only: bool = False,
    ) -> go.Figure:
        """Create a heatmap of feature correlations.

        Args:
            df_features: Feature matrix with slices as rows, features as columns.
            method: Correlation method ('pearson', 'spearman', or 'kendall').
                Default is 'pearson'.
            show_lower_only: If True, shows only the lower triangle of the
                correlation matrix (removes redundant upper triangle and diagonal).

        Returns:
            Plotly figure object ready for display or further customization.

        Raises:
            ValueError: If method is not one of the supported correlation methods.
        """
        if method not in ('pearson', 'spearman', 'kendall'):
            raise ValueError(
                f"method must be 'pearson', 'spearman', or 'kendall', got '{method}'"
            )

        # Calculate correlation matrix
        corr_matrix = df_features.corr(method=method)

        if show_lower_only:
            # Build the keep-mask in one shot instead of n^2 scalar .iloc
            # writes: a cell survives only when it lies strictly below the
            # diagonal (column position < row position).
            size = len(corr_matrix)
            strictly_lower = pd.DataFrame(
                [[col < row for col in range(size)] for row in range(size)],
                index=corr_matrix.index,
                columns=corr_matrix.columns,
            )

            # Cells outside the mask become NaN
            corr_matrix = corr_matrix.where(strictly_lower)

        # Create heatmap on a fixed [-1, 1] colour range centred at zero
        fig = px.imshow(
            corr_matrix,
            x=corr_matrix.columns,
            y=corr_matrix.index,
            color_continuous_scale='RdBu_r',
            color_continuous_midpoint=0,
            zmin=-1,
            zmax=1,
            aspect='auto',
        )

        # Update layout for better readability
        fig.update_layout(
            xaxis_title='',
            yaxis_title='',
            coloraxis_colorbar=dict(title='Correlation'),
        )

        # Annotate each cell with its correlation rounded to two decimals
        fig.update_traces(
            text=corr_matrix.round(2).values,
            texttemplate='%{text}',
            textfont=dict(size=10),
        )

        return fig

__init__

__init__()

Initialize the feature correlation heatmap diagnostic.

Source code in energy_repset/diagnostics/feature_space/feature_correlation_heatmap.py
31
32
33
def __init__(self):
    """Set up the feature correlation heatmap diagnostic (no state to configure)."""

plot

plot(df_features: DataFrame, method: str = 'pearson', show_lower_only: bool = False) -> Figure

Create a heatmap of feature correlations.

Parameters:

Name Type Description Default
df_features DataFrame

Feature matrix with slices as rows, features as columns.

required
method str

Correlation method ('pearson', 'spearman', or 'kendall'). Default is 'pearson'.

'pearson'
show_lower_only bool

If True, shows only the lower triangle of the correlation matrix (removes redundant upper triangle and diagonal).

False

Returns:

Type Description
Figure

Plotly figure object ready for display or further customization.

Raises:

Type Description
ValueError

If method is not one of the supported correlation methods.

Source code in energy_repset/diagnostics/feature_space/feature_correlation_heatmap.py
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
def plot(
    self,
    df_features: pd.DataFrame,
    method: str = 'pearson',
    show_lower_only: bool = False,
) -> go.Figure:
    """Create a heatmap of feature correlations.

    Args:
        df_features: Feature matrix with slices as rows, features as columns.
        method: Correlation method ('pearson', 'spearman', or 'kendall').
            Default is 'pearson'.
        show_lower_only: If True, shows only the lower triangle of the
            correlation matrix (removes redundant upper triangle and diagonal).

    Returns:
        Plotly figure object ready for display or further customization.

    Raises:
        ValueError: If method is not one of the supported correlation methods.
    """
    if method not in ('pearson', 'spearman', 'kendall'):
        raise ValueError(
            f"method must be 'pearson', 'spearman', or 'kendall', got '{method}'"
        )

    # Calculate correlation matrix
    corr_matrix = df_features.corr(method=method)

    if show_lower_only:
        # Build the keep-mask in one shot instead of n^2 scalar .iloc
        # writes: a cell survives only when it lies strictly below the
        # diagonal (column position < row position).
        size = len(corr_matrix)
        strictly_lower = pd.DataFrame(
            [[col < row for col in range(size)] for row in range(size)],
            index=corr_matrix.index,
            columns=corr_matrix.columns,
        )

        # Cells outside the mask become NaN
        corr_matrix = corr_matrix.where(strictly_lower)

    # Create heatmap on a fixed [-1, 1] colour range centred at zero
    fig = px.imshow(
        corr_matrix,
        x=corr_matrix.columns,
        y=corr_matrix.index,
        color_continuous_scale='RdBu_r',
        color_continuous_midpoint=0,
        zmin=-1,
        zmax=1,
        aspect='auto',
    )

    # Update layout for better readability
    fig.update_layout(
        xaxis_title='',
        yaxis_title='',
        coloraxis_colorbar=dict(title='Correlation'),
    )

    # Annotate each cell with its correlation rounded to two decimals
    fig.update_traces(
        text=corr_matrix.round(2).values,
        texttemplate='%{text}',
        textfont=dict(size=10),
    )

    return fig

FeatureDistributions

Visualize distributions of all features as histograms.

Creates a grid of histograms showing the distribution of each feature across all slices. Helps identify feature scales, skewness, and potential outliers. Useful for understanding the feature space before selection.

Examples:

>>> # Visualize all feature distributions
>>> dist_plot = FeatureDistributions()
>>> fig = dist_plot.plot(context.df_features)
>>> fig.update_layout(title='Feature Distributions')
>>> fig.show()

>>> # Subset of features
>>> selected_features = context.df_features[['pc_0', 'pc_1', 'mean__demand']]
>>> fig = dist_plot.plot(selected_features)

>>> # With custom bin count
>>> fig = dist_plot.plot(context.df_features, nbins=30)
Source code in energy_repset/diagnostics/feature_space/feature_distributions.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
class FeatureDistributions:
    """Histogram grid showing how every feature is distributed across slices.

    One histogram is drawn per feature column, laid out row by row in a
    subplot grid. Useful for spotting differing feature scales, skewness, and
    potential outliers before running a selection.

    Examples:

        >>> # Visualize all feature distributions
        >>> dist_plot = FeatureDistributions()
        >>> fig = dist_plot.plot(context.df_features)
        >>> fig.update_layout(title='Feature Distributions')
        >>> fig.show()

        >>> # Subset of features
        >>> selected_features = context.df_features[['pc_0', 'pc_1', 'mean__demand']]
        >>> fig = dist_plot.plot(selected_features)

        >>> # With custom bin count
        >>> fig = dist_plot.plot(context.df_features, nbins=30)
    """

    def __init__(self):
        """Set up the feature distributions diagnostic (no state to configure)."""

    def plot(
        self,
        df_features: pd.DataFrame,
        nbins: int = 20,
        cols: int = 3,
    ) -> go.Figure:
        """Draw one histogram per feature column in a subplot grid.

        Args:
            df_features: Feature matrix with slices as rows, features as columns.
            nbins: Number of bins requested for each histogram. Default is 20.
            cols: Number of columns in the subplot grid. Default is 3.

        Returns:
            Plotly figure object ready for display or further customization.

        Raises:
            ValueError: If df_features is empty or nbins/cols are not positive.
        """
        if df_features.empty:
            raise ValueError("df_features cannot be empty")
        if nbins <= 0:
            raise ValueError("nbins must be positive")
        if cols <= 0:
            raise ValueError("cols must be positive")

        feature_names = list(df_features.columns)

        # Enough rows to place every feature at `cols` per row (ceiling division)
        n_rows = (len(feature_names) + cols - 1) // cols

        # Spacing is a fraction of the figure, so it shrinks as the grid grows
        v_gap = 0.12 / n_rows if n_rows > 1 else 0.1
        h_gap = 0.1 / cols if cols > 1 else 0.1

        fig = make_subplots(
            rows=n_rows,
            cols=cols,
            subplot_titles=feature_names,
            vertical_spacing=v_gap,
            horizontal_spacing=h_gap,
        )

        for position, name in enumerate(feature_names):
            # Fill the grid row by row; plotly's grid coordinates are 1-based
            zero_row, zero_col = divmod(position, cols)
            grid_row = zero_row + 1
            grid_col = zero_col + 1

            histogram = go.Histogram(
                x=df_features[name],
                nbinsx=nbins,
                name=name,
                showlegend=False,
                marker_color='lightblue',
            )
            fig.add_trace(histogram, row=grid_row, col=grid_col)

            # Label the axes of this subplot
            fig.update_xaxes(title_text=name, row=grid_row, col=grid_col)
            fig.update_yaxes(title_text='Count', row=grid_row, col=grid_col)

        # Figure height grows by 300 per grid row
        fig.update_layout(
            height=300 * n_rows,
            showlegend=False,
        )

        return fig

__init__

__init__()

Initialize the feature distributions diagnostic.

Source code in energy_repset/diagnostics/feature_space/feature_distributions.py
31
32
33
def __init__(self):
    """Set up the feature distributions diagnostic (no state to configure)."""

plot

plot(df_features: DataFrame, nbins: int = 20, cols: int = 3) -> Figure

Create a grid of histograms for all features.

Parameters:

Name Type Description Default
df_features DataFrame

Feature matrix with slices as rows, features as columns.

required
nbins int

Number of bins for each histogram. Default is 20.

20
cols int

Number of columns in the subplot grid. Default is 3.

3

Returns:

Type Description
Figure

Plotly figure object ready for display or further customization.

Raises:

Type Description
ValueError

If df_features is empty or nbins/cols are invalid.

Source code in energy_repset/diagnostics/feature_space/feature_distributions.py
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def plot(
    self,
    df_features: pd.DataFrame,
    nbins: int = 20,
    cols: int = 3,
) -> go.Figure:
    """Draw one histogram per feature column in a subplot grid.

    Args:
        df_features: Feature matrix with slices as rows, features as columns.
        nbins: Number of bins requested for each histogram. Default is 20.
        cols: Number of columns in the subplot grid. Default is 3.

    Returns:
        Plotly figure object ready for display or further customization.

    Raises:
        ValueError: If df_features is empty or nbins/cols are not positive.
    """
    if df_features.empty:
        raise ValueError("df_features cannot be empty")
    if nbins <= 0:
        raise ValueError("nbins must be positive")
    if cols <= 0:
        raise ValueError("cols must be positive")

    feature_names = list(df_features.columns)

    # Enough rows to place every feature at `cols` per row (ceiling division)
    n_rows = (len(feature_names) + cols - 1) // cols

    # Spacing is a fraction of the figure, so it shrinks as the grid grows
    v_gap = 0.12 / n_rows if n_rows > 1 else 0.1
    h_gap = 0.1 / cols if cols > 1 else 0.1

    fig = make_subplots(
        rows=n_rows,
        cols=cols,
        subplot_titles=feature_names,
        vertical_spacing=v_gap,
        horizontal_spacing=h_gap,
    )

    for position, name in enumerate(feature_names):
        # Fill the grid row by row; plotly's grid coordinates are 1-based
        zero_row, zero_col = divmod(position, cols)
        grid_row = zero_row + 1
        grid_col = zero_col + 1

        histogram = go.Histogram(
            x=df_features[name],
            nbinsx=nbins,
            name=name,
            showlegend=False,
            marker_color='lightblue',
        )
        fig.add_trace(histogram, row=grid_row, col=grid_col)

        # Label the axes of this subplot
        fig.update_xaxes(title_text=name, row=grid_row, col=grid_col)
        fig.update_yaxes(title_text='Count', row=grid_row, col=grid_col)

    # Figure height grows by 300 per grid row
    fig.update_layout(
        height=300 * n_rows,
        showlegend=False,
    )

    return fig

Score Components

DistributionOverlayECDF

Overlay empirical cumulative distribution functions (ECDF) to compare distributions.

Creates a plot showing the ECDF of a variable for both the full dataset and a selection. This helps visualize how well the selection represents the full distribution, which is what WassersteinFidelity measures.

Examples:

>>> # Compare demand distribution
>>> ecdf_plot = DistributionOverlayECDF()
>>> full_data = context.df_raw['demand']
>>> selected_indices = context.slicer.get_indices_for_slices(result.selection)
>>> selected_data = context.df_raw.loc[selected_indices, 'demand']
>>> fig = ecdf_plot.plot(full_data, selected_data)
>>> fig.update_layout(title='Demand Distribution: Full vs Selected')
>>> fig.show()

>>> # Alternative: using a boolean mask with .loc
>>> selection_mask = context.df_raw.index.isin(selected_indices)
>>> fig = ecdf_plot.plot(
...     context.df_raw['wind'],
...     context.df_raw.loc[selection_mask, 'wind']
... )
Source code in energy_repset/diagnostics/score_components/distribution_overlay.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
class DistributionOverlayECDF:
    """Overlay empirical cumulative distribution functions (ECDF) to compare distributions.

    Creates a plot showing the ECDF of a variable for both the full dataset and
    a selection. This helps visualize how well the selection represents the full
    distribution, which is what WassersteinFidelity measures.

    Examples:

        >>> # Compare demand distribution
        >>> ecdf_plot = DistributionOverlayECDF()
        >>> full_data = context.df_raw['demand']
        >>> selected_indices = context.slicer.get_indices_for_slices(result.selection)
        >>> selected_data = context.df_raw.loc[selected_indices, 'demand']
        >>> fig = ecdf_plot.plot(full_data, selected_data)
        >>> fig.update_layout(title='Demand Distribution: Full vs Selected')
        >>> fig.show()

        >>> # Alternative: using a boolean mask with .loc
        >>> selection_mask = context.df_raw.index.isin(selected_indices)
        >>> fig = ecdf_plot.plot(
        ...     context.df_raw['wind'],
        ...     context.df_raw.loc[selection_mask, 'wind']
        ... )
    """

    def __init__(self):
        """Initialize the ECDF overlay diagnostic."""
        pass

    def plot(
        self,
        df_full: pd.Series,
        df_selection: pd.Series,
        full_label: str = 'Full',
        selection_label: str = 'Selection',
    ) -> go.Figure:
        """Create an ECDF overlay plot.

        Each curve is drawn as a step function: the ECDF stays constant
        between consecutive sorted observations and jumps by 1/n at each one.

        Args:
            df_full: Series containing values for the full dataset. Its name,
                if set, is used as the x-axis title.
            df_selection: Series containing values for the selection.
            full_label: Label for the full dataset in the legend. Default 'Full'.
            selection_label: Label for the selection in the legend. Default 'Selection'.

        Returns:
            Plotly figure object ready for display or further customization.
        """
        # Drop NaN values
        full_values = df_full.dropna().values
        selection_values = df_selection.dropna().values

        # Calculate ECDF for full dataset: the i-th sorted value maps to i/n
        full_sorted = np.sort(full_values)
        full_ecdf = np.arange(1, len(full_sorted) + 1) / len(full_sorted)

        # Calculate ECDF for selection
        selection_sorted = np.sort(selection_values)
        selection_ecdf = np.arange(1, len(selection_sorted) + 1) / len(selection_sorted)

        # Create figure
        fig = go.Figure()

        # Add full dataset ECDF. shape='hv' (horizontal, then vertical) holds
        # the value until the next observation instead of interpolating
        # linearly, which would misstate the ECDF between sample points.
        fig.add_trace(go.Scatter(
            x=full_sorted,
            y=full_ecdf,
            mode='lines',
            name=full_label,
            line=dict(width=2, shape='hv'),
        ))

        # Add selection ECDF (dashed to distinguish it from the full dataset)
        fig.add_trace(go.Scatter(
            x=selection_sorted,
            y=selection_ecdf,
            mode='lines',
            name=selection_label,
            line=dict(width=2, dash='dash', shape='hv'),
        ))

        # Update layout
        fig.update_layout(
            xaxis_title=df_full.name or 'Value',
            yaxis_title='Cumulative Probability',
            hovermode='x unified',
            yaxis=dict(tickformat='.0%', range=[0, 1]),
        )

        return fig

__init__

__init__()

Initialize the ECDF overlay diagnostic.

Source code in energy_repset/diagnostics/score_components/distribution_overlay.py
35
36
37
def __init__(self):
    """Set up the ECDF overlay diagnostic (no state to configure)."""

plot

plot(df_full: Series, df_selection: Series, full_label: str = 'Full', selection_label: str = 'Selection') -> Figure

Create an ECDF overlay plot.

Parameters:

Name Type Description Default
df_full Series

Series containing values for the full dataset.

required
df_selection Series

Series containing values for the selection.

required
full_label str

Label for the full dataset in the legend. Default 'Full'.

'Full'
selection_label str

Label for the selection in the legend. Default 'Selection'.

'Selection'

Returns:

Type Description
Figure

Plotly figure object ready for display or further customization.

Source code in energy_repset/diagnostics/score_components/distribution_overlay.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def plot(
    self,
    df_full: pd.Series,
    df_selection: pd.Series,
    full_label: str = 'Full',
    selection_label: str = 'Selection',
) -> go.Figure:
    """Create an ECDF overlay plot.

    Each curve is drawn as a step function: the ECDF stays constant
    between consecutive sorted observations and jumps by 1/n at each one.

    Args:
        df_full: Series containing values for the full dataset. Its name,
            if set, is used as the x-axis title.
        df_selection: Series containing values for the selection.
        full_label: Label for the full dataset in the legend. Default 'Full'.
        selection_label: Label for the selection in the legend. Default 'Selection'.

    Returns:
        Plotly figure object ready for display or further customization.
    """
    # Drop NaN values
    full_values = df_full.dropna().values
    selection_values = df_selection.dropna().values

    # Calculate ECDF for full dataset: the i-th sorted value maps to i/n
    full_sorted = np.sort(full_values)
    full_ecdf = np.arange(1, len(full_sorted) + 1) / len(full_sorted)

    # Calculate ECDF for selection
    selection_sorted = np.sort(selection_values)
    selection_ecdf = np.arange(1, len(selection_sorted) + 1) / len(selection_sorted)

    # Create figure
    fig = go.Figure()

    # Add full dataset ECDF. shape='hv' (horizontal, then vertical) holds
    # the value until the next observation instead of interpolating
    # linearly, which would misstate the ECDF between sample points.
    fig.add_trace(go.Scatter(
        x=full_sorted,
        y=full_ecdf,
        mode='lines',
        name=full_label,
        line=dict(width=2, shape='hv'),
    ))

    # Add selection ECDF (dashed to distinguish it from the full dataset)
    fig.add_trace(go.Scatter(
        x=selection_sorted,
        y=selection_ecdf,
        mode='lines',
        name=selection_label,
        line=dict(width=2, dash='dash', shape='hv'),
    ))

    # Update layout
    fig.update_layout(
        xaxis_title=df_full.name or 'Value',
        yaxis_title='Cumulative Probability',
        hovermode='x unified',
        yaxis=dict(tickformat='.0%', range=[0, 1]),
    )

    return fig

DistributionOverlayHistogram

Overlay histograms to compare distributions.

Creates a plot showing normalized histograms of a variable for both the full dataset and a selection. Alternative to ECDF that may be more intuitive for some users. Shows per-bin probability (or probability density or percent, depending on histnorm) rather than cumulative probability.

Examples:

>>> # Compare demand distribution
>>> hist_plot = DistributionOverlayHistogram()
>>> full_data = context.df_raw['demand']
>>> selected_indices = context.slicer.get_indices_for_slices(result.selection)
>>> selected_data = context.df_raw.loc[selected_indices, 'demand']
>>> fig = hist_plot.plot(full_data, selected_data)
>>> fig.update_layout(title='Demand Distribution: Full vs Selected')
>>> fig.show()

>>> # With custom bin count
>>> fig = hist_plot.plot(full_data, selected_data, nbins=50)

>>> # Using density mode
>>> fig = hist_plot.plot(full_data, selected_data, histnorm='probability density')
Source code in energy_repset/diagnostics/score_components/distribution_overlay.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
class DistributionOverlayHistogram:
    """Overlay histograms to compare distributions.

    Creates a plot showing normalized histograms of a variable for both the
    full dataset and a selection. Alternative to ECDF that may be more intuitive
    for some users. Shows per-bin values (probability by default) rather than
    cumulative probability.

    Examples:

        >>> # Compare demand distribution
        >>> hist_plot = DistributionOverlayHistogram()
        >>> full_data = context.df_raw['demand']
        >>> selected_indices = context.slicer.get_indices_for_slices(result.selection)
        >>> selected_data = context.df_raw.loc[selected_indices, 'demand']
        >>> fig = hist_plot.plot(full_data, selected_data)
        >>> fig.update_layout(title='Demand Distribution: Full vs Selected')
        >>> fig.show()

        >>> # With custom bin count
        >>> fig = hist_plot.plot(full_data, selected_data, nbins=50)

        >>> # Using density mode
        >>> fig = hist_plot.plot(full_data, selected_data, histnorm='probability density')
    """

    def __init__(self):
        """Initialize the histogram overlay diagnostic (stateless, nothing to configure)."""

    def plot(
        self,
        df_full: pd.Series,
        df_selection: pd.Series,
        nbins: int = 30,
        histnorm: str = 'probability',
        full_label: str = 'Full',
        selection_label: str = 'Selection',
    ) -> go.Figure:
        """Create a histogram overlay plot.

        Both histograms are placed in the same Plotly ``bingroup`` so they use
        compatible bin edges; otherwise overlaid traces are binned
        independently and their bar heights cannot be compared.

        Args:
            df_full: Series containing values for the full dataset.
            df_selection: Series containing values for the selection.
            nbins: Maximum number of bins requested from Plotly (``nbinsx``).
                Default is 30.
            histnorm: Histogram normalization mode. Options: 'probability',
                'probability density', 'density', 'percent', or '' (raw
                counts). Default is 'probability'.
            full_label: Label for the full dataset in the legend. Default 'Full'.
            selection_label: Label for the selection in the legend. Default 'Selection'.

        Returns:
            Plotly figure object ready for display or further customization.

        Raises:
            ValueError: If histnorm is not a valid option.
        """
        # Single source of truth: accepted modes and their y-axis titles.
        yaxis_titles = {
            'probability': 'Probability',
            'probability density': 'Probability Density',
            'percent': 'Percent',
            '': 'Count',
            'density': 'Density',
        }
        valid_histnorms = list(yaxis_titles)
        if histnorm not in valid_histnorms:
            raise ValueError(
                f"histnorm must be one of {valid_histnorms}, got '{histnorm}'"
            )

        fig = go.Figure()

        # NaNs are dropped up front so they cannot influence binning.
        traces = (
            (full_label, df_full.dropna().values),
            (selection_label, df_selection.dropna().values),
        )
        for label, values in traces:
            fig.add_trace(go.Histogram(
                x=values,
                name=label,
                nbinsx=nbins,
                histnorm=histnorm,
                opacity=0.6,
                # Shared group -> compatible bins for both traces in overlay mode.
                bingroup='distribution_overlay',
            ))

        fig.update_layout(
            xaxis_title=df_full.name or 'Value',
            yaxis_title=yaxis_titles[histnorm],
            barmode='overlay',
            hovermode='x unified',
        )

        return fig

__init__

__init__()

Initialize the histogram overlay diagnostic.

Source code in energy_repset/diagnostics/score_components/distribution_overlay.py
126
127
128
def __init__(self):
    """Initialize the histogram overlay diagnostic.

    The diagnostic is stateless: there is nothing to configure or store.
    """

plot

plot(df_full: Series, df_selection: Series, nbins: int = 30, histnorm: str = 'probability', full_label: str = 'Full', selection_label: str = 'Selection') -> Figure

Create a histogram overlay plot.

Parameters:

Name Type Description Default
df_full Series

Series containing values for the full dataset.

required
df_selection Series

Series containing values for the selection.

required
nbins int

Number of bins for the histogram. Default is 30.

30
histnorm str

Histogram normalization mode. Options: 'probability', 'probability density', 'percent', or '' (empty string) for raw counts. Default is 'probability'.

'probability'
full_label str

Label for the full dataset in the legend. Default 'Full'.

'Full'
selection_label str

Label for the selection in the legend. Default 'Selection'.

'Selection'

Returns:

Type Description
Figure

Plotly figure object ready for display or further customization.

Raises:

Type Description
ValueError

If histnorm is not a valid option.

Source code in energy_repset/diagnostics/score_components/distribution_overlay.py
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
def plot(
    self,
    df_full: pd.Series,
    df_selection: pd.Series,
    nbins: int = 30,
    histnorm: str = 'probability',
    full_label: str = 'Full',
    selection_label: str = 'Selection',
) -> go.Figure:
    """Create a histogram overlay plot.

    Both histograms are placed in the same Plotly ``bingroup`` so they use
    compatible bin edges; otherwise overlaid traces are binned
    independently and their bar heights cannot be compared.

    Args:
        df_full: Series containing values for the full dataset.
        df_selection: Series containing values for the selection.
        nbins: Maximum number of bins requested from Plotly (``nbinsx``).
            Default is 30.
        histnorm: Histogram normalization mode. Options: 'probability',
            'probability density', 'density', 'percent', or '' (raw
            counts). Default is 'probability'.
        full_label: Label for the full dataset in the legend. Default 'Full'.
        selection_label: Label for the selection in the legend. Default 'Selection'.

    Returns:
        Plotly figure object ready for display or further customization.

    Raises:
        ValueError: If histnorm is not a valid option.
    """
    # Single source of truth: accepted modes and their y-axis titles.
    yaxis_titles = {
        'probability': 'Probability',
        'probability density': 'Probability Density',
        'percent': 'Percent',
        '': 'Count',
        'density': 'Density',
    }
    valid_histnorms = list(yaxis_titles)
    if histnorm not in valid_histnorms:
        raise ValueError(
            f"histnorm must be one of {valid_histnorms}, got '{histnorm}'"
        )

    fig = go.Figure()

    # NaNs are dropped up front so they cannot influence binning.
    traces = (
        (full_label, df_full.dropna().values),
        (selection_label, df_selection.dropna().values),
    )
    for label, values in traces:
        fig.add_trace(go.Histogram(
            x=values,
            name=label,
            nbinsx=nbins,
            histnorm=histnorm,
            opacity=0.6,
            # Shared group -> compatible bins for both traces in overlay mode.
            bingroup='distribution_overlay',
        ))

    fig.update_layout(
        xaxis_title=df_full.name or 'Value',
        yaxis_title=yaxis_titles[histnorm],
        barmode='overlay',
        hovermode='x unified',
    )

    return fig

CorrelationDifferenceHeatmap

Visualize the difference between correlation matrices.

Creates a heatmap showing the difference between the correlation matrix of the full dataset and the selection. This helps identify which variable relationships are well-preserved or poorly-preserved by the selection. Related to CorrelationFidelity score component.

Positive values (red) indicate that a correlation is higher (more positive) in the selection than in the full dataset. Negative values (blue) indicate that it is lower (more negative).

Examples:

>>> # Compare correlation structure
>>> corr_diff = CorrelationDifferenceHeatmap()
>>> full_data = context.df_raw[['demand', 'wind', 'solar']]
>>> selected_indices = context.slicer.get_indices_for_slices(result.selection)
>>> selected_data = context.df_raw.loc[selected_indices, ['demand', 'wind', 'solar']]
>>> fig = corr_diff.plot(full_data, selected_data)
>>> fig.update_layout(title='Correlation Difference: Selection - Full')
>>> fig.show()

>>> # With Spearman correlation
>>> fig = corr_diff.plot(full_data, selected_data, method='spearman')

>>> # Show only lower triangle
>>> fig = corr_diff.plot(full_data, selected_data, show_lower_only=True)
Source code in energy_repset/diagnostics/score_components/correlation_difference_heatmap.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
class CorrelationDifferenceHeatmap:
    """Visualize the difference between correlation matrices.

    Creates a heatmap showing the difference between the correlation matrix of
    the full dataset and the selection. This helps identify which variable
    relationships are well-preserved or poorly-preserved by the selection.
    Related to CorrelationFidelity score component.

    Positive values (red) indicate that a correlation is higher (more positive)
    in the selection than in the full dataset. Negative values (blue) indicate
    that it is lower (more negative).

    Examples:

        >>> # Compare correlation structure
        >>> corr_diff = CorrelationDifferenceHeatmap()
        >>> full_data = context.df_raw[['demand', 'wind', 'solar']]
        >>> selected_indices = context.slicer.get_indices_for_slices(result.selection)
        >>> selected_data = context.df_raw.loc[selected_indices, ['demand', 'wind', 'solar']]
        >>> fig = corr_diff.plot(full_data, selected_data)
        >>> fig.update_layout(title='Correlation Difference: Selection - Full')
        >>> fig.show()

        >>> # With Spearman correlation
        >>> fig = corr_diff.plot(full_data, selected_data, method='spearman')

        >>> # Show only lower triangle
        >>> fig = corr_diff.plot(full_data, selected_data, show_lower_only=True)
    """

    def __init__(self):
        """Initialize the correlation difference heatmap diagnostic (stateless)."""

    def plot(
        self,
        df_full: pd.DataFrame,
        df_selection: pd.DataFrame,
        method: str = 'pearson',
        show_lower_only: bool = False,
    ) -> go.Figure:
        """Create a heatmap of correlation differences.

        The plotted matrix is ``corr(df_selection) - corr(df_full)``. The color
        scale is symmetric around zero; when every difference is zero or NaN
        the range falls back to [-1, 1] so the color axis is never degenerate.

        Args:
            df_full: DataFrame containing variables for the full dataset.
            df_selection: DataFrame containing variables for the selection.
                Must have the same columns as df_full.
            method: Correlation method ('pearson', 'spearman', or 'kendall').
                Default is 'pearson'.
            show_lower_only: If True, shows only the lower triangle of the
                difference matrix (removes redundant upper triangle and diagonal).

        Returns:
            Plotly figure object ready for display or further customization.

        Raises:
            ValueError: If method is invalid or columns don't match.
        """
        # Local import keeps this block independent of the module's import list.
        import numpy as np

        if method not in ['pearson', 'spearman', 'kendall']:
            raise ValueError(
                f"method must be 'pearson', 'spearman', or 'kendall', got '{method}'"
            )

        if not df_full.columns.equals(df_selection.columns):
            raise ValueError(
                "df_full and df_selection must have the same columns"
            )

        # Difference of correlation matrices (selection - full)
        corr_diff = df_selection.corr(method=method) - df_full.corr(method=method)

        if show_lower_only:
            # Keep the strictly-lower triangle; everything else becomes NaN.
            keep = np.tril(np.ones(corr_diff.shape, dtype=bool), k=-1)
            corr_diff = corr_diff.where(keep)

        # Symmetric color range around 0. NaN (nothing to show) or 0 (no
        # differences at all) would give an unusable range, so fall back to 1.
        max_abs = corr_diff.abs().max().max()
        if pd.isna(max_abs) or max_abs == 0:
            max_abs = 1.0
        max_abs = float(max_abs)

        # Create heatmap
        fig = px.imshow(
            corr_diff,
            x=corr_diff.columns,
            y=corr_diff.index,
            color_continuous_scale='RdBu_r',
            color_continuous_midpoint=0,
            zmin=-max_abs,
            zmax=max_abs,
            aspect='auto',
        )

        # Axis titles add nothing here: tick labels already name the variables.
        fig.update_layout(
            xaxis_title='',
            yaxis_title='',
            coloraxis_colorbar=dict(title='Δ Correlation<br>(Selection - Full)'),
        )

        # Print the rounded difference inside each cell
        fig.update_traces(
            text=corr_diff.round(2).values,
            texttemplate='%{text}',
            textfont=dict(size=10),
        )

        return fig

__init__

__init__()

Initialize the correlation difference heatmap diagnostic.

Source code in energy_repset/diagnostics/score_components/correlation_difference_heatmap.py
37
38
39
def __init__(self):
    """Initialize the correlation difference heatmap diagnostic.

    The diagnostic is stateless: there is nothing to configure or store.
    """

plot

plot(df_full: DataFrame, df_selection: DataFrame, method: str = 'pearson', show_lower_only: bool = False) -> Figure

Create a heatmap of correlation differences.

Parameters:

Name Type Description Default
df_full DataFrame

DataFrame containing variables for the full dataset.

required
df_selection DataFrame

DataFrame containing variables for the selection. Must have the same columns as df_full.

required
method str

Correlation method ('pearson', 'spearman', or 'kendall'). Default is 'pearson'.

'pearson'
show_lower_only bool

If True, shows only the lower triangle of the difference matrix (removes redundant upper triangle and diagonal).

False

Returns:

Type Description
Figure

Plotly figure object ready for display or further customization.

Raises:

Type Description
ValueError

If method is invalid or columns don't match.

Source code in energy_repset/diagnostics/score_components/correlation_difference_heatmap.py
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
def plot(
    self,
    df_full: pd.DataFrame,
    df_selection: pd.DataFrame,
    method: str = 'pearson',
    show_lower_only: bool = False,
) -> go.Figure:
    """Create a heatmap of correlation differences.

    The plotted matrix is ``corr(df_selection) - corr(df_full)``. The color
    scale is symmetric around zero; when every difference is zero or NaN
    the range falls back to [-1, 1] so the color axis is never degenerate.

    Args:
        df_full: DataFrame containing variables for the full dataset.
        df_selection: DataFrame containing variables for the selection.
            Must have the same columns as df_full.
        method: Correlation method ('pearson', 'spearman', or 'kendall').
            Default is 'pearson'.
        show_lower_only: If True, shows only the lower triangle of the
            difference matrix (removes redundant upper triangle and diagonal).

    Returns:
        Plotly figure object ready for display or further customization.

    Raises:
        ValueError: If method is invalid or columns don't match.
    """
    # Local import keeps this block independent of the module's import list.
    import numpy as np

    if method not in ['pearson', 'spearman', 'kendall']:
        raise ValueError(
            f"method must be 'pearson', 'spearman', or 'kendall', got '{method}'"
        )

    if not df_full.columns.equals(df_selection.columns):
        raise ValueError(
            "df_full and df_selection must have the same columns"
        )

    # Difference of correlation matrices (selection - full)
    corr_diff = df_selection.corr(method=method) - df_full.corr(method=method)

    if show_lower_only:
        # Keep the strictly-lower triangle; everything else becomes NaN.
        keep = np.tril(np.ones(corr_diff.shape, dtype=bool), k=-1)
        corr_diff = corr_diff.where(keep)

    # Symmetric color range around 0. NaN (nothing to show) or 0 (no
    # differences at all) would give an unusable range, so fall back to 1.
    max_abs = corr_diff.abs().max().max()
    if pd.isna(max_abs) or max_abs == 0:
        max_abs = 1.0
    max_abs = float(max_abs)

    # Create heatmap
    fig = px.imshow(
        corr_diff,
        x=corr_diff.columns,
        y=corr_diff.index,
        color_continuous_scale='RdBu_r',
        color_continuous_midpoint=0,
        zmin=-max_abs,
        zmax=max_abs,
        aspect='auto',
    )

    # Axis titles add nothing here: tick labels already name the variables.
    fig.update_layout(
        xaxis_title='',
        yaxis_title='',
        coloraxis_colorbar=dict(title='Δ Correlation<br>(Selection - Full)'),
    )

    # Print the rounded difference inside each cell
    fig.update_traces(
        text=corr_diff.round(2).values,
        texttemplate='%{text}',
        textfont=dict(size=10),
    )

    return fig

DiurnalProfileOverlay

Overlay mean diurnal (hour-of-day) profiles for full vs selected data.

Creates a plot showing the average value by hour of day for each variable, comparing the full dataset to the selection. This helps visualize how well the selection preserves daily patterns, which is related to DiurnalFidelity score component.

Examples:

>>> # Compare diurnal patterns
>>> diurnal_plot = DiurnalProfileOverlay()
>>> full_data = context.df_raw[['demand', 'wind', 'solar']]
>>> selected_indices = context.slicer.get_indices_for_slices(result.selection)
>>> selected_data = context.df_raw.loc[selected_indices, ['demand', 'wind', 'solar']]
>>> fig = diurnal_plot.plot(full_data, selected_data)
>>> fig.update_layout(title='Diurnal Profiles: Full vs Selected')
>>> fig.show()

>>> # Single variable
>>> fig = diurnal_plot.plot(
...     full_data[['demand']],
...     selected_data[['demand']]
... )

>>> # Subset of variables
>>> fig = diurnal_plot.plot(
...     full_data,
...     selected_data,
...     variables=['demand', 'wind']
... )
Source code in energy_repset/diagnostics/score_components/diurnal_profile_overlay.py
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
class DiurnalProfileOverlay:
    """Overlay mean diurnal (hour-of-day) profiles for full vs selected data.

    Creates a plot showing the average value by hour of day for each variable,
    comparing the full dataset to the selection. This helps visualize how well
    the selection preserves daily patterns, which is related to DiurnalFidelity
    score component.

    Examples:

        >>> # Compare diurnal patterns
        >>> diurnal_plot = DiurnalProfileOverlay()
        >>> full_data = context.df_raw[['demand', 'wind', 'solar']]
        >>> selected_indices = context.slicer.get_indices_for_slices(result.selection)
        >>> selected_data = context.df_raw.loc[selected_indices, ['demand', 'wind', 'solar']]
        >>> fig = diurnal_plot.plot(full_data, selected_data)
        >>> fig.update_layout(title='Diurnal Profiles: Full vs Selected')
        >>> fig.show()

        >>> # Single variable
        >>> fig = diurnal_plot.plot(
        ...     full_data[['demand']],
        ...     selected_data[['demand']]
        ... )

        >>> # Subset of variables
        >>> fig = diurnal_plot.plot(
        ...     full_data,
        ...     selected_data,
        ...     variables=['demand', 'wind']
        ... )
    """

    def __init__(self):
        """Initialize the diurnal profile overlay diagnostic (stateless)."""

    def plot(
        self,
        df_full: pd.DataFrame,
        df_selection: pd.DataFrame,
        variables: list[str] = None,
        full_label: str = 'Full',
        selection_label: str = 'Selection',
    ) -> go.Figure:
        """Create a diurnal profile overlay plot.

        For every requested variable two traces are drawn: the hour-of-day mean
        over the full dataset (solid line) and over the selection (dashed line,
        diamond markers).

        Args:
            df_full: DataFrame with DatetimeIndex and variable columns for full dataset.
            df_selection: DataFrame with DatetimeIndex and variable columns for selection.
                Must have the same columns as df_full.
            variables: List of variable names to include. If None, uses all columns.
            full_label: Label suffix for full dataset traces. Default 'Full'.
            selection_label: Label suffix for selection traces. Default 'Selection'.

        Returns:
            Plotly figure object ready for display or further customization.

        Raises:
            ValueError: If DataFrames don't have DatetimeIndex, columns don't
                match, or a requested variable is not a column.
        """
        if not isinstance(df_full.index, pd.DatetimeIndex):
            raise ValueError("df_full must have a DatetimeIndex")
        if not isinstance(df_selection.index, pd.DatetimeIndex):
            raise ValueError("df_selection must have a DatetimeIndex")
        if not df_full.columns.equals(df_selection.columns):
            raise ValueError("df_full and df_selection must have the same columns")

        # Determine which variables to plot
        if variables is None:
            variables = list(df_full.columns)
        else:
            # Validate requested variables
            missing = set(variables) - set(df_full.columns)
            if missing:
                raise ValueError(f"Variables not found in DataFrames: {missing}")

        # Group directly on the index's hour instead of adding a helper 'hour'
        # column: a helper column would overwrite a real variable named 'hour'.
        full_profile = (
            df_full[variables]
            .groupby(df_full.index.hour)
            .mean(numeric_only=True)
            .rename_axis('hour')
        )
        selection_profile = (
            df_selection[variables]
            .groupby(df_selection.index.hour)
            .mean(numeric_only=True)
            .rename_axis('hour')
        )

        # Create figure
        fig = go.Figure()

        # Add traces for each variable
        for variable in variables:
            # Full dataset trace
            fig.add_trace(go.Scatter(
                x=full_profile.index,
                y=full_profile[variable],
                mode='lines+markers',
                name=f'{variable} ({full_label})',
                line=dict(width=2),
                marker=dict(size=6),
            ))

            # Selection trace
            fig.add_trace(go.Scatter(
                x=selection_profile.index,
                y=selection_profile[variable],
                mode='lines+markers',
                name=f'{variable} ({selection_label})',
                line=dict(width=2, dash='dash'),
                marker=dict(size=6, symbol='diamond'),
            ))

        # Fixed 0-23 hour axis (with half-hour padding) and a tick every 2 hours
        fig.update_layout(
            xaxis_title='Hour of Day',
            yaxis_title='Mean Value',
            hovermode='x unified',
            xaxis=dict(
                tickmode='linear',
                tick0=0,
                dtick=2,
                range=[-0.5, 23.5],
            ),
        )

        return fig

__init__

__init__()

Initialize the diurnal profile overlay diagnostic.

Source code in energy_repset/diagnostics/score_components/diurnal_profile_overlay.py
40
41
42
def __init__(self):
    """Initialize the diurnal profile overlay diagnostic.

    The diagnostic is stateless: there is nothing to configure or store.
    """

plot

plot(df_full: DataFrame, df_selection: DataFrame, variables: list[str] = None, full_label: str = 'Full', selection_label: str = 'Selection') -> Figure

Create a diurnal profile overlay plot.

Parameters:

Name Type Description Default
df_full DataFrame

DataFrame with DatetimeIndex and variable columns for full dataset.

required
df_selection DataFrame

DataFrame with DatetimeIndex and variable columns for selection. Must have the same columns as df_full.

required
variables list[str]

List of variable names to include. If None, uses all columns.

None
full_label str

Label suffix for full dataset traces. Default 'Full'.

'Full'
selection_label str

Label suffix for selection traces. Default 'Selection'.

'Selection'

Returns:

Type Description
Figure

Plotly figure object ready for display or further customization.

Raises:

Type Description
ValueError

If DataFrames don't have DatetimeIndex or columns don't match.

Source code in energy_repset/diagnostics/score_components/diurnal_profile_overlay.py
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
def plot(
    self,
    df_full: pd.DataFrame,
    df_selection: pd.DataFrame,
    variables: list[str] = None,
    full_label: str = 'Full',
    selection_label: str = 'Selection',
) -> go.Figure:
    """Create a diurnal profile overlay plot.

    For every requested variable two traces are drawn: the hour-of-day mean
    over the full dataset (solid line) and over the selection (dashed line,
    diamond markers).

    Args:
        df_full: DataFrame with DatetimeIndex and variable columns for full dataset.
        df_selection: DataFrame with DatetimeIndex and variable columns for selection.
            Must have the same columns as df_full.
        variables: List of variable names to include. If None, uses all columns.
        full_label: Label suffix for full dataset traces. Default 'Full'.
        selection_label: Label suffix for selection traces. Default 'Selection'.

    Returns:
        Plotly figure object ready for display or further customization.

    Raises:
        ValueError: If DataFrames don't have DatetimeIndex, columns don't
            match, or a requested variable is not a column.
    """
    if not isinstance(df_full.index, pd.DatetimeIndex):
        raise ValueError("df_full must have a DatetimeIndex")
    if not isinstance(df_selection.index, pd.DatetimeIndex):
        raise ValueError("df_selection must have a DatetimeIndex")
    if not df_full.columns.equals(df_selection.columns):
        raise ValueError("df_full and df_selection must have the same columns")

    # Determine which variables to plot
    if variables is None:
        variables = list(df_full.columns)
    else:
        # Validate requested variables
        missing = set(variables) - set(df_full.columns)
        if missing:
            raise ValueError(f"Variables not found in DataFrames: {missing}")

    # Group directly on the index's hour instead of adding a helper 'hour'
    # column: a helper column would overwrite a real variable named 'hour'.
    full_profile = (
        df_full[variables]
        .groupby(df_full.index.hour)
        .mean(numeric_only=True)
        .rename_axis('hour')
    )
    selection_profile = (
        df_selection[variables]
        .groupby(df_selection.index.hour)
        .mean(numeric_only=True)
        .rename_axis('hour')
    )

    # Create figure
    fig = go.Figure()

    # Add traces for each variable
    for variable in variables:
        # Full dataset trace
        fig.add_trace(go.Scatter(
            x=full_profile.index,
            y=full_profile[variable],
            mode='lines+markers',
            name=f'{variable} ({full_label})',
            line=dict(width=2),
            marker=dict(size=6),
        ))

        # Selection trace
        fig.add_trace(go.Scatter(
            x=selection_profile.index,
            y=selection_profile[variable],
            mode='lines+markers',
            name=f'{variable} ({selection_label})',
            line=dict(width=2, dash='dash'),
            marker=dict(size=6, symbol='diamond'),
        ))

    # Fixed 0-23 hour axis (with half-hour padding) and a tick every 2 hours
    fig.update_layout(
        xaxis_title='Hour of Day',
        yaxis_title='Mean Value',
        hovermode='x unified',
        xaxis=dict(
            tickmode='linear',
            tick0=0,
            dtick=2,
            range=[-0.5, 23.5],
        ),
    )

    return fig

Results

ResponsibilityBars

Bar chart showing responsibility weights for selected representatives.

Visualizes the weight distribution across selected periods as computed by a RepresentationModel. Each bar shows how much each representative contributes to the full dataset representation.

Optionally displays a reference line showing uniform weights (1/k) for comparison with non-uniform weighting schemes like cluster-size based weights.

Examples:

>>> from energy_repset.diagnostics.results import ResponsibilityBars
>>>
>>> # After running workflow with result containing weights
>>> weights = result.weights  # e.g., {Period('2024-01'): 0.35, ...}
>>> bars = ResponsibilityBars()
>>> fig = bars.plot(weights, show_uniform_reference=True)
>>> fig.update_layout(title='Responsibility Weights')
>>> fig.show()
Source code in energy_repset/diagnostics/results/responsibility_bars.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
class ResponsibilityBars:
    """Bar chart of the responsibility weight carried by each representative.

    Draws one bar per selected period, with the bar height equal to the weight
    assigned to that period (for example by a RepresentationModel), so the
    contribution of each representative to the full dataset can be compared
    at a glance.

    A reference line at the uniform weight (1/k) can be added to make
    non-uniform schemes, such as cluster-size based weights, easy to spot.

    Examples:

        >>> from energy_repset.diagnostics.results import ResponsibilityBars
        >>>
        >>> # After running workflow with result containing weights
        >>> weights = result.weights  # e.g., {Period('2024-01'): 0.35, ...}
        >>> bars = ResponsibilityBars()
        >>> fig = bars.plot(weights, show_uniform_reference=True)
        >>> fig.update_layout(title='Responsibility Weights')
        >>> fig.show()
    """

    def __init__(self):
        """Initialize ResponsibilityBars diagnostic (stateless)."""

    def plot(
        self,
        weights: Dict[Hashable, float],
        show_uniform_reference: bool = True,
    ) -> go.Figure:
        """Create bar chart of responsibility weights.

        Args:
            weights: Mapping from slice identifier to weight. Identifiers are
                converted with ``str`` for the x-axis labels. Weights should
                sum to 1.0 for the uniform reference line to be meaningful.
            show_uniform_reference: If True, adds a horizontal dotted line at
                the uniform weight (1/k), where k is the number of entries.

        Returns:
            Plotly figure with one bar per slice; each bar is annotated with
            its weight formatted to 3 decimal places.

        Raises:
            ValueError: If weights dictionary is empty.
        """
        if not weights:
            raise ValueError("Weights dictionary cannot be empty")

        # Tabular form expected by plotly express
        labels = [str(key) for key in weights]
        values = list(weights.values())
        frame = pd.DataFrame({'slice': labels, 'weight': values})

        fig = px.bar(frame, x='slice', y='weight', text='weight')

        # Weight labels: 3 decimals, drawn above the bars
        fig.update_traces(texttemplate='%{y:.3f}', textposition='outside')

        # 15% headroom so the labels above the tallest bar are not clipped
        y_top = max(frame['weight']) * 1.15
        fig.update_yaxes(title='Responsibility Weight', range=[0, y_top])
        fig.update_xaxes(title='Selected Period')

        if not show_uniform_reference:
            return fig

        # weights is known to be non-empty here, so the division is safe
        uniform_weight = 1.0 / len(weights)
        fig.add_hline(
            y=uniform_weight,
            line_dash='dot',
            annotation_text=f'Uniform ({uniform_weight:.3f})',
            annotation_position='top left'
        )
        return fig

__init__

__init__()

Initialize ResponsibilityBars diagnostic.

Source code in energy_repset/diagnostics/results/responsibility_bars.py
33
34
35
def __init__(self):
    """Initialize ResponsibilityBars diagnostic.

    The initializer takes no arguments and sets no attributes.
    """

plot

plot(weights: dict[Hashable, float], show_uniform_reference: bool = True) -> Figure

Create bar chart of responsibility weights.

Parameters:

Name Type Description Default
weights dict[Hashable, float]

Dictionary mapping slice identifiers to their weights. Weights should sum to 1.0 for meaningful comparison with the uniform reference line.

required
show_uniform_reference bool

If True, adds a horizontal dotted line showing the uniform weight (1/k) for comparison.

True

Returns:

Type Description
Figure

Plotly figure with bar chart. X-axis shows slice labels, Y-axis shows weight values. Text labels show weights to 3 decimal places.

Raises:

Type Description
ValueError

If weights dictionary is empty.

Source code in energy_repset/diagnostics/results/responsibility_bars.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def plot(
    self,
    weights: Dict[Hashable, float],
    show_uniform_reference: bool = True,
) -> go.Figure:
    """Create bar chart of responsibility weights.

    Args:
        weights: Dictionary mapping slice identifiers to their weights.
            Weights should sum to 1.0 for meaningful comparison with
            the uniform reference line.
        show_uniform_reference: If True, adds a horizontal dotted line
            showing uniform weight (1/k) for comparison.

    Returns:
        Plotly figure with bar chart. X-axis shows slice labels, Y-axis
        shows weight values. Text labels show weights to 3 decimal places.
        The y-axis always spans the tallest bar and, when it is drawn,
        the uniform reference line.

    Raises:
        ValueError: If weights dictionary is empty.
    """
    if not weights:
        raise ValueError("Weights dictionary cannot be empty")

    # Prepare data for plotting; keys are stringified so any hashable
    # slice identifier can be used as a categorical x-axis label.
    df = pd.DataFrame({
        'slice': [str(s) for s in weights],
        'weight': list(weights.values())
    })

    # Create bar chart
    fig = px.bar(
        df,
        x='slice',
        y='weight',
        text='weight'
    )

    # Format text labels to 3 decimal places, position outside bars
    fig.update_traces(
        texttemplate='%{y:.3f}',
        textposition='outside'
    )

    # weights is guaranteed non-empty here, so the division is safe.
    uniform_weight = 1.0 / len(weights)

    # The axis must cover the tallest bar and, if shown, the reference
    # line: with weights that do not sum to 1 the line can lie above
    # every bar and would otherwise be clipped out of view.
    y_top = max(df['weight'])
    if show_uniform_reference:
        y_top = max(y_top, uniform_weight)

    # Set y-axis range and label
    fig.update_yaxes(
        range=[0, y_top * 1.15],  # Add headroom for text labels
        title='Responsibility Weight'
    )

    fig.update_xaxes(title='Selected Period')

    # Add uniform reference line if requested
    if show_uniform_reference:
        fig.add_hline(
            y=uniform_weight,
            line_dash='dot',
            annotation_text=f'Uniform ({uniform_weight:.3f})',
            annotation_position='top left'
        )

    return fig

ParetoScatter2D

2D scatter plot of all evaluated combinations with Pareto front highlighted.

Visualizes the objective space for two objectives, showing:

- All evaluated combinations as scatter points
- Pareto-optimal solutions highlighted
- Selected combination (if provided) marked distinctly
- Feasible vs infeasible solutions (if constraints exist)

Parameters:

Name Type Description Default
objective_x str

Name of objective for x-axis.

required
objective_y str

Name of objective for y-axis.

required

Examples:

>>> from energy_repset.diagnostics.results import ParetoScatter2D
>>> scatter = ParetoScatter2D(objective_x='wasserstein', objective_y='correlation')
>>> fig = scatter.plot(
...     search_algorithm=workflow.search_algorithm,
...     selected_combination=result.selection
... )
>>> fig.update_layout(title='Pareto Front: Wasserstein vs Correlation')
>>> fig.show()
Source code in energy_repset/diagnostics/results/pareto_scatter.py
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
class ParetoScatter2D:
    """Two-objective scatter diagnostic with the Pareto front highlighted.

    Every evaluated combination is drawn in the plane spanned by two
    objectives. Depending on what the selection policy exposes, points are
    grouped into:
    - Infeasible combinations
    - Feasible but dominated combinations
    - Pareto-optimal combinations
    and the chosen combination (if given) is overlaid as a star.

    Args:
        objective_x: Name of objective for x-axis.
        objective_y: Name of objective for y-axis.

    Examples:
        >>> from energy_repset.diagnostics.results import ParetoScatter2D
        >>> scatter = ParetoScatter2D(objective_x='wasserstein', objective_y='correlation')
        >>> fig = scatter.plot(
        ...     search_algorithm=workflow.search_algorithm,
        ...     selected_combination=result.selection
        ... )
        >>> fig.update_layout(title='Pareto Front: Wasserstein vs Correlation')
        >>> fig.show()
    """

    def __init__(self, objective_x: str, objective_y: str):
        """Store the two objective names used as plot axes.

        Args:
            objective_x: Name of objective for x-axis.
            objective_y: Name of objective for y-axis.
        """
        self.objective_x, self.objective_y = objective_x, objective_y

    def plot(
        self,
        search_algorithm: ObjectiveDrivenCombinatorialSearchAlgorithm,
        selected_combination: SliceCombination | None = None,
    ) -> go.Figure:
        """Build the 2D objective-space scatter plot.

        Args:
            search_algorithm: Search algorithm after find_selection() has been called.
            selected_combination: Optional combination to highlight (e.g., result.selection).

        Returns:
            Plotly figure with one scatter trace per non-empty point group,
            plus a star marker for the selected combination when it is found
            in the score table.

        Raises:
            ValueError: If ``objective_x`` or ``objective_y`` is not a column
                of the score table. A missing find_selection() call is
                presumably reported by ``get_all_scores()`` itself.
        """
        scores = search_algorithm.get_all_scores()

        for objective in (self.objective_x, self.objective_y):
            if objective not in scores.columns:
                raise ValueError(f"Objective '{objective}' not found in scores")

        # Masks are only available on policies that expose `pareto_mask`,
        # and only once that attribute has been populated.
        policy = search_algorithm.selection_policy
        pareto_mask = getattr(policy, 'pareto_mask', None)
        feasible_mask = policy.feasible_mask if pareto_mask is not None else None

        fig = go.Figure()

        xs = scores[self.objective_x]
        ys = scores[self.objective_y]

        # Shared hover text: both objective values, no trace-name box.
        axis_hover = (
            f'{self.objective_x}: %{{x:.4f}}<br>'
            f'{self.objective_y}: %{{y:.4f}}<br>'
        )
        point_hover = axis_hover + '<extra></extra>'

        # Each group is (legend name, boolean mask or None for "all", marker).
        if pareto_mask is None:
            groups = [('All Combinations', None, dict(size=6, opacity=0.5))]
        else:
            on_front = pareto_mask.values
            is_feasible = feasible_mask.values
            groups = [
                ('Infeasible', ~is_feasible, dict(size=6, opacity=0.3)),
                ('Dominated', is_feasible & ~on_front, dict(size=6, opacity=0.5)),
                ('Pareto Front', on_front & is_feasible, dict(size=10, symbol='diamond')),
            ]

        for group_name, mask, marker_style in groups:
            if mask is None:
                group_x, group_y = xs, ys
            elif mask.any():
                group_x, group_y = xs[mask], ys[mask]
            else:
                # Empty groups get no trace (and no legend entry).
                continue
            fig.add_trace(go.Scatter(
                x=group_x,
                y=group_y,
                mode='markers',
                marker=marker_style,
                name=group_name,
                hovertemplate=point_hover,
            ))

        if selected_combination is not None:
            is_selected = scores['slices'].apply(
                lambda combo: combo == selected_combination
            )
            if is_selected.any():
                # Only the first matching row is highlighted.
                star_x = xs[is_selected].values[0]
                star_y = ys[is_selected].values[0]
                star_marker = dict(
                    size=15,
                    symbol='star',
                    line=dict(width=2, color='black'),
                )
                fig.add_trace(go.Scatter(
                    x=[star_x],
                    y=[star_y],
                    mode='markers',
                    marker=star_marker,
                    name='Selected',
                    hovertemplate=(
                        axis_hover
                        + '<b>SELECTED</b><br>'
                        + '<extra></extra>'
                    ),
                ))

        fig.update_layout(
            xaxis_title=self.objective_x,
            yaxis_title=self.objective_y,
            hovermode='closest',
            showlegend=True,
        )

        return fig

__init__

__init__(objective_x: str, objective_y: str)

Initialize Pareto scatter diagnostic.

Parameters:

Name Type Description Default
objective_x str

Name of objective for x-axis.

required
objective_y str

Name of objective for y-axis.

required
Source code in energy_repset/diagnostics/results/pareto_scatter.py
37
38
39
40
41
42
43
44
45
def __init__(self, objective_x: str, objective_y: str):
    """Store the two objective names used as plot axes.

    Args:
        objective_x: Name of objective for x-axis.
        objective_y: Name of objective for y-axis.
    """
    self.objective_x, self.objective_y = objective_x, objective_y

plot

plot(search_algorithm: ObjectiveDrivenCombinatorialSearchAlgorithm, selected_combination: SliceCombination | None = None) -> Figure

Create 2D scatter plot of Pareto front.

Parameters:

Name Type Description Default
search_algorithm ObjectiveDrivenCombinatorialSearchAlgorithm

Search algorithm after find_selection() has been called.

required
selected_combination SliceCombination | None

Optional combination to highlight (e.g., result.selection).

None

Returns:

Type Description
Figure

Plotly figure with scatter plot.

Raises:

Type Description
ValueError

If find_selection() hasn't been called or objectives not found.

Source code in energy_repset/diagnostics/results/pareto_scatter.py
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
def plot(
    self,
    search_algorithm: ObjectiveDrivenCombinatorialSearchAlgorithm,
    selected_combination: SliceCombination | None = None,
) -> go.Figure:
    """Build the 2D objective-space scatter plot.

    Args:
        search_algorithm: Search algorithm after find_selection() has been called.
        selected_combination: Optional combination to highlight (e.g., result.selection).

    Returns:
        Plotly figure with one scatter trace per non-empty point group,
        plus a star marker for the selected combination when it is found
        in the score table.

    Raises:
        ValueError: If ``objective_x`` or ``objective_y`` is not a column
            of the score table. A missing find_selection() call is
            presumably reported by ``get_all_scores()`` itself.
    """
    scores = search_algorithm.get_all_scores()

    for objective in (self.objective_x, self.objective_y):
        if objective not in scores.columns:
            raise ValueError(f"Objective '{objective}' not found in scores")

    # Masks are only available on policies that expose `pareto_mask`,
    # and only once that attribute has been populated.
    policy = search_algorithm.selection_policy
    pareto_mask = getattr(policy, 'pareto_mask', None)
    feasible_mask = policy.feasible_mask if pareto_mask is not None else None

    fig = go.Figure()

    xs = scores[self.objective_x]
    ys = scores[self.objective_y]

    # Shared hover text: both objective values, no trace-name box.
    axis_hover = (
        f'{self.objective_x}: %{{x:.4f}}<br>'
        f'{self.objective_y}: %{{y:.4f}}<br>'
    )
    point_hover = axis_hover + '<extra></extra>'

    # Each group is (legend name, boolean mask or None for "all", marker).
    if pareto_mask is None:
        groups = [('All Combinations', None, dict(size=6, opacity=0.5))]
    else:
        on_front = pareto_mask.values
        is_feasible = feasible_mask.values
        groups = [
            ('Infeasible', ~is_feasible, dict(size=6, opacity=0.3)),
            ('Dominated', is_feasible & ~on_front, dict(size=6, opacity=0.5)),
            ('Pareto Front', on_front & is_feasible, dict(size=10, symbol='diamond')),
        ]

    for group_name, mask, marker_style in groups:
        if mask is None:
            group_x, group_y = xs, ys
        elif mask.any():
            group_x, group_y = xs[mask], ys[mask]
        else:
            # Empty groups get no trace (and no legend entry).
            continue
        fig.add_trace(go.Scatter(
            x=group_x,
            y=group_y,
            mode='markers',
            marker=marker_style,
            name=group_name,
            hovertemplate=point_hover,
        ))

    if selected_combination is not None:
        is_selected = scores['slices'].apply(
            lambda combo: combo == selected_combination
        )
        if is_selected.any():
            # Only the first matching row is highlighted.
            star_x = xs[is_selected].values[0]
            star_y = ys[is_selected].values[0]
            star_marker = dict(
                size=15,
                symbol='star',
                line=dict(width=2, color='black'),
            )
            fig.add_trace(go.Scatter(
                x=[star_x],
                y=[star_y],
                mode='markers',
                marker=star_marker,
                name='Selected',
                hovertemplate=(
                    axis_hover
                    + '<b>SELECTED</b><br>'
                    + '<extra></extra>'
                ),
            ))

    fig.update_layout(
        xaxis_title=self.objective_x,
        yaxis_title=self.objective_y,
        hovermode='closest',
        showlegend=True,
    )

    return fig

ParetoScatterMatrix

Scatter matrix of all objectives showing Pareto front.

Creates a scatter plot matrix (SPLOM) showing pairwise relationships between all objectives. Each subplot shows two objectives with Pareto front highlighted.

Parameters:

Name Type Description Default
objectives list[str] | None

List of objective names to include (None = all objectives).

None

Examples:

>>> from energy_repset.diagnostics.results import ParetoScatterMatrix
>>> scatter_matrix = ParetoScatterMatrix(
...     objectives=['wasserstein', 'correlation', 'diurnal']
... )
>>> fig = scatter_matrix.plot(
...     search_algorithm=workflow.search_algorithm,
...     selected_combination=result.selection
... )
>>> fig.update_layout(title='Pareto Front: All Objectives')
>>> fig.show()
Source code in energy_repset/diagnostics/results/pareto_scatter.py
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
class ParetoScatterMatrix:
    """Scatter matrix of all objectives showing Pareto front.

    Creates a scatter plot matrix (SPLOM) showing pairwise relationships between
    all objectives. Each subplot shows two objectives with Pareto front highlighted.

    Args:
        objectives: List of objective names to include (None = all objectives).

    Examples:
        >>> from energy_repset.diagnostics.results import ParetoScatterMatrix
        >>> scatter_matrix = ParetoScatterMatrix(
        ...     objectives=['wasserstein', 'correlation', 'diurnal']
        ... )
        >>> fig = scatter_matrix.plot(
        ...     search_algorithm=workflow.search_algorithm,
        ...     selected_combination=result.selection
        ... )
        >>> fig.update_layout(title='Pareto Front: All Objectives')
        >>> fig.show()
    """

    def __init__(self, objectives: list[str] | None = None):
        """Initialize Pareto scatter matrix diagnostic.

        Args:
            objectives: List of objective names to include (None = all).
        """
        self.objectives = objectives

    def plot(
        self,
        search_algorithm: ObjectiveDrivenCombinatorialSearchAlgorithm,
        selected_combination: SliceCombination | None = None,
    ) -> go.Figure:
        """Create scatter matrix of Pareto front.

        Args:
            search_algorithm: Search algorithm after find_selection() has been called.
            selected_combination: Optional combination to highlight. When it
                is found in the score table it is overlaid as a star marker
                in every panel.

        Returns:
            Plotly figure with scatter matrix (lower triangle only, no
            diagonal). The first trace holds all combinations; a second
            trace named 'Selected' is added when a selection is highlighted.

        Raises:
            ValueError: If a requested objective is not a column of the score
                table, or fewer than two objectives are available. A missing
                find_selection() call is presumably reported by
                ``get_all_scores()`` itself.
        """
        df = search_algorithm.get_all_scores()

        if self.objectives is None:
            # Every column except the bookkeeping ones counts as an objective.
            obj_cols = [col for col in df.columns if col not in ['slices', 'label']]
        else:
            obj_cols = self.objectives
            for obj in obj_cols:
                if obj not in df.columns:
                    raise ValueError(f"Objective '{obj}' not found in scores")

        if len(obj_cols) < 2:
            raise ValueError("Need at least 2 objectives for scatter matrix")

        # Masks are only available on policies that expose `pareto_mask`,
        # and only once that attribute has been populated.
        policy = search_algorithm.selection_policy
        pareto_mask = getattr(policy, 'pareto_mask', None)

        df_plot = df.copy()
        marker = dict(
            size=5,
            showscale=False,
            line=dict(width=0.5, color='white'),
        )

        if pareto_mask is not None:
            pareto = pareto_mask.values
            feasible = policy.feasible_mask.values
            df_plot['category'] = 'Dominated'
            df_plot.loc[~feasible, 'category'] = 'Infeasible'
            df_plot.loc[pareto & feasible, 'category'] = 'Pareto Front'
            marker.update(
                color=df_plot['category'].map({
                    'Infeasible': 0,
                    'Dominated': 1,
                    'Pareto Front': 2
                }),
                colorscale=[[0, 'lightgray'], [0.5, 'steelblue'], [1, 'darkorange']],
                # Pin the colour range: without it the scale is fitted to the
                # codes actually present, so a missing category (e.g. no
                # infeasible points) would shift every colour.
                cmin=0,
                cmax=2,
            )

        dimensions = [dict(label=obj, values=df_plot[obj]) for obj in obj_cols]

        fig = go.Figure(data=go.Splom(
            dimensions=dimensions,
            marker=marker,
            text=df_plot['label'] if 'label' in df_plot else None,
            diagonal_visible=False,
            showupperhalf=False,
        ))

        if selected_combination is not None:
            selected_idx = df['slices'].apply(lambda x: x == selected_combination)
            if selected_idx.any():
                # Overlay the first matching row as a one-point SPLOM trace;
                # it uses the same dimension order so it lands on the same
                # panels as the main trace.
                selected_vals = [df_plot.loc[selected_idx, obj].values[0] for obj in obj_cols]
                fig.add_trace(go.Splom(
                    dimensions=[
                        dict(label=obj, values=[val])
                        for obj, val in zip(obj_cols, selected_vals)
                    ],
                    marker=dict(
                        size=12,
                        symbol='star',
                        color='crimson',
                        line=dict(width=1, color='black'),
                    ),
                    name='Selected',
                    text=['SELECTED'],
                    diagonal_visible=False,
                    showupperhalf=False,
                ))

        fig.update_layout(
            title='Scatter Matrix: All Objectives',
            height=150 * len(obj_cols),
            width=150 * len(obj_cols),
            showlegend=False,
        )

        return fig

__init__

__init__(objectives: list[str] | None = None)

Initialize Pareto scatter matrix diagnostic.

Parameters:

Name Type Description Default
objectives list[str] | None

List of objective names to include (None = all).

None
Source code in energy_repset/diagnostics/results/pareto_scatter.py
201
202
203
204
205
206
207
def __init__(self, objectives: list[str] | None = None):
    """Remember which objectives the scatter matrix should show.

    Args:
        objectives: Objective names to plot; ``None`` means every
            objective column found in the scores.
    """
    self.objectives = objectives

plot

plot(search_algorithm: ObjectiveDrivenCombinatorialSearchAlgorithm, selected_combination: SliceCombination | None = None) -> Figure

Create scatter matrix of Pareto front.

Parameters:

Name Type Description Default
search_algorithm ObjectiveDrivenCombinatorialSearchAlgorithm

Search algorithm after find_selection() has been called.

required
selected_combination SliceCombination | None

Optional combination to highlight.

None

Returns:

Type Description
Figure

Plotly figure with scatter matrix.

Raises:

Type Description
ValueError

If find_selection() hasn't been called.

Source code in energy_repset/diagnostics/results/pareto_scatter.py
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
def plot(
    self,
    search_algorithm: ObjectiveDrivenCombinatorialSearchAlgorithm,
    selected_combination: SliceCombination | None = None,
) -> go.Figure:
    """Create scatter matrix of Pareto front.

    Args:
        search_algorithm: Search algorithm after find_selection() has been called.
        selected_combination: Optional combination to highlight. When it
            is found in the score table it is overlaid as a star marker
            in every panel.

    Returns:
        Plotly figure with scatter matrix (lower triangle only, no
        diagonal). The first trace holds all combinations; a second
        trace named 'Selected' is added when a selection is highlighted.

    Raises:
        ValueError: If a requested objective is not a column of the score
            table, or fewer than two objectives are available. A missing
            find_selection() call is presumably reported by
            ``get_all_scores()`` itself.
    """
    df = search_algorithm.get_all_scores()

    if self.objectives is None:
        # Every column except the bookkeeping ones counts as an objective.
        obj_cols = [col for col in df.columns if col not in ['slices', 'label']]
    else:
        obj_cols = self.objectives
        for obj in obj_cols:
            if obj not in df.columns:
                raise ValueError(f"Objective '{obj}' not found in scores")

    if len(obj_cols) < 2:
        raise ValueError("Need at least 2 objectives for scatter matrix")

    # Masks are only available on policies that expose `pareto_mask`,
    # and only once that attribute has been populated.
    policy = search_algorithm.selection_policy
    pareto_mask = getattr(policy, 'pareto_mask', None)

    df_plot = df.copy()
    marker = dict(
        size=5,
        showscale=False,
        line=dict(width=0.5, color='white'),
    )

    if pareto_mask is not None:
        pareto = pareto_mask.values
        feasible = policy.feasible_mask.values
        df_plot['category'] = 'Dominated'
        df_plot.loc[~feasible, 'category'] = 'Infeasible'
        df_plot.loc[pareto & feasible, 'category'] = 'Pareto Front'
        marker.update(
            color=df_plot['category'].map({
                'Infeasible': 0,
                'Dominated': 1,
                'Pareto Front': 2
            }),
            colorscale=[[0, 'lightgray'], [0.5, 'steelblue'], [1, 'darkorange']],
            # Pin the colour range: without it the scale is fitted to the
            # codes actually present, so a missing category (e.g. no
            # infeasible points) would shift every colour.
            cmin=0,
            cmax=2,
        )

    dimensions = [dict(label=obj, values=df_plot[obj]) for obj in obj_cols]

    fig = go.Figure(data=go.Splom(
        dimensions=dimensions,
        marker=marker,
        text=df_plot['label'] if 'label' in df_plot else None,
        diagonal_visible=False,
        showupperhalf=False,
    ))

    if selected_combination is not None:
        selected_idx = df['slices'].apply(lambda x: x == selected_combination)
        if selected_idx.any():
            # Overlay the first matching row as a one-point SPLOM trace;
            # it uses the same dimension order so it lands on the same
            # panels as the main trace.
            selected_vals = [df_plot.loc[selected_idx, obj].values[0] for obj in obj_cols]
            fig.add_trace(go.Splom(
                dimensions=[
                    dict(label=obj, values=[val])
                    for obj, val in zip(obj_cols, selected_vals)
                ],
                marker=dict(
                    size=12,
                    symbol='star',
                    color='crimson',
                    line=dict(width=1, color='black'),
                ),
                name='Selected',
                text=['SELECTED'],
                diagonal_visible=False,
                showupperhalf=False,
            ))

    fig.update_layout(
        title='Scatter Matrix: All Objectives',
        height=150 * len(obj_cols),
        width=150 * len(obj_cols),
        showlegend=False,
    )

    return fig

ParetoParallelCoordinates

Parallel coordinates plot of Pareto front.

Visualizes multi-objective trade-offs using parallel coordinates where each vertical axis represents one objective. Lines connecting axes show individual solutions, with Pareto-optimal solutions highlighted.

Parameters:

Name Type Description Default
objectives list[str] | None

List of objective names to include (None = all objectives).

None

Examples:

>>> from energy_repset.diagnostics.results import ParetoParallelCoordinates
>>> parallel = ParetoParallelCoordinates()
>>> fig = parallel.plot(search_algorithm=workflow.search_algorithm)
>>> fig.update_layout(title='Pareto Front: Parallel Coordinates')
>>> fig.show()
Source code in energy_repset/diagnostics/results/pareto_parallel_coords.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
class ParetoParallelCoordinates:
    """Parallel-coordinates diagnostic for multi-objective results.

    One vertical axis is drawn per objective and every evaluated combination
    becomes a line crossing all axes. When the selection policy exposes
    Pareto information, lines are coloured by status (infeasible, dominated,
    Pareto-optimal).

    Args:
        objectives: List of objective names to include (None = all objectives).

    Examples:
        >>> from energy_repset.diagnostics.results import ParetoParallelCoordinates
        >>> parallel = ParetoParallelCoordinates()
        >>> fig = parallel.plot(search_algorithm=workflow.search_algorithm)
        >>> fig.update_layout(title='Pareto Front: Parallel Coordinates')
        >>> fig.show()
    """

    def __init__(self, objectives: list[str] | None = None):
        """Remember which objectives should become axes.

        Args:
            objectives: Objective names to plot; ``None`` means every
                objective column found in the scores.
        """
        self.objectives = objectives

    def plot(
        self,
        search_algorithm: ObjectiveDrivenCombinatorialSearchAlgorithm,
    ) -> go.Figure:
        """Build the parallel coordinates figure.

        Args:
            search_algorithm: Search algorithm after find_selection() has been called.

        Returns:
            Plotly figure with parallel coordinates plot.

        Raises:
            ValueError: If a requested objective is not a column of the score
                table, or fewer than two objectives are available. A missing
                find_selection() call is presumably reported by
                ``get_all_scores()`` itself.
        """
        scores = search_algorithm.get_all_scores()

        if self.objectives is not None:
            axis_names = self.objectives
            for name in axis_names:
                if name not in scores.columns:
                    raise ValueError(f"Objective '{name}' not found in scores")
        else:
            # Every column except the bookkeeping ones counts as an objective.
            axis_names = [
                column for column in scores.columns
                if column not in ['slices', 'label']
            ]

        if len(axis_names) < 2:
            raise ValueError("Need at least 2 objectives for parallel coordinates")

        # Masks are only available on policies that expose `pareto_mask`,
        # and only once that attribute has been populated.
        policy = search_algorithm.selection_policy
        pareto_mask = getattr(policy, 'pareto_mask', None)
        feasible_mask = policy.feasible_mask if pareto_mask is not None else None

        dimensions = [dict(label=name, values=scores[name]) for name in axis_names]

        if pareto_mask is None:
            line_style = dict(
                color='steelblue',
                showscale=False,
            )
        else:
            on_front = pareto_mask.values
            is_feasible = feasible_mask.values

            def status_code(row: int) -> int:
                # 0 = infeasible, 1 = feasible but dominated, 2 = Pareto-optimal
                if not is_feasible[row]:
                    return 0
                return 2 if on_front[row] else 1

            line_style = dict(
                color=[status_code(row) for row in range(len(scores))],
                colorscale=[
                    [0, 'lightgray'],
                    [0.5, 'steelblue'],
                    [1, 'darkorange']
                ],
                showscale=True,
                cmin=0,
                cmax=2,
                colorbar=dict(
                    title='Status',
                    tickvals=[0, 1, 2],
                    ticktext=['Infeasible', 'Dominated', 'Pareto'],
                )
            )

        fig = go.Figure(data=go.Parcoords(
            dimensions=dimensions,
            line=line_style,
        ))

        fig.update_layout(
            title='Parallel Coordinates: All Objectives',
            height=500,
        )

        return fig

__init__

__init__(objectives: list[str] | None = None)

Initialize parallel coordinates diagnostic.

Parameters:

Name Type Description Default
objectives list[str] | None

List of objective names to include (None = all).

None
Source code in energy_repset/diagnostics/results/pareto_parallel_coords.py
29
30
31
32
33
34
35
def __init__(self, objectives: list[str] | None = None):
    """Initialize parallel coordinates diagnostic.

    Args:
        objectives: List of objective names to include (None = all). The
            value is stored unchanged on ``self.objectives``.
    """
    # Kept exactly as given: no copy and no validation happen here.
    self.objectives = objectives

plot

plot(search_algorithm: ObjectiveDrivenCombinatorialSearchAlgorithm) -> Figure

Create a parallel coordinates plot of all scored candidates, colored by Pareto status when the selection policy provides it.

Parameters:

Name Type Description Default
search_algorithm ObjectiveDrivenCombinatorialSearchAlgorithm

Search algorithm after find_selection() has been called.

required

Returns:

Type Description
Figure

Plotly figure with parallel coordinates plot.

Raises:

Type Description
ValueError

If find_selection() hasn't been called.

Source code in energy_repset/diagnostics/results/pareto_parallel_coords.py
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def plot(
    self,
    search_algorithm: ObjectiveDrivenCombinatorialSearchAlgorithm,
) -> go.Figure:
    """Create a parallel coordinates plot of all evaluated objective scores.

    Every selected objective becomes one axis and every row returned by
    ``search_algorithm.get_all_scores()`` becomes one line. When the selection
    policy exposes a non-None ``pareto_mask``, lines are colored by status
    (0 = infeasible, 1 = dominated, 2 = Pareto); otherwise all lines share a
    single color.

    Args:
        search_algorithm: Search algorithm after find_selection() has been called.

    Returns:
        Plotly figure with parallel coordinates plot.

    Raises:
        ValueError: If a requested objective is not a column of the scores,
            if fewer than 2 objectives are available, or if the policy's
            masks do not hold exactly one entry per score row. Per the
            original documentation a ValueError is also expected when
            find_selection() hasn't been called; presumably that one comes
            from get_all_scores() -- confirm against its implementation.
    """
    df = search_algorithm.get_all_scores()

    if self.objectives is None:
        # Every column except 'slices' and 'label' is treated as an objective.
        obj_cols = [col for col in df.columns if col not in ('slices', 'label')]
    else:
        obj_cols = self.objectives
        for obj in obj_cols:
            if obj not in df.columns:
                raise ValueError(f"Objective '{obj}' not found in scores")

    if len(obj_cols) < 2:
        raise ValueError("Need at least 2 objectives for parallel coordinates")

    dimensions = [dict(label=obj, values=df[obj]) for obj in obj_cols]

    # One lookup covers both "policy has no pareto_mask attribute" and
    # "attribute exists but is None".
    policy = search_algorithm.selection_policy
    pareto_mask = getattr(policy, 'pareto_mask', None)

    if pareto_mask is None:
        line = dict(color='steelblue', showscale=False)
    else:
        n_rows = len(df)
        # Masks are read through `.values`, i.e. assumed to be pandas
        # Series-like and ordered like the rows of the scores frame.
        pareto = pareto_mask.values
        feasible_mask = getattr(policy, 'feasible_mask', None)
        if feasible_mask is None:
            # No feasibility information: treat every candidate as feasible
            # instead of failing on `None.values`.
            feasible = [True] * n_rows
        else:
            feasible = feasible_mask.values

        # Fail with a clear message rather than an IndexError (mask too
        # short) or a silently truncated coloring (mask too long).
        if len(pareto) != n_rows or len(feasible) != n_rows:
            raise ValueError(
                "pareto_mask and feasible_mask must have one entry per score row"
            )

        # Infeasibility takes precedence over Pareto membership.
        color_values = [
            0 if not is_feasible else (2 if is_pareto else 1)
            for is_feasible, is_pareto in zip(feasible, pareto)
        ]

        line = dict(
            color=color_values,
            # Colorscale positions are fractions of [cmin, cmax] = [0, 2],
            # so 0 / 0.5 / 1 map to the status codes 0 / 1 / 2.
            colorscale=[
                [0, 'lightgray'],
                [0.5, 'steelblue'],
                [1, 'darkorange']
            ],
            showscale=True,
            cmin=0,
            cmax=2,
            colorbar=dict(
                title='Status',
                tickvals=[0, 1, 2],
                ticktext=['Infeasible', 'Dominated', 'Pareto'],
            )
        )

    fig = go.Figure(data=go.Parcoords(dimensions=dimensions, line=line))

    fig.update_layout(
        title='Parallel Coordinates: All Objectives',
        height=500,
    )

    return fig

ScoreContributionBars

Bar chart showing final scores from each objective component.

Visualizes the contribution of each score component to understand which objectives were most influential in the final selection. Can display either absolute scores or scores normalized as fractions of the total.

Examples:

>>> from energy_repset.diagnostics.results import ScoreContributionBars
>>> contrib = ScoreContributionBars()
>>> fig = contrib.plot(result.scores, normalize=True)
>>> fig.update_layout(title='Score Component Contributions')
>>> fig.show()
Source code in energy_repset/diagnostics/results/score_contribution_bars.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
class ScoreContributionBars:
    """Bar chart of the final score reported by each objective component.

    Draws one bar per score component so the relative weight of each
    objective in the final selection can be compared. Bars show either the
    raw score values or each value divided by the sum of all values.

    Examples:
        >>> from energy_repset.diagnostics.results import ScoreContributionBars
        >>> contrib = ScoreContributionBars()
        >>> fig = contrib.plot(result.scores, normalize=True)
        >>> fig.update_layout(title='Score Component Contributions')
        >>> fig.show()
    """

    def plot(
        self,
        scores: Dict[str, float],
        normalize: bool = False
    ) -> go.Figure:
        """Build the bar chart for a mapping of component name to score.

        Args:
            scores: Mapping of component name to score (from result.scores).
            normalize: When True, plot each score divided by the sum of all
                scores instead of the raw value.

        Returns:
            Plotly figure containing a single bar trace.

        Raises:
            ValueError: If ``scores`` is empty, or if ``normalize`` is True
                and the scores sum to zero.
        """
        if not scores:
            raise ValueError("Scores dictionary is empty")

        labels = list(scores)
        heights = [scores[name] for name in labels]

        # Start from the un-normalized axis title; override it below if the
        # values are rescaled.
        axis_title = 'Score Value'
        if normalize:
            denominator = sum(heights)
            if denominator == 0:
                raise ValueError("Cannot normalize: total score is zero")
            heights = [height / denominator for height in heights]
            axis_title = 'Normalized Score (fraction)'

        # Text shown on each bar: the plotted value with 4 decimals.
        bar_text = [f'{height:.4f}' for height in heights]
        bars = go.Bar(
            x=labels,
            y=heights,
            text=bar_text,
            textposition='auto',
        )

        fig = go.Figure(data=[bars])
        fig.update_layout(
            xaxis_title='Score Component',
            yaxis_title=axis_title,
            showlegend=False,
            hovermode='x',
        )

        return fig

plot

plot(scores: dict[str, float], normalize: bool = False) -> Figure

Create bar chart of score component contributions.

Parameters:

Name Type Description Default
scores dict[str, float]

Dictionary of scores from each component (from result.scores).

required
normalize bool

If True, show as fractions of total score.

False

Returns:

Type Description
Figure

Plotly figure with bar chart.

Source code in energy_repset/diagnostics/results/score_contribution_bars.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def plot(
    self,
    scores: Dict[str, float],
    normalize: bool = False
) -> go.Figure:
    """Build a bar chart with one bar per score component.

    Args:
        scores: Mapping of component name to score (from result.scores).
        normalize: When True, plot each score divided by the sum of all
            scores instead of the raw value.

    Returns:
        Plotly figure containing a single bar trace.

    Raises:
        ValueError: If ``scores`` is empty, or if ``normalize`` is True and
            the scores sum to zero.
    """
    if not scores:
        raise ValueError("Scores dictionary is empty")

    names = list(scores)
    values = [scores[name] for name in names]

    # Default axis title; replaced below when the values are rescaled.
    y_label = 'Score Value'
    if normalize:
        score_sum = sum(values)
        if score_sum == 0:
            raise ValueError("Cannot normalize: total score is zero")
        values = [value / score_sum for value in values]
        y_label = 'Normalized Score (fraction)'

    # Each bar is annotated with its plotted value, 4 decimals.
    annotations = [f'{value:.4f}' for value in values]
    trace = go.Bar(
        x=names,
        y=values,
        text=annotations,
        textposition='auto',
    )

    fig = go.Figure(data=[trace])
    fig.update_layout(
        xaxis_title='Score Component',
        yaxis_title=y_label,
        showlegend=False,
        hovermode='x',
    )

    return fig