Diagnostics¶

Feature Space¶

FeatureSpaceScatter2D ¶

2D scatter plot for visualizing feature space.

Creates an interactive scatter plot of any two features from df_features. Can highlight a specific selection of slices. Works with any feature columns including PCA components ('pc_0', 'pc_1'), statistical features ('mean__wind'), or mixed features.

Examples:

>>> # Visualize PCA space
>>> scatter = FeatureSpaceScatter2D()
>>> fig = scatter.plot(context.df_features, x='pc_0', y='pc_1')
>>> fig.update_layout(title='PCA Feature Space')
>>> fig.show()

>>> # Visualize with selection highlighted
>>> fig = scatter.plot(
...     context.df_features,
...     x='mean__demand',
...     y='pc_0',
...     selection=('2024-01', '2024-04', '2024-07')
... )

>>> # Color by another feature
>>> fig = scatter.plot(
...     context.df_features,
...     x='pc_0',
...     y='pc_1',
...     color='std__wind'
... )

Source code in energy_repset/diagnostics/feature_space/feature_space_scatter.py

class FeatureSpaceScatter2D:
    """2D scatter plot for visualizing feature space.

    Creates an interactive scatter plot of any two features from df_features.
    Can highlight a specific selection of slices. Works with any feature columns
    including PCA components ('pc_0', 'pc_1'), statistical features ('mean__wind'),
    or mixed features.

    When a selection is given, the selected slices are moved to the end of the
    plotted data so their markers are not hidden behind unselected ones.

    Examples:

        >>> # Visualize PCA space
        >>> scatter = FeatureSpaceScatter2D()
        >>> fig = scatter.plot(context.df_features, x='pc_0', y='pc_1')
        >>> fig.update_layout(title='PCA Feature Space')
        >>> fig.show()

        >>> # Visualize with selection highlighted
        >>> fig = scatter.plot(
        ...     context.df_features,
        ...     x='mean__demand',
        ...     y='pc_0',
        ...     selection=('2024-01', '2024-04', '2024-07')
        ... )

        >>> # Color by another feature
        >>> fig = scatter.plot(
        ...     context.df_features,
        ...     x='pc_0',
        ...     y='pc_1',
        ...     color='std__wind'
        ... )
    """

    def __init__(self):
        """Initialize the scatter plot diagnostic (no configuration is stored)."""
        pass

    def plot(
        self,
        df_features: pd.DataFrame,
        x: str,
        y: str,
        selection: SliceCombination = None,
        color: str = None,
    ) -> go.Figure:
        """Create a 2D scatter plot of feature space.

        The input frame is copied before the helper columns 'slice_label'
        (stringified index, shown on hover) and 'is_selected' are added, so
        df_features itself is never modified.

        Args:
            df_features: Feature matrix with slices as rows, features as columns.
            x: Column name for x-axis.
            y: Column name for y-axis.
            selection: Optional tuple of slice identifiers to highlight.
                Identifiers that do not occur in the index are ignored.
            color: Optional column name to use for color mapping. When given
                together with a selection, selected slices are marked with a
                star symbol; without it they are colored red.

        Returns:
            Plotly figure object ready for display or further customization.

        Raises:
            KeyError: If x, y, or color columns are not in df_features.
        """
        # Validate columns in the documented order: x, y, then color.
        requested = [x, y] if color is None else [x, y, color]
        for column in requested:
            if column not in df_features.columns:
                raise KeyError(f"Column '{column}' not found in df_features")

        # Work on a copy so the caller's frame never gains helper columns.
        plot_df = df_features.copy()
        plot_df['slice_label'] = plot_df.index.astype(str)

        highlight = selection is not None
        if highlight:
            plot_df['is_selected'] = plot_df.index.isin(set(selection))
            # Plotly express emits one trace per group in order of first
            # appearance; a stable sort puts unselected rows first so the
            # selected trace is created last and drawn on top.
            plot_df = plot_df.sort_values('is_selected', kind='mergesort')
        else:
            plot_df['is_selected'] = False

        # Assemble the px.scatter arguments once instead of repeating the call.
        scatter_kwargs = dict(x=x, y=y, hover_data=['slice_label'])
        if color is not None:
            # Color by feature value; the selection (if any) is shown by symbol.
            scatter_kwargs['color'] = color
            if highlight:
                scatter_kwargs['symbol'] = 'is_selected'
                scatter_kwargs['symbol_map'] = {True: 'star', False: 'circle'}
        elif highlight:
            # No color feature requested: color by selection status instead.
            scatter_kwargs['color'] = 'is_selected'
            scatter_kwargs['color_discrete_map'] = {True: 'red', False: 'lightgray'}

        fig = px.scatter(plot_df, **scatter_kwargs)

        # Update layout for better readability
        fig.update_layout(
            xaxis_title=x,
            yaxis_title=y,
            hovermode='closest',
        )

        return fig

__init__ ¶

__init__()

Initialize the scatter plot diagnostic.

Source code in energy_repset/diagnostics/feature_space/feature_space_scatter.py

def __init__(self) -> None:
    """Set up the 2D scatter diagnostic.

    The diagnostic is stateless: nothing is stored on the instance.
    """

plot ¶

plot(df_features: DataFrame, x: str, y: str, selection: SliceCombination = None, color: str = None) -> Figure

Create a 2D scatter plot of feature space.

Parameters:

Name	Type	Description	Default
`df_features`	`DataFrame`	Feature matrix with slices as rows, features as columns.	required
`x`	`str`	Column name for x-axis.	required
`y`	`str`	Column name for y-axis.	required
`selection`	`SliceCombination`	Optional tuple of slice identifiers to highlight.	`None`
`color`	`str`	Optional column name to use for color mapping.	`None`

Returns:

Type	Description
`Figure`	Plotly figure object ready for display or further customization.

Raises:

Type	Description
`KeyError`	If x, y, or color columns are not in df_features.

Source code in energy_repset/diagnostics/feature_space/feature_space_scatter.py

def plot(
    self,
    df_features: pd.DataFrame,
    x: str,
    y: str,
    selection: SliceCombination = None,
    color: str = None,
) -> go.Figure:
    """Create a 2D scatter plot of feature space.

    The input frame is copied before the helper columns 'slice_label'
    (stringified index, shown on hover) and 'is_selected' are added, so
    df_features itself is never modified. When a selection is given, the
    selected slices are moved to the end of the plotted data so their markers
    are not hidden behind unselected ones.

    Args:
        df_features: Feature matrix with slices as rows, features as columns.
        x: Column name for x-axis.
        y: Column name for y-axis.
        selection: Optional tuple of slice identifiers to highlight.
            Identifiers that do not occur in the index are ignored.
        color: Optional column name to use for color mapping. When given
            together with a selection, selected slices are marked with a
            star symbol; without it they are colored red.

    Returns:
        Plotly figure object ready for display or further customization.

    Raises:
        KeyError: If x, y, or color columns are not in df_features.
    """
    # Validate columns in the documented order: x, y, then color.
    requested = [x, y] if color is None else [x, y, color]
    for column in requested:
        if column not in df_features.columns:
            raise KeyError(f"Column '{column}' not found in df_features")

    # Work on a copy so the caller's frame never gains helper columns.
    plot_df = df_features.copy()
    plot_df['slice_label'] = plot_df.index.astype(str)

    highlight = selection is not None
    if highlight:
        plot_df['is_selected'] = plot_df.index.isin(set(selection))
        # Plotly express emits one trace per group in order of first
        # appearance; a stable sort puts unselected rows first so the
        # selected trace is created last and drawn on top.
        plot_df = plot_df.sort_values('is_selected', kind='mergesort')
    else:
        plot_df['is_selected'] = False

    # Assemble the px.scatter arguments once instead of repeating the call.
    scatter_kwargs = dict(x=x, y=y, hover_data=['slice_label'])
    if color is not None:
        # Color by feature value; the selection (if any) is shown by symbol.
        scatter_kwargs['color'] = color
        if highlight:
            scatter_kwargs['symbol'] = 'is_selected'
            scatter_kwargs['symbol_map'] = {True: 'star', False: 'circle'}
    elif highlight:
        # No color feature requested: color by selection status instead.
        scatter_kwargs['color'] = 'is_selected'
        scatter_kwargs['color_discrete_map'] = {True: 'red', False: 'lightgray'}

    fig = px.scatter(plot_df, **scatter_kwargs)

    # Update layout for better readability
    fig.update_layout(
        xaxis_title=x,
        yaxis_title=y,
        hovermode='closest',
    )

    return fig

FeatureSpaceScatter3D ¶

3D scatter plot for visualizing feature space.

Creates an interactive 3D scatter plot of any three features from df_features. Can highlight a specific selection of slices. Works with any feature columns including PCA components or statistical features.

Examples:

>>> # Visualize 3D PCA space
>>> scatter = FeatureSpaceScatter3D()
>>> fig = scatter.plot(
...     context.df_features,
...     x='pc_0',
...     y='pc_1',
...     z='pc_2'
... )
>>> fig.update_layout(title='3D PCA Space')
>>> fig.show()

>>> # Highlight selection
>>> fig = scatter.plot(
...     context.df_features,
...     x='pc_0',
...     y='pc_1',
...     z='pc_2',
...     selection=('2024-01', '2024-04')
... )

>>> # Color by feature value
>>> fig = scatter.plot(
...     context.df_features,
...     x='pc_0',
...     y='pc_1',
...     z='pc_2',
...     color='mean__demand'
... )

Source code in energy_repset/diagnostics/feature_space/feature_space_scatter.py

class FeatureSpaceScatter3D:
    """Interactive 3D scatter view of three feature columns.

    Any three columns of df_features (PCA components, statistical features,
    or a mix) can be plotted against each other. A selection of slices can
    optionally be highlighted, and markers can be colored by a further column.

    Examples:

        >>> # Visualize 3D PCA space
        >>> scatter = FeatureSpaceScatter3D()
        >>> fig = scatter.plot(
        ...     context.df_features,
        ...     x='pc_0',
        ...     y='pc_1',
        ...     z='pc_2'
        ... )
        >>> fig.update_layout(title='3D PCA Space')
        >>> fig.show()

        >>> # Highlight selection
        >>> fig = scatter.plot(
        ...     context.df_features,
        ...     x='pc_0',
        ...     y='pc_1',
        ...     z='pc_2',
        ...     selection=('2024-01', '2024-04')
        ... )

        >>> # Color by feature value
        >>> fig = scatter.plot(
        ...     context.df_features,
        ...     x='pc_0',
        ...     y='pc_1',
        ...     z='pc_2',
        ...     color='mean__demand'
        ... )
    """

    def __init__(self):
        """Set up the 3D scatter diagnostic; no state is kept on the instance."""

    def plot(
        self,
        df_features: pd.DataFrame,
        x: str,
        y: str,
        z: str,
        selection: SliceCombination = None,
        color: str = None,
    ) -> go.Figure:
        """Build the 3D scatter figure for three feature columns.

        A copy of df_features is extended with 'slice_label' (the index as
        strings, shown on hover) and 'is_selected' before plotting.

        Args:
            df_features: Feature matrix with slices as rows, features as columns.
            x: Column plotted on the x-axis.
            y: Column plotted on the y-axis.
            z: Column plotted on the z-axis.
            selection: Optional tuple of slice identifiers to highlight.
            color: Optional column used for color mapping. With a selection,
                selected slices get a diamond symbol; without a color column
                they are colored red instead.

        Returns:
            Plotly figure object ready for display or further customization.

        Raises:
            KeyError: If x, y, z, or color columns are not in df_features.
        """
        # Check axes first, then the optional color column.
        needed = (x, y, z) if color is None else (x, y, z, color)
        for name in needed:
            if name not in df_features.columns:
                raise KeyError(f"Column '{name}' not found in df_features")

        # Helper columns go on a copy, never on the caller's frame.
        frame = df_features.copy()
        frame['slice_label'] = frame.index.astype(str)

        highlight = selection is not None
        frame['is_selected'] = frame.index.isin(set(selection)) if highlight else False

        # Collect the keyword arguments for the single scatter_3d call.
        options = {'x': x, 'y': y, 'z': z, 'hover_data': ['slice_label']}
        if color is not None:
            options['color'] = color
            if highlight:
                options['symbol'] = 'is_selected'
                options['symbol_map'] = {True: 'diamond', False: 'circle'}
        elif highlight:
            options['color'] = 'is_selected'
            options['color_discrete_map'] = {True: 'red', False: 'lightgray'}

        fig = px.scatter_3d(frame, **options)

        # Label the scene axes with the plotted column names.
        fig.update_layout(
            scene={'xaxis_title': x, 'yaxis_title': y, 'zaxis_title': z},
            hovermode='closest',
        )

        return fig

__init__ ¶

__init__()

Initialize the 3D scatter plot diagnostic.

Source code in energy_repset/diagnostics/feature_space/feature_space_scatter.py

def __init__(self) -> None:
    """Set up the 3D scatter diagnostic.

    The diagnostic is stateless: nothing is stored on the instance.
    """

plot ¶

plot(df_features: DataFrame, x: str, y: str, z: str, selection: SliceCombination = None, color: str = None) -> Figure

Create a 3D scatter plot of feature space.

Parameters:

Name	Type	Description	Default
`df_features`	`DataFrame`	Feature matrix with slices as rows, features as columns.	required
`x`	`str`	Column name for x-axis.	required
`y`	`str`	Column name for y-axis.	required
`z`	`str`	Column name for z-axis.	required
`selection`	`SliceCombination`	Optional tuple of slice identifiers to highlight.	`None`
`color`	`str`	Optional column name to use for color mapping.	`None`

Returns:

Type	Description
`Figure`	Plotly figure object ready for display or further customization.

Raises:

Type	Description
`KeyError`	If x, y, z, or color columns are not in df_features.

Source code in energy_repset/diagnostics/feature_space/feature_space_scatter.py

def plot(
    self,
    df_features: pd.DataFrame,
    x: str,
    y: str,
    z: str,
    selection: SliceCombination = None,
    color: str = None,
) -> go.Figure:
    """Build the 3D scatter figure for three feature columns.

    A copy of df_features is extended with 'slice_label' (the index as
    strings, shown on hover) and 'is_selected' before plotting.

    Args:
        df_features: Feature matrix with slices as rows, features as columns.
        x: Column plotted on the x-axis.
        y: Column plotted on the y-axis.
        z: Column plotted on the z-axis.
        selection: Optional tuple of slice identifiers to highlight.
        color: Optional column used for color mapping. With a selection,
            selected slices get a diamond symbol; without a color column
            they are colored red instead.

    Returns:
        Plotly figure object ready for display or further customization.

    Raises:
        KeyError: If x, y, z, or color columns are not in df_features.
    """
    # Check axes first, then the optional color column.
    needed = (x, y, z) if color is None else (x, y, z, color)
    for name in needed:
        if name not in df_features.columns:
            raise KeyError(f"Column '{name}' not found in df_features")

    # Helper columns go on a copy, never on the caller's frame.
    frame = df_features.copy()
    frame['slice_label'] = frame.index.astype(str)

    highlight = selection is not None
    frame['is_selected'] = frame.index.isin(set(selection)) if highlight else False

    # Collect the keyword arguments for the single scatter_3d call.
    options = {'x': x, 'y': y, 'z': z, 'hover_data': ['slice_label']}
    if color is not None:
        options['color'] = color
        if highlight:
            options['symbol'] = 'is_selected'
            options['symbol_map'] = {True: 'diamond', False: 'circle'}
    elif highlight:
        options['color'] = 'is_selected'
        options['color_discrete_map'] = {True: 'red', False: 'lightgray'}

    fig = px.scatter_3d(frame, **options)

    # Label the scene axes with the plotted column names.
    fig.update_layout(
        scene={'xaxis_title': x, 'yaxis_title': y, 'zaxis_title': z},
        hovermode='closest',
    )

    return fig

FeatureSpaceScatterMatrix ¶

Scatter matrix (SPLOM) for visualizing relationships between multiple features.

Creates an interactive scatter plot matrix showing pairwise relationships between all specified features. Can highlight a specific selection of slices. Useful for exploring multi-dimensional feature spaces and identifying feature correlations.

Examples:

>>> # Visualize PCA components
>>> scatter_matrix = FeatureSpaceScatterMatrix()
>>> fig = scatter_matrix.plot(
...     context.df_features,
...     dimensions=['pc_0', 'pc_1', 'pc_2']
... )
>>> fig.update_layout(title='PCA Component Relationships')
>>> fig.show()

>>> # Visualize statistical features with selection
>>> fig = scatter_matrix.plot(
...     context.df_features,
...     dimensions=['mean__demand', 'std__demand', 'max__wind'],
...     selection=('2024-01', '2024-04', '2024-07')
... )

>>> # Color by a feature value
>>> fig = scatter_matrix.plot(
...     context.df_features,
...     dimensions=['pc_0', 'pc_1', 'pc_2', 'pc_3'],
...     color='mean__demand'
... )

>>> # All features
>>> fig = scatter_matrix.plot(context.df_features)

Source code in energy_repset/diagnostics/feature_space/feature_space_scatter_matrix.py

class FeatureSpaceScatterMatrix:
    """Scatter matrix (SPLOM) for visualizing relationships between multiple features.

    Creates an interactive scatter plot matrix showing pairwise relationships between
    all specified features. Can highlight a specific selection of slices. Useful for
    exploring multi-dimensional feature spaces and identifying feature correlations.

    The color column does not have to be one of the plotted dimensions.

    Examples:

        >>> # Visualize PCA components
        >>> scatter_matrix = FeatureSpaceScatterMatrix()
        >>> fig = scatter_matrix.plot(
        ...     context.df_features,
        ...     dimensions=['pc_0', 'pc_1', 'pc_2']
        ... )
        >>> fig.update_layout(title='PCA Component Relationships')
        >>> fig.show()

        >>> # Visualize statistical features with selection
        >>> fig = scatter_matrix.plot(
        ...     context.df_features,
        ...     dimensions=['mean__demand', 'std__demand', 'max__wind'],
        ...     selection=('2024-01', '2024-04', '2024-07')
        ... )

        >>> # Color by a feature value
        >>> fig = scatter_matrix.plot(
        ...     context.df_features,
        ...     dimensions=['pc_0', 'pc_1', 'pc_2', 'pc_3'],
        ...     color='mean__demand'
        ... )

        >>> # All features
        >>> fig = scatter_matrix.plot(context.df_features)
    """

    def __init__(self):
        """Initialize the scatter matrix diagnostic (no configuration is stored)."""
        pass

    def plot(
        self,
        df_features: pd.DataFrame,
        dimensions: list[str] = None,
        selection: SliceCombination = None,
        color: str = None,
    ) -> go.Figure:
        """Create a scatter plot matrix of feature space.

        Only the lower triangle of the matrix is shown and the diagonal is
        hidden. The plotted frame is a copy of the requested columns, so
        df_features is never modified.

        Args:
            df_features: Feature matrix with slices as rows, features as columns.
            dimensions: List of column names to include in the matrix. If None,
                uses all columns (may be slow for many features).
            selection: Optional tuple of slice identifiers to highlight.
                Selected slices are drawn with a star symbol.
            color: Optional column name to use for color mapping; it may be a
                column that is not listed in dimensions. If None and
                selection is provided, colors by selection status.

        Returns:
            Plotly figure object ready for display or further customization.

        Raises:
            KeyError: If any dimension or color column is not in df_features.
            ValueError: If dimensions list is empty.
        """
        # Normalise to a list so tuples and other sequences select columns
        # rather than being treated by pandas as a single key.
        if dimensions is None:
            dimensions = list(df_features.columns)
        else:
            dimensions = list(dimensions)

        if not dimensions:
            raise ValueError("dimensions list cannot be empty")

        # Validate columns
        for dim in dimensions:
            if dim not in df_features.columns:
                raise KeyError(f"Column '{dim}' not found in df_features")
        if color is not None and color not in df_features.columns:
            raise KeyError(f"Column '{color}' not found in df_features")

        # The color column must be carried into the plotted frame even when it
        # is not a dimension; otherwise plotly express cannot resolve it.
        columns = list(dimensions)
        if color is not None and color not in columns:
            columns.append(color)
        plot_df = df_features[columns].copy()
        plot_df['slice_label'] = df_features.index.astype(str)

        # Add selection indicator
        highlight = selection is not None
        if highlight:
            plot_df['is_selected'] = df_features.index.isin(set(selection))
            # Order so selected points are drawn on top
            plot_df = pd.concat([
                plot_df[~plot_df['is_selected']],
                plot_df[plot_df['is_selected']]
            ], ignore_index=False)
        else:
            plot_df['is_selected'] = False

        # Assemble the px.scatter_matrix arguments once.
        matrix_kwargs = dict(dimensions=dimensions, hover_data=['slice_label'])
        if highlight:
            matrix_kwargs['symbol'] = 'is_selected'
            matrix_kwargs['symbol_map'] = {True: 'star', False: 'circle'}
        if color is not None:
            # Color by feature value
            matrix_kwargs['color'] = color
        elif highlight:
            # Color by selection status
            matrix_kwargs['color'] = 'is_selected'
            matrix_kwargs['color_discrete_map'] = {True: 'red', False: 'lightgray'}

        fig = px.scatter_matrix(plot_df, **matrix_kwargs)

        # Update layout for better readability
        fig.update_traces(
            diagonal_visible=False,
            showupperhalf=False,
            marker=dict(size=4)
        )

        return fig

__init__ ¶

__init__()

Initialize the scatter matrix diagnostic.

Source code in energy_repset/diagnostics/feature_space/feature_space_scatter_matrix.py

def __init__(self) -> None:
    """Set up the scatter matrix diagnostic.

    The diagnostic is stateless: nothing is stored on the instance.
    """

plot ¶

plot(df_features: DataFrame, dimensions: list[str] = None, selection: SliceCombination = None, color: str = None) -> Figure

Create a scatter plot matrix of feature space.

Parameters:

Name	Type	Description	Default
`df_features`	`DataFrame`	Feature matrix with slices as rows, features as columns.	required
`dimensions`	`list[str]`	List of column names to include in the matrix. If None, uses all columns (may be slow for many features).	`None`
`selection`	`SliceCombination`	Optional tuple of slice identifiers to highlight.	`None`
`color`	`str`	Optional column name to use for color mapping. If None and selection is provided, colors by selection status.	`None`

Returns:

Type	Description
`Figure`	Plotly figure object ready for display or further customization.

Raises:

Type	Description
`KeyError`	If any dimension or color column is not in df_features.
`ValueError`	If dimensions list is empty.

Source code in energy_repset/diagnostics/feature_space/feature_space_scatter_matrix.py

def plot(
    self,
    df_features: pd.DataFrame,
    dimensions: list[str] = None,
    selection: SliceCombination = None,
    color: str = None,
) -> go.Figure:
    """Create a scatter plot matrix of feature space.

    Only the lower triangle of the matrix is shown and the diagonal is
    hidden. The plotted frame is a copy of the requested columns, so
    df_features is never modified.

    Args:
        df_features: Feature matrix with slices as rows, features as columns.
        dimensions: List of column names to include in the matrix. If None,
            uses all columns (may be slow for many features).
        selection: Optional tuple of slice identifiers to highlight.
            Selected slices are drawn with a star symbol.
        color: Optional column name to use for color mapping; it may be a
            column that is not listed in dimensions. If None and
            selection is provided, colors by selection status.

    Returns:
        Plotly figure object ready for display or further customization.

    Raises:
        KeyError: If any dimension or color column is not in df_features.
        ValueError: If dimensions list is empty.
    """
    # Normalise to a list so tuples and other sequences select columns
    # rather than being treated by pandas as a single key.
    if dimensions is None:
        dimensions = list(df_features.columns)
    else:
        dimensions = list(dimensions)

    if not dimensions:
        raise ValueError("dimensions list cannot be empty")

    # Validate columns
    for dim in dimensions:
        if dim not in df_features.columns:
            raise KeyError(f"Column '{dim}' not found in df_features")
    if color is not None and color not in df_features.columns:
        raise KeyError(f"Column '{color}' not found in df_features")

    # The color column must be carried into the plotted frame even when it
    # is not a dimension; otherwise plotly express cannot resolve it.
    columns = list(dimensions)
    if color is not None and color not in columns:
        columns.append(color)
    plot_df = df_features[columns].copy()
    plot_df['slice_label'] = df_features.index.astype(str)

    # Add selection indicator
    highlight = selection is not None
    if highlight:
        plot_df['is_selected'] = df_features.index.isin(set(selection))
        # Order so selected points are drawn on top
        plot_df = pd.concat([
            plot_df[~plot_df['is_selected']],
            plot_df[plot_df['is_selected']]
        ], ignore_index=False)
    else:
        plot_df['is_selected'] = False

    # Assemble the px.scatter_matrix arguments once.
    matrix_kwargs = dict(dimensions=dimensions, hover_data=['slice_label'])
    if highlight:
        matrix_kwargs['symbol'] = 'is_selected'
        matrix_kwargs['symbol_map'] = {True: 'star', False: 'circle'}
    if color is not None:
        # Color by feature value
        matrix_kwargs['color'] = color
    elif highlight:
        # Color by selection status
        matrix_kwargs['color'] = 'is_selected'
        matrix_kwargs['color_discrete_map'] = {True: 'red', False: 'lightgray'}

    fig = px.scatter_matrix(plot_df, **matrix_kwargs)

    # Update layout for better readability
    fig.update_traces(
        diagonal_visible=False,
        showupperhalf=False,
        marker=dict(size=4)
    )

    return fig

PCAVarianceExplained ¶

Visualize explained variance ratio for PCA components.

Creates a bar chart showing the proportion of variance explained by each principal component, along with cumulative variance. Helps determine how many components are needed to capture most of the data's variance.

This diagnostic requires the fitted PCAFeatureEngineer instance to access the explained_variance_ratio_ attribute.

Examples:

>>> # Get PCA engineer from pipeline
>>> pca_engineer = pipeline.engineers['pca']
>>> variance_plot = PCAVarianceExplained(pca_engineer)
>>> fig = variance_plot.plot()
>>> fig.update_layout(title='PCA Variance Explained')
>>> fig.show()

>>> # With custom number of components shown
>>> fig = variance_plot.plot(n_components=10)

>>> # After running workflow
>>> context_with_features = workflow.feature_engineer.run(context)
>>> pca_eng = workflow.feature_engineer.engineers['pca']
>>> variance_plot = PCAVarianceExplained(pca_eng)
>>> fig = variance_plot.plot()

Source code in energy_repset/diagnostics/feature_space/pca_variance_explained.py

class PCAVarianceExplained:
    """Visualize explained variance ratio for PCA components.

    Creates a bar chart showing the proportion of variance explained by each
    principal component, along with cumulative variance. Helps determine how
    many components are needed to capture most of the data's variance.

    This diagnostic requires the fitted PCAFeatureEngineer instance to access
    the explained_variance_ratio_ attribute.

    Examples:

        >>> # Get PCA engineer from pipeline
        >>> pca_engineer = pipeline.engineers['pca']
        >>> variance_plot = PCAVarianceExplained(pca_engineer)
        >>> fig = variance_plot.plot()
        >>> fig.update_layout(title='PCA Variance Explained')
        >>> fig.show()

        >>> # With custom number of components shown
        >>> fig = variance_plot.plot(n_components=10)

        >>> # After running workflow
        >>> context_with_features = workflow.feature_engineer.run(context)
        >>> pca_eng = workflow.feature_engineer.engineers['pca']
        >>> variance_plot = PCAVarianceExplained(pca_eng)
        >>> fig = variance_plot.plot()
    """

    def __init__(self, pca_engineer: PCAFeatureEngineer):
        """Initialize the PCA variance explained diagnostic.

        Args:
            pca_engineer: A fitted PCAFeatureEngineer instance. Must have been
                fitted on data (i.e., calc_and_get_features_df has been called).
                The fitted state is only checked when plot() is called.
        """
        self.pca_engineer = pca_engineer

    def plot(self, n_components: int = None, show_cumulative: bool = True) -> go.Figure:
        """Create a bar chart of explained variance ratios.

        Components are labelled 'PC0', 'PC1', ... in the order of the
        engineer's explained_variance_ratio_. The cumulative line, when shown,
        is drawn against a secondary y-axis on the right.

        Args:
            n_components: Number of leading components to show. If None, shows
                all components. Values larger than the number of available
                components show all of them.
            show_cumulative: If True, adds a line showing cumulative variance explained.

        Returns:
            Plotly figure object ready for display or further customization.

        Raises:
            AttributeError: If the PCA engineer has not been fitted yet.
            ValueError: If n_components is given but smaller than 1.
        """
        # Get variance ratios
        if not hasattr(self.pca_engineer, 'explained_variance_ratio_'):
            raise AttributeError(
                "PCA engineer has not been fitted. Call calc_and_get_features_df() first."
            )

        # A non-positive count would otherwise slice from the end (negative)
        # or produce an empty chart (zero) without any hint to the caller.
        if n_components is not None and n_components < 1:
            raise ValueError(
                f"n_components must be a positive integer, got {n_components}"
            )

        variance_ratio = self.pca_engineer.explained_variance_ratio_

        # Limit to requested number of components
        if n_components is not None:
            variance_ratio = variance_ratio[:n_components]

        # Prepare data
        n = len(variance_ratio)
        component_labels = [f'PC{i}' for i in range(n)]
        cumulative_variance = variance_ratio.cumsum()

        # Create figure
        fig = go.Figure()

        # Add variance bars
        fig.add_trace(go.Bar(
            x=component_labels,
            y=variance_ratio,
            name='Individual',
            marker_color='lightblue',
            text=[f'{v:.1%}' for v in variance_ratio],
            textposition='outside',
        ))

        # Add cumulative line if requested
        if show_cumulative:
            fig.add_trace(go.Scatter(
                x=component_labels,
                y=cumulative_variance,
                name='Cumulative',
                mode='lines+markers',
                line=dict(color='red', width=2),
                yaxis='y2',
                text=[f'{v:.1%}' for v in cumulative_variance],
                textposition='top center',
            ))

        # Update layout
        layout_kwargs = dict(
            xaxis_title='Principal Component',
            yaxis_title='Explained Variance Ratio',
            hovermode='x unified',
            yaxis=dict(tickformat='.0%'),
        )

        if show_cumulative:
            # Secondary axis for the cumulative line, fixed just above 100%.
            layout_kwargs['yaxis2'] = dict(
                title='Cumulative Variance',
                overlaying='y',
                side='right',
                tickformat='.0%',
                range=[0, 1.05],
            )

        fig.update_layout(**layout_kwargs)

        return fig

__init__ ¶

__init__(pca_engineer: PCAFeatureEngineer)

Initialize the PCA variance explained diagnostic.

Parameters:

Name	Type	Description	Default
`pca_engineer`	`PCAFeatureEngineer`	A fitted PCAFeatureEngineer instance. Must have been fitted on data (i.e., calc_and_get_features_df has been called).	required

Source code in energy_repset/diagnostics/feature_space/pca_variance_explained.py

def __init__(self, pca_engineer: PCAFeatureEngineer):
    """Initialize the PCA variance explained diagnostic.

    The engineer is only stored here; nothing is validated at construction
    time. ``plot`` checks for the ``explained_variance_ratio_`` attribute when
    it is called, so an unfitted engineer is accepted here and rejected there.

    Args:
        pca_engineer: A fitted PCAFeatureEngineer instance. Must have been
            fitted on data (i.e., calc_and_get_features_df has been called).
    """
    # Keep a reference (not a copy) so plot() reads the engineer's current state.
    self.pca_engineer = pca_engineer

plot ¶

plot(n_components: int = None, show_cumulative: bool = True) -> Figure

Create a bar chart of explained variance ratios.

Parameters:

Name	Type	Description	Default
`n_components`	`int`	Number of components to show. If None, shows all components.	`None`
`show_cumulative`	`bool`	If True, adds a line showing cumulative variance explained.	`True`

Returns:

Type	Description
`Figure`	Plotly figure object ready for display or further customization.

Raises:

Type	Description
`AttributeError`	If the PCA engineer has not been fitted yet.

Source code in energy_repset/diagnostics/feature_space/pca_variance_explained.py

def plot(self, n_components: int = None, show_cumulative: bool = True) -> go.Figure:
    """Create a bar chart of explained variance ratios.

    Bars show the variance ratio of each principal component, labelled
    'PC0', 'PC1', ... in the order provided by the PCA engineer. Optionally a
    cumulative line is drawn against a secondary y-axis on the right.

    Args:
        n_components: Number of leading components to show. If None, shows
            all components. A value larger than the number of available
            components simply shows all of them.
        show_cumulative: If True, adds a line showing cumulative variance explained.

    Returns:
        Plotly figure object ready for display or further customization.

    Raises:
        AttributeError: If the PCA engineer has not been fitted yet.
        ValueError: If n_components is given but is not positive.
    """
    # Get variance ratios
    if not hasattr(self.pca_engineer, 'explained_variance_ratio_'):
        raise AttributeError(
            "PCA engineer has not been fitted. Call calc_and_get_features_df() first."
        )

    # Reject non-positive counts up front: the value is used as a slice bound,
    # so a negative number would silently drop trailing components and zero
    # would produce an empty chart.
    if n_components is not None and n_components < 1:
        raise ValueError(
            f"n_components must be positive, got {n_components}"
        )

    variance_ratio = self.pca_engineer.explained_variance_ratio_

    # Limit to requested number of components
    if n_components is not None:
        variance_ratio = variance_ratio[:n_components]

    # Prepare data; the cumulative sum is taken over the displayed components,
    # which are always a leading prefix, so it matches the full cumulative curve.
    n = len(variance_ratio)
    component_labels = [f'PC{i}' for i in range(n)]
    cumulative_variance = variance_ratio.cumsum()

    # Create figure
    fig = go.Figure()

    # Add variance bars
    fig.add_trace(go.Bar(
        x=component_labels,
        y=variance_ratio,
        name='Individual',
        marker_color='lightblue',
        text=[f'{v:.1%}' for v in variance_ratio],
        textposition='outside',
    ))

    # Add cumulative line if requested (drawn against the secondary axis 'y2')
    if show_cumulative:
        fig.add_trace(go.Scatter(
            x=component_labels,
            y=cumulative_variance,
            name='Cumulative',
            mode='lines+markers',
            line=dict(color='red', width=2),
            yaxis='y2',
            text=[f'{v:.1%}' for v in cumulative_variance],
            textposition='top center',
        ))

    # Update layout
    layout_kwargs = dict(
        xaxis_title='Principal Component',
        yaxis_title='Explained Variance Ratio',
        hovermode='x unified',
        yaxis=dict(tickformat='.0%'),
    )

    # The secondary axis is only declared when a trace actually uses it.
    if show_cumulative:
        layout_kwargs['yaxis2'] = dict(
            title='Cumulative Variance',
            overlaying='y',
            side='right',
            tickformat='.0%',
            range=[0, 1.05],
        )

    fig.update_layout(**layout_kwargs)

    return fig

FeatureCorrelationHeatmap ¶

Visualize correlation matrix of features.

Creates an interactive heatmap showing pairwise correlations between all features in the feature matrix (Pearson by default; Spearman and Kendall are also supported via `method`). Helps identify redundant features and understand feature relationships. Can optionally show only the lower triangle to avoid redundancy.

Examples:

>>> # Visualize all feature correlations
>>> heatmap = FeatureCorrelationHeatmap()
>>> fig = heatmap.plot(context.df_features)
>>> fig.update_layout(title='Feature Correlation Matrix')
>>> fig.show()

>>> # Show only lower triangle
>>> fig = heatmap.plot(context.df_features, show_lower_only=True)

>>> # Subset of features
>>> selected_features = context.df_features[['pc_0', 'pc_1', 'mean__demand']]
>>> fig = heatmap.plot(selected_features)

Source code in energy_repset/diagnostics/feature_space/feature_correlation_heatmap.py

class FeatureCorrelationHeatmap:
    """Visualize correlation matrix of features.

    Creates an interactive heatmap showing pairwise correlations between all
    features in the feature matrix (Pearson by default; Spearman and Kendall
    are also supported). Helps identify redundant features and understand
    feature relationships. Can optionally show only the lower triangle to
    avoid redundancy.

    Examples:

        >>> # Visualize all feature correlations
        >>> heatmap = FeatureCorrelationHeatmap()
        >>> fig = heatmap.plot(context.df_features)
        >>> fig.update_layout(title='Feature Correlation Matrix')
        >>> fig.show()

        >>> # Show only lower triangle
        >>> fig = heatmap.plot(context.df_features, show_lower_only=True)

        >>> # Subset of features
        >>> selected_features = context.df_features[['pc_0', 'pc_1', 'mean__demand']]
        >>> fig = heatmap.plot(selected_features)
    """

    def __init__(self):
        """Initialize the feature correlation heatmap diagnostic."""
        pass

    def plot(
        self,
        df_features: pd.DataFrame,
        method: str = 'pearson',
        show_lower_only: bool = False,
    ) -> go.Figure:
        """Create a heatmap of feature correlations.

        Args:
            df_features: Feature matrix with slices as rows, features as columns.
            method: Correlation method ('pearson', 'spearman', or 'kendall').
                Default is 'pearson'.
            show_lower_only: If True, shows only the lower triangle of the
                correlation matrix (removes redundant upper triangle and diagonal).

        Returns:
            Plotly figure object ready for display or further customization.

        Raises:
            ValueError: If method is not one of the supported correlation methods.
        """
        if method not in ('pearson', 'spearman', 'kendall'):
            raise ValueError(
                f"method must be 'pearson', 'spearman', or 'kendall', got '{method}'"
            )

        # Calculate correlation matrix
        corr_matrix = df_features.corr(method=method)

        # Mask upper triangle if requested
        if show_lower_only:
            # Imported locally so this method does not depend on a
            # module-level numpy import.
            import numpy as np

            # np.triu keeps the diagonal and everything above it, which is
            # exactly the set of cells to hide. Building the mask in one
            # vectorized call avoids a Python-level loop of per-cell writes.
            hidden = np.triu(np.ones(corr_matrix.shape, dtype=bool))

            # Hidden cells become NaN; the strictly-lower triangle is kept.
            corr_matrix = corr_matrix.where(~hidden)

        # Create heatmap; the colour scale is pinned to [-1, 1] and centred on
        # zero so colours are comparable between figures.
        fig = px.imshow(
            corr_matrix,
            x=corr_matrix.columns,
            y=corr_matrix.index,
            color_continuous_scale='RdBu_r',
            color_continuous_midpoint=0,
            zmin=-1,
            zmax=1,
            aspect='auto',
        )

        # Update layout for better readability
        fig.update_layout(
            xaxis_title='',
            yaxis_title='',
            coloraxis_colorbar=dict(title='Correlation'),
        )

        # Annotate each cell with its value rounded to two decimals
        fig.update_traces(
            text=corr_matrix.round(2).values,
            texttemplate='%{text}',
            textfont=dict(size=10),
        )

        return fig

`__init__` ¶

__init__()

Initialize the feature correlation heatmap diagnostic.

Source code in energy_repset/diagnostics/feature_space/feature_correlation_heatmap.py

def __init__(self):
    """Initialize the feature correlation heatmap diagnostic.

    No state is set up here: the constructor takes no arguments and assigns
    no attributes. The feature matrix is passed to ``plot`` instead.
    """
    pass

plot ¶

plot(df_features: DataFrame, method: str = 'pearson', show_lower_only: bool = False) -> Figure

Create a heatmap of feature correlations.

Parameters:

Name	Type	Description	Default
`df_features`	`DataFrame`	Feature matrix with slices as rows, features as columns.	required
`method`	`str`	Correlation method ('pearson', 'spearman', or 'kendall'). Default is 'pearson'.	`'pearson'`
`show_lower_only`	`bool`	If True, shows only the lower triangle of the correlation matrix (removes redundant upper triangle and diagonal).	`False`

Returns:

Type	Description
`Figure`	Plotly figure object ready for display or further customization.

Raises:

Type	Description
`ValueError`	If method is not one of the supported correlation methods.

Source code in energy_repset/diagnostics/feature_space/feature_correlation_heatmap.py

def plot(
    self,
    df_features: pd.DataFrame,
    method: str = 'pearson',
    show_lower_only: bool = False,
) -> go.Figure:
    """Create a heatmap of feature correlations.

    Args:
        df_features: Feature matrix with slices as rows, features as columns.
        method: Correlation method ('pearson', 'spearman', or 'kendall').
            Default is 'pearson'.
        show_lower_only: If True, shows only the lower triangle of the
            correlation matrix (removes redundant upper triangle and diagonal).

    Returns:
        Plotly figure object ready for display or further customization.

    Raises:
        ValueError: If method is not one of the supported correlation methods.
    """
    if method not in ('pearson', 'spearman', 'kendall'):
        raise ValueError(
            f"method must be 'pearson', 'spearman', or 'kendall', got '{method}'"
        )

    # Calculate correlation matrix
    corr_matrix = df_features.corr(method=method)

    # Mask upper triangle if requested
    if show_lower_only:
        # Imported locally so this method does not depend on a module-level
        # numpy import.
        import numpy as np

        # np.triu keeps the diagonal and everything above it, which is exactly
        # the set of cells to hide. Building the mask in one vectorized call
        # avoids a Python-level loop of per-cell writes.
        hidden = np.triu(np.ones(corr_matrix.shape, dtype=bool))

        # Hidden cells become NaN; the strictly-lower triangle is kept.
        corr_matrix = corr_matrix.where(~hidden)

    # Create heatmap; the colour scale is pinned to [-1, 1] and centred on
    # zero so colours are comparable between figures.
    fig = px.imshow(
        corr_matrix,
        x=corr_matrix.columns,
        y=corr_matrix.index,
        color_continuous_scale='RdBu_r',
        color_continuous_midpoint=0,
        zmin=-1,
        zmax=1,
        aspect='auto',
    )

    # Update layout for better readability
    fig.update_layout(
        xaxis_title='',
        yaxis_title='',
        coloraxis_colorbar=dict(title='Correlation'),
    )

    # Annotate each cell with its value rounded to two decimals
    fig.update_traces(
        text=corr_matrix.round(2).values,
        texttemplate='%{text}',
        textfont=dict(size=10),
    )

    return fig

FeatureDistributions ¶

Visualize distributions of all features as histograms.

Creates a grid of histograms showing the distribution of each feature across all slices. Helps identify feature scales, skewness, and potential outliers. Useful for understanding the feature space before selection.

Examples:

>>> # Visualize all feature distributions
>>> dist_plot = FeatureDistributions()
>>> fig = dist_plot.plot(context.df_features)
>>> fig.update_layout(title='Feature Distributions')
>>> fig.show()

>>> # Subset of features
>>> selected_features = context.df_features[['pc_0', 'pc_1', 'mean__demand']]
>>> fig = dist_plot.plot(selected_features)

>>> # With custom bin count
>>> fig = dist_plot.plot(context.df_features, nbins=30)

Source code in energy_repset/diagnostics/feature_space/feature_distributions.py

class FeatureDistributions:
    """Show one histogram per feature, arranged in a subplot grid.

    Every column of the feature matrix gets its own histogram over all slices,
    which makes feature scales, skewness, and potential outliers easy to spot
    before running a selection.

    Examples:

        >>> # Visualize all feature distributions
        >>> dist_plot = FeatureDistributions()
        >>> fig = dist_plot.plot(context.df_features)
        >>> fig.update_layout(title='Feature Distributions')
        >>> fig.show()

        >>> # Subset of features
        >>> selected_features = context.df_features[['pc_0', 'pc_1', 'mean__demand']]
        >>> fig = dist_plot.plot(selected_features)

        >>> # With custom bin count
        >>> fig = dist_plot.plot(context.df_features, nbins=30)
    """

    def __init__(self):
        """Initialize the feature distributions diagnostic."""
        pass

    def plot(
        self,
        df_features: pd.DataFrame,
        nbins: int = 20,
        cols: int = 3,
    ) -> go.Figure:
        """Build the histogram grid for every column of ``df_features``.

        Features fill the grid row by row, ``cols`` per row; the figure height
        grows by 300 px per row.

        Args:
            df_features: Feature matrix with slices as rows, features as columns.
            nbins: Number of bins for each histogram. Default is 20.
            cols: Number of columns in the subplot grid. Default is 3.

        Returns:
            Plotly figure object ready for display or further customization.

        Raises:
            ValueError: If df_features is empty or nbins/cols are invalid.
        """
        # Checks run in this order, so the first failing one decides the message.
        if df_features.empty:
            raise ValueError("df_features cannot be empty")
        if nbins <= 0:
            raise ValueError("nbins must be positive")
        if cols <= 0:
            raise ValueError("cols must be positive")

        feature_names = list(df_features.columns)

        # Enough rows to hold every feature (integer ceiling of count / cols).
        n_rows = (len(feature_names) + cols - 1) // cols

        # Gaps shrink with the grid size; a single row/column uses a fixed gap.
        v_gap = 0.12 / n_rows if n_rows > 1 else 0.1
        h_gap = 0.1 / cols if cols > 1 else 0.1

        fig = make_subplots(
            rows=n_rows,
            cols=cols,
            subplot_titles=feature_names,
            vertical_spacing=v_gap,
            horizontal_spacing=h_gap,
        )

        for position, name in enumerate(feature_names):
            # Convert the flat position into 1-based grid coordinates.
            row_offset, col_offset = divmod(position, cols)
            cell = dict(row=row_offset + 1, col=col_offset + 1)

            histogram = go.Histogram(
                x=df_features[name],
                nbinsx=nbins,
                name=name,
                showlegend=False,
                marker_color='lightblue',
            )
            fig.add_trace(histogram, **cell)

            # Label the axes of this cell only.
            fig.update_xaxes(title_text=name, **cell)
            fig.update_yaxes(title_text='Count', **cell)

        fig.update_layout(height=300 * n_rows, showlegend=False)

        return fig

`__init__` ¶

__init__()

Initialize the feature distributions diagnostic.

Source code in energy_repset/diagnostics/feature_space/feature_distributions.py

def __init__(self):
    """Initialize the feature distributions diagnostic.

    No state is set up here: the constructor takes no arguments and assigns
    no attributes. The feature matrix is passed to ``plot`` instead.
    """
    pass

plot ¶

plot(df_features: DataFrame, nbins: int = 20, cols: int = 3) -> Figure

Create a grid of histograms for all features.

Parameters:

Name	Type	Description	Default
`df_features`	`DataFrame`	Feature matrix with slices as rows, features as columns.	required
`nbins`	`int`	Number of bins for each histogram. Default is 20.	`20`
`cols`	`int`	Number of columns in the subplot grid. Default is 3.	`3`

Returns:

Type	Description
`Figure`	Plotly figure object ready for display or further customization.

Raises:

Type	Description
`ValueError`	If df_features is empty or nbins/cols are invalid.

Source code in energy_repset/diagnostics/feature_space/feature_distributions.py

def plot(
    self,
    df_features: pd.DataFrame,
    nbins: int = 20,
    cols: int = 3,
) -> go.Figure:
    """Build the histogram grid for every column of ``df_features``.

    Features fill the grid row by row, ``cols`` per row; the figure height
    grows by 300 px per row.

    Args:
        df_features: Feature matrix with slices as rows, features as columns.
        nbins: Number of bins for each histogram. Default is 20.
        cols: Number of columns in the subplot grid. Default is 3.

    Returns:
        Plotly figure object ready for display or further customization.

    Raises:
        ValueError: If df_features is empty or nbins/cols are invalid.
    """
    # Checks run in this order, so the first failing one decides the message.
    if df_features.empty:
        raise ValueError("df_features cannot be empty")
    if nbins <= 0:
        raise ValueError("nbins must be positive")
    if cols <= 0:
        raise ValueError("cols must be positive")

    feature_names = list(df_features.columns)

    # Enough rows to hold every feature (integer ceiling of count / cols).
    n_rows = (len(feature_names) + cols - 1) // cols

    # Gaps shrink with the grid size; a single row/column uses a fixed gap.
    v_gap = 0.12 / n_rows if n_rows > 1 else 0.1
    h_gap = 0.1 / cols if cols > 1 else 0.1

    fig = make_subplots(
        rows=n_rows,
        cols=cols,
        subplot_titles=feature_names,
        vertical_spacing=v_gap,
        horizontal_spacing=h_gap,
    )

    for position, name in enumerate(feature_names):
        # Convert the flat position into 1-based grid coordinates.
        row_offset, col_offset = divmod(position, cols)
        cell = dict(row=row_offset + 1, col=col_offset + 1)

        histogram = go.Histogram(
            x=df_features[name],
            nbinsx=nbins,
            name=name,
            showlegend=False,
            marker_color='lightblue',
        )
        fig.add_trace(histogram, **cell)

        # Label the axes of this cell only.
        fig.update_xaxes(title_text=name, **cell)
        fig.update_yaxes(title_text='Count', **cell)

    fig.update_layout(height=300 * n_rows, showlegend=False)

    return fig

Score Components¶

DistributionOverlayECDF ¶

Overlay empirical cumulative distribution functions (ECDF) to compare distributions.

Creates a plot showing the ECDF of a variable for both the full dataset and a selection. This helps visualize how well the selection represents the full distribution, which is what WassersteinFidelity measures.

Examples:

>>> # Compare demand distribution
>>> ecdf_plot = DistributionOverlayECDF()
>>> full_data = context.df_raw['demand']
>>> selected_indices = context.slicer.get_indices_for_slices(result.selection)
>>> selected_data = context.df_raw.loc[selected_indices, 'demand']
>>> fig = ecdf_plot.plot(full_data, selected_data)
>>> fig.update_layout(title='Demand Distribution: Full vs Selected')
>>> fig.show()

>>> # Alternative: using a boolean mask with loc
>>> selection_mask = context.df_raw.index.isin(selected_indices)
>>> fig = ecdf_plot.plot(
...     context.df_raw['wind'],
...     context.df_raw.loc[selection_mask, 'wind']
... )

Source code in energy_repset/diagnostics/score_components/distribution_overlay.py

class DistributionOverlayECDF:
    """Compare two samples by overlaying their empirical CDFs (ECDF).

    Draws the ECDF of one variable for the full dataset and for a selection on
    the same axes. The closer the two curves, the better the selection
    represents the full distribution, which is what WassersteinFidelity
    measures.

    Examples:

        >>> # Compare demand distribution
        >>> ecdf_plot = DistributionOverlayECDF()
        >>> full_data = context.df_raw['demand']
        >>> selected_indices = context.slicer.get_indices_for_slices(result.selection)
        >>> selected_data = context.df_raw.loc[selected_indices, 'demand']
        >>> fig = ecdf_plot.plot(full_data, selected_data)
        >>> fig.update_layout(title='Demand Distribution: Full vs Selected')
        >>> fig.show()

        >>> # Alternative: using a boolean mask with loc
        >>> selection_mask = context.df_raw.index.isin(selected_indices)
        >>> fig = ecdf_plot.plot(
        ...     context.df_raw['wind'],
        ...     context.df_raw.loc[selection_mask, 'wind']
        ... )
    """

    def __init__(self):
        """Initialize the ECDF overlay diagnostic."""
        pass

    @staticmethod
    def _ecdf(series):
        """Return (sorted non-NaN values, cumulative probabilities) for a Series."""
        ordered = np.sort(series.dropna().values)
        # The i-th smallest value (1-based) gets probability i / count.
        probabilities = np.arange(1, len(ordered) + 1) / len(ordered)
        return ordered, probabilities

    def plot(
        self,
        df_full: pd.Series,
        df_selection: pd.Series,
        full_label: str = 'Full',
        selection_label: str = 'Selection',
    ) -> go.Figure:
        """Draw the ECDFs of the full dataset and the selection on one figure.

        NaN values are dropped from both inputs before the curves are computed.
        The x-axis is titled with the name of ``df_full``, or 'Value' when the
        Series has no (truthy) name.

        Args:
            df_full: Series containing values for the full dataset.
            df_selection: Series containing values for the selection.
            full_label: Label for the full dataset in the legend. Default 'Full'.
            selection_label: Label for the selection in the legend. Default 'Selection'.

        Returns:
            Plotly figure object ready for display or further customization.
        """
        # One entry per curve: legend label, line style, (x values, probabilities).
        curves = (
            (full_label, dict(width=2), self._ecdf(df_full)),
            (selection_label, dict(width=2, dash='dash'), self._ecdf(df_selection)),
        )

        fig = go.Figure()
        for label, line_style, (values, probabilities) in curves:
            fig.add_trace(go.Scatter(
                x=values,
                y=probabilities,
                mode='lines',
                name=label,
                line=line_style,
            ))

        axis_label = df_full.name or 'Value'
        fig.update_layout(
            xaxis_title=axis_label,
            yaxis_title='Cumulative Probability',
            hovermode='x unified',
            yaxis=dict(tickformat='.0%', range=[0, 1]),
        )

        return fig

`__init__` ¶

__init__()

Initialize the ECDF overlay diagnostic.

Source code in energy_repset/diagnostics/score_components/distribution_overlay.py

def __init__(self):
    """Initialize the ECDF overlay diagnostic.

    No state is set up here: the constructor takes no arguments and assigns
    no attributes. Both data series are passed to ``plot`` instead.
    """
    pass

plot ¶

plot(df_full: Series, df_selection: Series, full_label: str = 'Full', selection_label: str = 'Selection') -> Figure

Create an ECDF overlay plot.

Parameters:

Name	Type	Description	Default
`df_full`	`Series`	Series containing values for the full dataset.	required
`df_selection`	`Series`	Series containing values for the selection.	required
`full_label`	`str`	Label for the full dataset in the legend. Default 'Full'.	`'Full'`
`selection_label`	`str`	Label for the selection in the legend. Default 'Selection'.	`'Selection'`

Returns:

Type	Description
`Figure`	Plotly figure object ready for display or further customization.

Source code in energy_repset/diagnostics/score_components/distribution_overlay.py

def plot(
    self,
    df_full: pd.Series,
    df_selection: pd.Series,
    full_label: str = 'Full',
    selection_label: str = 'Selection',
) -> go.Figure:
    """Draw the ECDFs of the full dataset and the selection on one figure.

    NaN values are dropped from both inputs before the curves are computed.
    The x-axis is titled with the name of ``df_full``, or 'Value' when the
    Series has no (truthy) name.

    Args:
        df_full: Series containing values for the full dataset.
        df_selection: Series containing values for the selection.
        full_label: Label for the full dataset in the legend. Default 'Full'.
        selection_label: Label for the selection in the legend. Default 'Selection'.

    Returns:
        Plotly figure object ready for display or further customization.
    """
    def _ecdf(series):
        # Sorted non-NaN values paired with i / count for the i-th smallest.
        ordered = np.sort(series.dropna().values)
        probabilities = np.arange(1, len(ordered) + 1) / len(ordered)
        return ordered, probabilities

    # One entry per curve: legend label, line style, (x values, probabilities).
    curves = (
        (full_label, dict(width=2), _ecdf(df_full)),
        (selection_label, dict(width=2, dash='dash'), _ecdf(df_selection)),
    )

    fig = go.Figure()
    for label, line_style, (values, probabilities) in curves:
        fig.add_trace(go.Scatter(
            x=values,
            y=probabilities,
            mode='lines',
            name=label,
            line=line_style,
        ))

    axis_label = df_full.name or 'Value'
    fig.update_layout(
        xaxis_title=axis_label,
        yaxis_title='Cumulative Probability',
        hovermode='x unified',
        yaxis=dict(tickformat='.0%', range=[0, 1]),
    )

    return fig

DistributionOverlayHistogram ¶

Overlay histograms to compare distributions.

Creates a plot showing normalized histograms of a variable for both the full dataset and a selection. Alternative to ECDF that may be more intuitive for some users. Shows per-bin probability by default (probability density, percent, or raw counts can be chosen via `histnorm`) rather than cumulative probability.

Examples:

>>> # Compare demand distribution
>>> hist_plot = DistributionOverlayHistogram()
>>> full_data = context.df_raw['demand']
>>> selected_indices = context.slicer.get_indices_for_slices(result.selection)
>>> selected_data = context.df_raw.loc[selected_indices, 'demand']
>>> fig = hist_plot.plot(full_data, selected_data)
>>> fig.update_layout(title='Demand Distribution: Full vs Selected')
>>> fig.show()

>>> # With custom bin count
>>> fig = hist_plot.plot(full_data, selected_data, nbins=50)

>>> # Using density mode
>>> fig = hist_plot.plot(full_data, selected_data, histnorm='probability density')

Source code in energy_repset/diagnostics/score_components/distribution_overlay.py

class DistributionOverlayHistogram:
    """Overlay histograms to compare distributions.

    Creates a plot showing normalized histograms of a variable for both the
    full dataset and a selection. Alternative to ECDF that may be more intuitive
    for some users. Shows per-bin probability by default (probability density,
    percent, or raw counts can be chosen via ``histnorm``) rather than
    cumulative probability.

    Examples:

        >>> # Compare demand distribution
        >>> hist_plot = DistributionOverlayHistogram()
        >>> full_data = context.df_raw['demand']
        >>> selected_indices = context.slicer.get_indices_for_slices(result.selection)
        >>> selected_data = context.df_raw.loc[selected_indices, 'demand']
        >>> fig = hist_plot.plot(full_data, selected_data)
        >>> fig.update_layout(title='Demand Distribution: Full vs Selected')
        >>> fig.show()

        >>> # With custom bin count
        >>> fig = hist_plot.plot(full_data, selected_data, nbins=50)

        >>> # Using density mode
        >>> fig = hist_plot.plot(full_data, selected_data, histnorm='probability density')
    """

    def __init__(self):
        """Initialize the histogram overlay diagnostic."""
        pass

    def plot(
        self,
        df_full: pd.Series,
        df_selection: pd.Series,
        nbins: int = 30,
        histnorm: str = 'probability',
        full_label: str = 'Full',
        selection_label: str = 'Selection',
    ) -> go.Figure:
        """Create a histogram overlay plot.

        NaN values are dropped from both inputs. The two histograms are drawn
        semi-transparent on top of each other and share one bin group, so
        their bars line up and can be compared directly.

        Args:
            df_full: Series containing values for the full dataset.
            df_selection: Series containing values for the selection.
            nbins: Number of bins for the histogram. Default is 30.
            histnorm: Histogram normalization mode. Options: 'probability',
                'probability density', 'percent', or '' (raw counts).
                Default is 'probability'.
            full_label: Label for the full dataset in the legend. Default 'Full'.
            selection_label: Label for the selection in the legend. Default 'Selection'.

        Returns:
            Plotly figure object ready for display or further customization.

        Raises:
            ValueError: If histnorm is not a valid option.
        """
        valid_histnorms = ['probability', 'probability density', 'percent', '']
        if histnorm not in valid_histnorms:
            raise ValueError(
                f"histnorm must be one of {valid_histnorms}, got '{histnorm}'"
            )

        # Drop NaN values
        full_values = df_full.dropna().values
        selection_values = df_selection.dropna().values

        # Settings common to both traces. The shared bingroup matters: under
        # barmode='overlay' Plotly does not force traces onto common bins, so
        # without it each trace would be auto-binned on its own range and the
        # normalized bar heights would not be comparable.
        shared = dict(
            nbinsx=nbins,
            histnorm=histnorm,
            opacity=0.6,
            bingroup='distribution_overlay',
        )

        # Create figure
        fig = go.Figure()

        # Add full dataset histogram
        fig.add_trace(go.Histogram(x=full_values, name=full_label, **shared))

        # Add selection histogram
        fig.add_trace(go.Histogram(x=selection_values, name=selection_label, **shared))

        # histnorm was validated above, so a direct lookup cannot miss.
        yaxis_title = {
            'probability': 'Probability',
            'probability density': 'Probability Density',
            'percent': 'Percent',
            '': 'Count',
        }[histnorm]

        fig.update_layout(
            xaxis_title=df_full.name or 'Value',
            yaxis_title=yaxis_title,
            barmode='overlay',
            hovermode='x unified',
        )

        return fig

`__init__` ¶

__init__()

Initialize the histogram overlay diagnostic.

Source code in energy_repset/diagnostics/score_components/distribution_overlay.py

def __init__(self):
    """Initialize the histogram overlay diagnostic.

    No state is set up here: the constructor takes no arguments and assigns
    no attributes. Both data series are passed to ``plot`` instead.
    """
    pass

plot ¶

plot(df_full: Series, df_selection: Series, nbins: int = 30, histnorm: str = 'probability', full_label: str = 'Full', selection_label: str = 'Selection') -> Figure

Create a histogram overlay plot.

Parameters:

Name	Type	Description	Default
`df_full`	`Series`	Series containing values for the full dataset.	required
`df_selection`	`Series`	Series containing values for the selection.	required
`nbins`	`int`	Number of bins for the histogram. Default is 30.	`30`
`histnorm`	`str`	Histogram normalization mode. Options: 'probability', 'probability density', 'percent', or '' (raw counts). Default is 'probability'.	`'probability'`
`full_label`	`str`	Label for the full dataset in the legend. Default 'Full'.	`'Full'`
`selection_label`	`str`	Label for the selection in the legend. Default 'Selection'.	`'Selection'`

Returns:

Type	Description
`Figure`	Plotly figure object ready for display or further customization.

Raises:

Type	Description
`ValueError`	If histnorm is not a valid option.

Source code in energy_repset/diagnostics/score_components/distribution_overlay.py

def plot(
    self,
    df_full: pd.Series,
    df_selection: pd.Series,
    nbins: int = 30,
    histnorm: str = 'probability',
    full_label: str = 'Full',
    selection_label: str = 'Selection',
) -> go.Figure:
    """Create a histogram overlay plot.

    NaN values are dropped from both inputs. The two histograms are drawn
    semi-transparent on top of each other and share one bin group, so their
    bars line up and can be compared directly.

    Args:
        df_full: Series containing values for the full dataset.
        df_selection: Series containing values for the selection.
        nbins: Number of bins for the histogram. Default is 30.
        histnorm: Histogram normalization mode. Options: 'probability',
            'probability density', 'percent', or '' (raw counts).
            Default is 'probability'.
        full_label: Label for the full dataset in the legend. Default 'Full'.
        selection_label: Label for the selection in the legend. Default 'Selection'.

    Returns:
        Plotly figure object ready for display or further customization.

    Raises:
        ValueError: If histnorm is not a valid option.
    """
    valid_histnorms = ['probability', 'probability density', 'percent', '']
    if histnorm not in valid_histnorms:
        raise ValueError(
            f"histnorm must be one of {valid_histnorms}, got '{histnorm}'"
        )

    # Drop NaN values
    full_values = df_full.dropna().values
    selection_values = df_selection.dropna().values

    # Settings common to both traces. The shared bingroup matters: under
    # barmode='overlay' Plotly does not force traces onto common bins, so
    # without it each trace would be auto-binned on its own range and the
    # normalized bar heights would not be comparable.
    shared = dict(
        nbinsx=nbins,
        histnorm=histnorm,
        opacity=0.6,
        bingroup='distribution_overlay',
    )

    # Create figure
    fig = go.Figure()

    # Add full dataset histogram
    fig.add_trace(go.Histogram(x=full_values, name=full_label, **shared))

    # Add selection histogram
    fig.add_trace(go.Histogram(x=selection_values, name=selection_label, **shared))

    # histnorm was validated above, so a direct lookup cannot miss.
    yaxis_title = {
        'probability': 'Probability',
        'probability density': 'Probability Density',
        'percent': 'Percent',
        '': 'Count',
    }[histnorm]

    fig.update_layout(
        xaxis_title=df_full.name or 'Value',
        yaxis_title=yaxis_title,
        barmode='overlay',
        hovermode='x unified',
    )

    return fig

CorrelationDifferenceHeatmap ¶

Visualize the difference between correlation matrices.

Creates a heatmap showing the difference between the correlation matrix of the full dataset and the selection. This helps identify which variable relationships are well-preserved or poorly-preserved by the selection. Related to CorrelationFidelity score component.

Positive values (red) indicate the selection has stronger correlation than the full dataset. Negative values (blue) indicate weaker correlation.

Examples:

>>> # Compare correlation structure
>>> corr_diff = CorrelationDifferenceHeatmap()
>>> full_data = context.df_raw[['demand', 'wind', 'solar']]
>>> selected_indices = context.slicer.get_indices_for_slices(result.selection)
>>> selected_data = context.df_raw.loc[selected_indices, ['demand', 'wind', 'solar']]
>>> fig = corr_diff.plot(full_data, selected_data)
>>> fig.update_layout(title='Correlation Difference: Selection - Full')
>>> fig.show()

>>> # With Spearman correlation
>>> fig = corr_diff.plot(full_data, selected_data, method='spearman')

>>> # Show only lower triangle
>>> fig = corr_diff.plot(full_data, selected_data, show_lower_only=True)

Source code in energy_repset/diagnostics/score_components/correlation_difference_heatmap.py

class CorrelationDifferenceHeatmap:
    """Visualize the difference between correlation matrices.

    Creates a heatmap showing the difference between the correlation matrix of
    the selection and that of the full dataset (selection - full). This helps
    identify which variable relationships are well-preserved or
    poorly-preserved by the selection. Related to CorrelationFidelity score
    component.

    Positive values (red) indicate the selection has a higher (more positive)
    correlation coefficient than the full dataset. Negative values (blue)
    indicate a lower one. The difference is signed, so a positive value does
    not necessarily mean a stronger relationship (e.g. -0.8 -> -0.2 is +0.6).

    Examples:

        >>> # Compare correlation structure
        >>> corr_diff = CorrelationDifferenceHeatmap()
        >>> full_data = context.df_raw[['demand', 'wind', 'solar']]
        >>> selected_indices = context.slicer.get_indices_for_slices(result.selection)
        >>> selected_data = context.df_raw.loc[selected_indices, ['demand', 'wind', 'solar']]
        >>> fig = corr_diff.plot(full_data, selected_data)
        >>> fig.update_layout(title='Correlation Difference: Selection - Full')
        >>> fig.show()

        >>> # With Spearman correlation
        >>> fig = corr_diff.plot(full_data, selected_data, method='spearman')

        >>> # Show only lower triangle
        >>> fig = corr_diff.plot(full_data, selected_data, show_lower_only=True)
    """

    def __init__(self):
        """Initialize the correlation difference heatmap diagnostic."""
        pass

    def plot(
        self,
        df_full: pd.DataFrame,
        df_selection: pd.DataFrame,
        method: str = 'pearson',
        show_lower_only: bool = False,
    ) -> go.Figure:
        """Create a heatmap of correlation differences.

        Args:
            df_full: DataFrame containing variables for the full dataset.
            df_selection: DataFrame containing variables for the selection.
                Must have the same columns as df_full.
            method: Correlation method ('pearson', 'spearman', or 'kendall').
                Default is 'pearson'.
            show_lower_only: If True, shows only the strict lower triangle of
                the difference matrix; the upper triangle and the diagonal are
                replaced by NaN.

        Returns:
            Plotly figure object ready for display or further customization.

        Raises:
            ValueError: If method is invalid or columns don't match.
        """
        if method not in ['pearson', 'spearman', 'kendall']:
            raise ValueError(
                f"method must be 'pearson', 'spearman', or 'kendall', got '{method}'"
            )

        if not df_full.columns.equals(df_selection.columns):
            raise ValueError(
                "df_full and df_selection must have the same columns"
            )

        # Calculate correlation matrices
        corr_full = df_full.corr(method=method)
        corr_selection = df_selection.corr(method=method)

        # Calculate difference (selection - full)
        corr_diff = corr_selection - corr_full

        # Mask upper triangle and diagonal if requested
        if show_lower_only:
            # Function-scope import so the method does not depend on a
            # module-level numpy import.
            import numpy as np

            # k=-1 keeps only the cells strictly below the diagonal; one
            # vectorized mask replaces the per-cell .iloc assignments.
            keep = np.tril(np.ones(corr_diff.shape, dtype=bool), k=-1)
            corr_diff = corr_diff.where(keep)

        # Determine color scale range (symmetric around 0). NaN cells are
        # skipped by the reductions; the result is NaN only when every cell
        # is NaN (e.g. a single variable with show_lower_only=True).
        max_abs = corr_diff.abs().max().max()
        if pd.isna(max_abs):
            max_abs = 1.0

        # Create heatmap
        fig = px.imshow(
            corr_diff,
            x=corr_diff.columns,
            y=corr_diff.index,
            color_continuous_scale='RdBu_r',
            color_continuous_midpoint=0,
            zmin=-max_abs,
            zmax=max_abs,
            aspect='auto',
        )

        # Update layout for better readability
        fig.update_layout(
            xaxis_title='',
            yaxis_title='',
            coloraxis_colorbar=dict(title='Δ Correlation<br>(Selection - Full)'),
        )

        # Annotate each cell with its value rounded to two decimals
        fig.update_traces(
            text=corr_diff.round(2).values,
            texttemplate='%{text}',
            textfont=dict(size=10),
        )

        return fig

`__init__` ¶

__init__()

Initialize the correlation difference heatmap diagnostic.

Source code in energy_repset/diagnostics/score_components/correlation_difference_heatmap.py

def __init__(self):
    """Set up the correlation difference heatmap diagnostic.

    No arguments are taken and no attributes are set.
    """

plot ¶

plot(df_full: DataFrame, df_selection: DataFrame, method: str = 'pearson', show_lower_only: bool = False) -> Figure

Create a heatmap of correlation differences.

Parameters:

Name	Type	Description	Default
`df_full`	`DataFrame`	DataFrame containing variables for the full dataset.	required
`df_selection`	`DataFrame`	DataFrame containing variables for the selection. Must have the same columns as df_full.	required
`method`	`str`	Correlation method ('pearson', 'spearman', or 'kendall'). Default is 'pearson'.	`'pearson'`
`show_lower_only`	`bool`	If True, shows only the lower triangle of the difference matrix (removes redundant upper triangle and diagonal).	`False`

Returns:

Type	Description
`Figure`	Plotly figure object ready for display or further customization.

Raises:

Type	Description
`ValueError`	If method is invalid or columns don't match.

Source code in energy_repset/diagnostics/score_components/correlation_difference_heatmap.py

def plot(
    self,
    df_full: pd.DataFrame,
    df_selection: pd.DataFrame,
    method: str = 'pearson',
    show_lower_only: bool = False,
) -> go.Figure:
    """Create a heatmap of correlation differences.

    Args:
        df_full: DataFrame containing variables for the full dataset.
        df_selection: DataFrame containing variables for the selection.
            Must have the same columns as df_full.
        method: Correlation method ('pearson', 'spearman', or 'kendall').
            Default is 'pearson'.
        show_lower_only: If True, shows only the strict lower triangle of
            the difference matrix; the upper triangle and the diagonal are
            replaced by NaN.

    Returns:
        Plotly figure object ready for display or further customization.

    Raises:
        ValueError: If method is invalid or columns don't match.
    """
    if method not in ['pearson', 'spearman', 'kendall']:
        raise ValueError(
            f"method must be 'pearson', 'spearman', or 'kendall', got '{method}'"
        )

    if not df_full.columns.equals(df_selection.columns):
        raise ValueError(
            "df_full and df_selection must have the same columns"
        )

    # Calculate correlation matrices
    corr_full = df_full.corr(method=method)
    corr_selection = df_selection.corr(method=method)

    # Calculate difference (selection - full)
    corr_diff = corr_selection - corr_full

    # Mask upper triangle and diagonal if requested
    if show_lower_only:
        # Function-scope import so the method does not depend on a
        # module-level numpy import.
        import numpy as np

        # k=-1 keeps only the cells strictly below the diagonal; one
        # vectorized mask replaces the per-cell .iloc assignments.
        keep = np.tril(np.ones(corr_diff.shape, dtype=bool), k=-1)
        corr_diff = corr_diff.where(keep)

    # Determine color scale range (symmetric around 0). NaN cells are
    # skipped by the reductions; the result is NaN only when every cell
    # is NaN (e.g. a single variable with show_lower_only=True).
    max_abs = corr_diff.abs().max().max()
    if pd.isna(max_abs):
        max_abs = 1.0

    # Create heatmap
    fig = px.imshow(
        corr_diff,
        x=corr_diff.columns,
        y=corr_diff.index,
        color_continuous_scale='RdBu_r',
        color_continuous_midpoint=0,
        zmin=-max_abs,
        zmax=max_abs,
        aspect='auto',
    )

    # Update layout for better readability
    fig.update_layout(
        xaxis_title='',
        yaxis_title='',
        coloraxis_colorbar=dict(title='Δ Correlation<br>(Selection - Full)'),
    )

    # Annotate each cell with its value rounded to two decimals
    fig.update_traces(
        text=corr_diff.round(2).values,
        texttemplate='%{text}',
        textfont=dict(size=10),
    )

    return fig

DiurnalProfileOverlay ¶

Overlay mean diurnal (hour-of-day) profiles for full vs selected data.

Creates a plot showing the average value by hour of day for each variable, comparing the full dataset to the selection. This helps visualize how well the selection preserves daily patterns, which is related to DiurnalFidelity score component.

Examples:

>>> # Compare diurnal patterns
>>> diurnal_plot = DiurnalProfileOverlay()
>>> full_data = context.df_raw[['demand', 'wind', 'solar']]
>>> selected_indices = context.slicer.get_indices_for_slices(result.selection)
>>> selected_data = context.df_raw.loc[selected_indices, ['demand', 'wind', 'solar']]
>>> fig = diurnal_plot.plot(full_data, selected_data)
>>> fig.update_layout(title='Diurnal Profiles: Full vs Selected')
>>> fig.show()

>>> # Single variable
>>> fig = diurnal_plot.plot(
...     full_data[['demand']],
...     selected_data[['demand']]
... )

>>> # Subset of variables
>>> fig = diurnal_plot.plot(
...     full_data,
...     selected_data,
...     variables=['demand', 'wind']
... )

Source code in energy_repset/diagnostics/score_components/diurnal_profile_overlay.py

class DiurnalProfileOverlay:
    """Overlay mean diurnal (hour-of-day) profiles for full vs selected data.

    Creates a plot showing the average value by hour of day for each variable,
    comparing the full dataset to the selection. This helps visualize how well
    the selection preserves daily patterns, which is related to DiurnalFidelity
    score component.

    Examples:

        >>> # Compare diurnal patterns
        >>> diurnal_plot = DiurnalProfileOverlay()
        >>> full_data = context.df_raw[['demand', 'wind', 'solar']]
        >>> selected_indices = context.slicer.get_indices_for_slices(result.selection)
        >>> selected_data = context.df_raw.loc[selected_indices, ['demand', 'wind', 'solar']]
        >>> fig = diurnal_plot.plot(full_data, selected_data)
        >>> fig.update_layout(title='Diurnal Profiles: Full vs Selected')
        >>> fig.show()

        >>> # Single variable
        >>> fig = diurnal_plot.plot(
        ...     full_data[['demand']],
        ...     selected_data[['demand']]
        ... )

        >>> # Subset of variables
        >>> fig = diurnal_plot.plot(
        ...     full_data,
        ...     selected_data,
        ...     variables=['demand', 'wind']
        ... )
    """

    def __init__(self):
        """Initialize the diurnal profile overlay diagnostic."""
        pass

    def plot(
        self,
        df_full: pd.DataFrame,
        df_selection: pd.DataFrame,
        variables: list[str] = None,
        full_label: str = 'Full',
        selection_label: str = 'Selection',
    ) -> go.Figure:
        """Create a diurnal profile overlay plot.

        Args:
            df_full: DataFrame with DatetimeIndex and variable columns for full dataset.
            df_selection: DataFrame with DatetimeIndex and variable columns for selection.
                Must have the same columns as df_full.
            variables: List of variable names to include. If None, uses all columns.
            full_label: Label suffix for full dataset traces. Default 'Full'.
            selection_label: Label suffix for selection traces. Default 'Selection'.

        Returns:
            Plotly figure object ready for display or further customization.

        Raises:
            ValueError: If DataFrames don't have DatetimeIndex, columns don't
                match, or a requested variable is not a column of the frames.
        """
        if not isinstance(df_full.index, pd.DatetimeIndex):
            raise ValueError("df_full must have a DatetimeIndex")
        if not isinstance(df_selection.index, pd.DatetimeIndex):
            raise ValueError("df_selection must have a DatetimeIndex")
        if not df_full.columns.equals(df_selection.columns):
            raise ValueError("df_full and df_selection must have the same columns")

        # Determine which variables to plot
        if variables is None:
            variables = list(df_full.columns)
        else:
            # Validate requested variables
            missing = set(variables) - set(df_full.columns)
            if missing:
                raise ValueError(f"Variables not found in DataFrames: {missing}")

        # Calculate mean profiles by grouping directly on the index's hour.
        # No helper 'hour' column is added, so a variable that is itself
        # named 'hour' is no longer overwritten, and no frame copy is needed.
        full_profile = (
            df_full[variables]
            .groupby(df_full.index.hour)
            .mean(numeric_only=True)
        )
        selection_profile = (
            df_selection[variables]
            .groupby(df_selection.index.hour)
            .mean(numeric_only=True)
        )

        # Create figure
        fig = go.Figure()

        # Add one solid (full) and one dashed (selection) trace per variable
        for variable in variables:
            # Full dataset trace
            fig.add_trace(go.Scatter(
                x=full_profile.index,
                y=full_profile[variable],
                mode='lines+markers',
                name=f'{variable} ({full_label})',
                line=dict(width=2),
                marker=dict(size=6),
            ))

            # Selection trace
            fig.add_trace(go.Scatter(
                x=selection_profile.index,
                y=selection_profile[variable],
                mode='lines+markers',
                name=f'{variable} ({selection_label})',
                line=dict(width=2, dash='dash'),
                marker=dict(size=6, symbol='diamond'),
            ))

        # Update layout: fixed 0-23 hour axis with a tick every two hours
        fig.update_layout(
            xaxis_title='Hour of Day',
            yaxis_title='Mean Value',
            hovermode='x unified',
            xaxis=dict(
                tickmode='linear',
                tick0=0,
                dtick=2,
                range=[-0.5, 23.5],
            ),
        )

        return fig

`__init__` ¶

__init__()

Initialize the diurnal profile overlay diagnostic.

Source code in energy_repset/diagnostics/score_components/diurnal_profile_overlay.py

def __init__(self):
    """Set up the diurnal profile overlay diagnostic.

    No arguments are taken and no attributes are set.
    """

plot ¶

plot(df_full: DataFrame, df_selection: DataFrame, variables: list[str] = None, full_label: str = 'Full', selection_label: str = 'Selection') -> Figure

Create a diurnal profile overlay plot.

Parameters:

Name	Type	Description	Default
`df_full`	`DataFrame`	DataFrame with DatetimeIndex and variable columns for full dataset.	required
`df_selection`	`DataFrame`	DataFrame with DatetimeIndex and variable columns for selection. Must have the same columns as df_full.	required
`variables`	`list[str]`	List of variable names to include. If None, uses all columns.	`None`
`full_label`	`str`	Label suffix for full dataset traces. Default 'Full'.	`'Full'`
`selection_label`	`str`	Label suffix for selection traces. Default 'Selection'.	`'Selection'`

Returns:

Type	Description
`Figure`	Plotly figure object ready for display or further customization.

Raises:

Type	Description
`ValueError`	If DataFrames don't have DatetimeIndex or columns don't match.

Source code in energy_repset/diagnostics/score_components/diurnal_profile_overlay.py

def plot(
    self,
    df_full: pd.DataFrame,
    df_selection: pd.DataFrame,
    variables: list[str] = None,
    full_label: str = 'Full',
    selection_label: str = 'Selection',
) -> go.Figure:
    """Create a diurnal profile overlay plot.

    Args:
        df_full: DataFrame with DatetimeIndex and variable columns for full dataset.
        df_selection: DataFrame with DatetimeIndex and variable columns for selection.
            Must have the same columns as df_full.
        variables: List of variable names to include. If None, uses all columns.
        full_label: Label suffix for full dataset traces. Default 'Full'.
        selection_label: Label suffix for selection traces. Default 'Selection'.

    Returns:
        Plotly figure object ready for display or further customization.

    Raises:
        ValueError: If DataFrames don't have DatetimeIndex, columns don't
            match, or a requested variable is not a column of the frames.
    """
    if not isinstance(df_full.index, pd.DatetimeIndex):
        raise ValueError("df_full must have a DatetimeIndex")
    if not isinstance(df_selection.index, pd.DatetimeIndex):
        raise ValueError("df_selection must have a DatetimeIndex")
    if not df_full.columns.equals(df_selection.columns):
        raise ValueError("df_full and df_selection must have the same columns")

    # Determine which variables to plot
    if variables is None:
        variables = list(df_full.columns)
    else:
        # Validate requested variables
        missing = set(variables) - set(df_full.columns)
        if missing:
            raise ValueError(f"Variables not found in DataFrames: {missing}")

    # Calculate mean profiles by grouping directly on the index's hour.
    # No helper 'hour' column is added, so a variable that is itself
    # named 'hour' is no longer overwritten, and no frame copy is needed.
    full_profile = (
        df_full[variables]
        .groupby(df_full.index.hour)
        .mean(numeric_only=True)
    )
    selection_profile = (
        df_selection[variables]
        .groupby(df_selection.index.hour)
        .mean(numeric_only=True)
    )

    # Create figure
    fig = go.Figure()

    # Add one solid (full) and one dashed (selection) trace per variable
    for variable in variables:
        # Full dataset trace
        fig.add_trace(go.Scatter(
            x=full_profile.index,
            y=full_profile[variable],
            mode='lines+markers',
            name=f'{variable} ({full_label})',
            line=dict(width=2),
            marker=dict(size=6),
        ))

        # Selection trace
        fig.add_trace(go.Scatter(
            x=selection_profile.index,
            y=selection_profile[variable],
            mode='lines+markers',
            name=f'{variable} ({selection_label})',
            line=dict(width=2, dash='dash'),
            marker=dict(size=6, symbol='diamond'),
        ))

    # Update layout: fixed 0-23 hour axis with a tick every two hours
    fig.update_layout(
        xaxis_title='Hour of Day',
        yaxis_title='Mean Value',
        hovermode='x unified',
        xaxis=dict(
            tickmode='linear',
            tick0=0,
            dtick=2,
            range=[-0.5, 23.5],
        ),
    )

    return fig

Results¶

ResponsibilityBars ¶

Bar chart showing responsibility weights for selected representatives.

Visualizes the weight distribution across selected periods as computed by a RepresentationModel. Each bar shows how much each representative contributes to the full dataset representation.

Optionally displays a reference line showing uniform weights (1/k) for comparison with non-uniform weighting schemes like cluster-size based weights.

Examples:

>>> from energy_repset.diagnostics.results import ResponsibilityBars
>>>
>>> # After running workflow with result containing weights
>>> weights = result.weights  # e.g., {Period('2024-01'): 0.35, ...}
>>> bars = ResponsibilityBars()
>>> fig = bars.plot(weights, show_uniform_reference=True)
>>> fig.update_layout(title='Responsibility Weights')
>>> fig.show()

Source code in energy_repset/diagnostics/results/responsibility_bars.py

class ResponsibilityBars:
    """Bar chart showing responsibility weights for selected representatives.

    Visualizes the weight distribution across selected periods as computed by
    a RepresentationModel. Each bar shows how much each representative
    contributes to the full dataset representation.

    Optionally displays a reference line showing uniform weights (1/k) for
    comparison with non-uniform weighting schemes like cluster-size based
    weights.

    Examples:

        >>> from energy_repset.diagnostics.results import ResponsibilityBars
        >>>
        >>> # After running workflow with result containing weights
        >>> weights = result.weights  # e.g., {Period('2024-01'): 0.35, ...}
        >>> bars = ResponsibilityBars()
        >>> fig = bars.plot(weights, show_uniform_reference=True)
        >>> fig.update_layout(title='Responsibility Weights')
        >>> fig.show()
    """

    def __init__(self):
        """Initialize ResponsibilityBars diagnostic."""
        pass

    def plot(
        self,
        weights: Dict[Hashable, float],
        show_uniform_reference: bool = True,
    ) -> go.Figure:
        """Create bar chart of responsibility weights.

        Args:
            weights: Dictionary mapping slice identifiers to their weights.
                Weights should sum to 1.0 for meaningful comparison with
                the uniform reference line.
            show_uniform_reference: If True, adds horizontal dotted line
                showing uniform weight (1/k) for comparison.

        Returns:
            Plotly figure with bar chart. X-axis shows slice labels, Y-axis
            shows weight values. Text labels show weights to 3 decimal places.

        Raises:
            ValueError: If weights dictionary is empty.
        """
        if not weights:
            raise ValueError("Weights dictionary cannot be empty")

        # Prepare data for plotting; keys are stringified for the x-axis
        df = pd.DataFrame({
            'slice': [str(s) for s in weights],
            'weight': list(weights.values())
        })

        # Create bar chart
        fig = px.bar(
            df,
            x='slice',
            y='weight',
            text='weight'
        )

        # Format text labels to 3 decimal places, position outside bars
        fig.update_traces(
            texttemplate='%{y:.3f}',
            textposition='outside'
        )

        # weights is non-empty here (checked above), so 1/k is well defined
        uniform_weight = 1.0 / len(weights)

        # The top of the y-axis must cover the tallest bar and, when shown,
        # the reference line; otherwise the line can fall outside the
        # visible range when the weights do not sum to 1.
        y_top = max(df['weight'])
        if show_uniform_reference:
            y_top = max(y_top, uniform_weight)

        # Set y-axis range and label
        fig.update_yaxes(
            range=[0, y_top * 1.15],  # Add headroom for text labels
            title='Responsibility Weight'
        )

        fig.update_xaxes(title='Selected Period')

        # Add uniform reference line if requested
        if show_uniform_reference:
            fig.add_hline(
                y=uniform_weight,
                line_dash='dot',
                annotation_text=f'Uniform ({uniform_weight:.3f})',
                annotation_position='top left'
            )

        return fig

`__init__` ¶

__init__()

Initialize ResponsibilityBars diagnostic.

Source code in energy_repset/diagnostics/results/responsibility_bars.py

def __init__(self):
    """Set up the ResponsibilityBars diagnostic.

    No arguments are taken and no attributes are set.
    """

plot ¶

plot(weights: dict[Hashable, float], show_uniform_reference: bool = True) -> Figure

Create bar chart of responsibility weights.

Parameters:

Name	Type	Description	Default
`weights`	`dict[Hashable, float]`	Dictionary mapping slice identifiers to their weights. Weights should sum to 1.0 for meaningful comparison with the uniform reference line.	required
`show_uniform_reference`	`bool`	If True, adds horizontal dotted line showing uniform weight (1/k) for comparison.	`True`

Returns:

Type	Description
`Figure`	Plotly figure with bar chart. X-axis shows slice labels, Y-axis shows weight values. Text labels show weights to 3 decimal places.

Raises:

Type	Description
`ValueError`	If weights dictionary is empty.

Source code in energy_repset/diagnostics/results/responsibility_bars.py

def plot(
    self,
    weights: Dict[Hashable, float],
    show_uniform_reference: bool = True,
) -> go.Figure:
    """Create bar chart of responsibility weights.

    Args:
        weights: Dictionary mapping slice identifiers to their weights.
            Weights should sum to 1.0 for meaningful comparison with
            the uniform reference line.
        show_uniform_reference: If True, adds horizontal dotted line
            showing uniform weight (1/k) for comparison.

    Returns:
        Plotly figure with bar chart. X-axis shows slice labels, Y-axis
        shows weight values. Text labels show weights to 3 decimal places.

    Raises:
        ValueError: If weights dictionary is empty.
    """
    if not weights:
        raise ValueError("Weights dictionary cannot be empty")

    # Prepare data for plotting; keys are stringified for the x-axis
    df = pd.DataFrame({
        'slice': [str(s) for s in weights],
        'weight': list(weights.values())
    })

    # Create bar chart
    fig = px.bar(
        df,
        x='slice',
        y='weight',
        text='weight'
    )

    # Format text labels to 3 decimal places, position outside bars
    fig.update_traces(
        texttemplate='%{y:.3f}',
        textposition='outside'
    )

    # weights is non-empty here (checked above), so 1/k is well defined
    uniform_weight = 1.0 / len(weights)

    # The top of the y-axis must cover the tallest bar and, when shown,
    # the reference line; otherwise the line can fall outside the
    # visible range when the weights do not sum to 1.
    y_top = max(df['weight'])
    if show_uniform_reference:
        y_top = max(y_top, uniform_weight)

    # Set y-axis range and label
    fig.update_yaxes(
        range=[0, y_top * 1.15],  # Add headroom for text labels
        title='Responsibility Weight'
    )

    fig.update_xaxes(title='Selected Period')

    # Add uniform reference line if requested
    if show_uniform_reference:
        fig.add_hline(
            y=uniform_weight,
            line_dash='dot',
            annotation_text=f'Uniform ({uniform_weight:.3f})',
            annotation_position='top left'
        )

    return fig

ParetoScatter2D ¶

2D scatter plot of all evaluated combinations with Pareto front highlighted.

Visualizes the objective space for two objectives, showing:

- All evaluated combinations as scatter points
- Pareto-optimal solutions highlighted
- Selected combination (if provided) marked distinctly
- Feasible vs infeasible solutions (if constraints exist)

Parameters:

Name	Type	Description	Default
`objective_x`	`str`	Name of objective for x-axis.	required
`objective_y`	`str`	Name of objective for y-axis.	required

Examples:

>>> from energy_repset.diagnostics.results import ParetoScatter2D
>>> scatter = ParetoScatter2D(objective_x='wasserstein', objective_y='correlation')
>>> fig = scatter.plot(
...     search_algorithm=workflow.search_algorithm,
...     selected_combination=result.selection
... )
>>> fig.update_layout(title='Pareto Front: Wasserstein vs Correlation')
>>> fig.show()

Source code in energy_repset/diagnostics/results/pareto_scatter.py

class ParetoScatter2D:
    """Scatter plot of two objectives over every evaluated combination.

    The figure can contain the following traces:
    - 'Infeasible', 'Dominated' and 'Pareto Front' when the selection policy
      exposes a non-None ``pareto_mask`` (each only if it has points)
    - 'All Combinations' otherwise
    - 'Selected' for the combination passed to ``plot`` (if found)

    Args:
        objective_x: Name of objective for x-axis.
        objective_y: Name of objective for y-axis.

    Examples:
        >>> from energy_repset.diagnostics.results import ParetoScatter2D
        >>> scatter = ParetoScatter2D(objective_x='wasserstein', objective_y='correlation')
        >>> fig = scatter.plot(
        ...     search_algorithm=workflow.search_algorithm,
        ...     selected_combination=result.selection
        ... )
        >>> fig.update_layout(title='Pareto Front: Wasserstein vs Correlation')
        >>> fig.show()
    """

    def __init__(self, objective_x: str, objective_y: str):
        """Store the names of the two objectives to plot.

        Args:
            objective_x: Name of objective for x-axis.
            objective_y: Name of objective for y-axis.
        """
        self.objective_x, self.objective_y = objective_x, objective_y

    def plot(
        self,
        search_algorithm: ObjectiveDrivenCombinatorialSearchAlgorithm,
        selected_combination: SliceCombination | None = None,
    ) -> go.Figure:
        """Build the objective-space scatter plot.

        Args:
            search_algorithm: Search algorithm whose ``get_all_scores()``
                table and ``selection_policy`` are read; expected to have run
                find_selection() already.
            selected_combination: Optional combination to highlight (e.g.,
                result.selection). It is matched by equality against the
                'slices' column of the score table.

        Returns:
            Plotly figure with scatter plot.

        Raises:
            ValueError: If either objective is not a column of the score
                table. The original contract also lists a missing
                find_selection() call; that error is presumably raised by
                get_all_scores() -- confirm against its implementation.
        """
        scores = search_algorithm.get_all_scores()

        # Check x first, then y, so the first missing objective is reported
        for objective in (self.objective_x, self.objective_y):
            if objective not in scores.columns:
                raise ValueError(f"Objective '{objective}' not found in scores")

        # A policy without a pareto_mask attribute is treated like one whose
        # mask is None: no Pareto/feasibility split is drawn.
        policy = search_algorithm.selection_policy
        pareto_mask = getattr(policy, 'pareto_mask', None)
        feasible_mask = policy.feasible_mask if pareto_mask is not None else None

        xs = scores[self.objective_x]
        ys = scores[self.objective_y]

        # Hover text shared by every trace; '<extra></extra>' is appended last
        hover_lines = (
            f'{self.objective_x}: %{{x:.4f}}<br>'
            f'{self.objective_y}: %{{y:.4f}}<br>'
        )
        default_hover = hover_lines + '<extra></extra>'

        figure = go.Figure()

        if pareto_mask is None:
            figure.add_trace(go.Scatter(
                x=xs,
                y=ys,
                mode='markers',
                marker=dict(size=6, opacity=0.5),
                name='All Combinations',
                hovertemplate=default_hover,
            ))
        else:
            on_front = pareto_mask.values
            is_feasible = feasible_mask.values

            # (legend name, membership mask, marker style), in drawing order
            groups = (
                ('Infeasible', ~is_feasible, dict(size=6, opacity=0.3)),
                ('Dominated', is_feasible & ~on_front, dict(size=6, opacity=0.5)),
                ('Pareto Front', on_front & is_feasible, dict(size=10, symbol='diamond')),
            )
            for label, members, marker_style in groups:
                # Skip empty groups so they do not appear in the legend
                if not members.any():
                    continue
                figure.add_trace(go.Scatter(
                    x=xs[members],
                    y=ys[members],
                    mode='markers',
                    marker=marker_style,
                    name=label,
                    hovertemplate=default_hover,
                ))

        if selected_combination is not None:
            is_selected = scores['slices'].apply(
                lambda combo: combo == selected_combination
            )
            if is_selected.any():
                # Only the first matching row is highlighted
                figure.add_trace(go.Scatter(
                    x=[xs[is_selected].values[0]],
                    y=[ys[is_selected].values[0]],
                    mode='markers',
                    marker=dict(
                        size=15,
                        symbol='star',
                        line=dict(width=2, color='black')
                    ),
                    name='Selected',
                    hovertemplate=(
                        hover_lines
                        + '<b>SELECTED</b><br>'
                        + '<extra></extra>'
                    ),
                ))

        figure.update_layout(
            xaxis_title=self.objective_x,
            yaxis_title=self.objective_y,
            hovermode='closest',
            showlegend=True,
        )

        return figure

`__init__` ¶

__init__(objective_x: str, objective_y: str)

Initialize Pareto scatter diagnostic.

Parameters:

Name	Type	Description	Default
`objective_x`	`str`	Name of objective for x-axis.	required
`objective_y`	`str`	Name of objective for y-axis.	required

Source code in energy_repset/diagnostics/results/pareto_scatter.py

def __init__(self, objective_x: str, objective_y: str):
    """Remember which objective is drawn on each axis of the scatter plot.

    Args:
        objective_x: Name of the objective shown on the x-axis.
        objective_y: Name of the objective shown on the y-axis.
    """
    # Both names are stored verbatim; they are only checked against the
    # score columns later, when plot() is called.
    self.objective_x, self.objective_y = objective_x, objective_y

plot ¶

plot(search_algorithm: ObjectiveDrivenCombinatorialSearchAlgorithm, selected_combination: SliceCombination | None = None) -> Figure

Create 2D scatter plot of Pareto front.

Parameters:

Name	Type	Description	Default
`search_algorithm`	`ObjectiveDrivenCombinatorialSearchAlgorithm`	Search algorithm after find_selection() has been called.	required
`selected_combination`	`SliceCombination \| None`	Optional combination to highlight (e.g., result.selection).	`None`

Returns:

Type	Description
`Figure`	Plotly figure with scatter plot.

Raises:

Type	Description
`ValueError`	If find_selection() hasn't been called or objectives not found.

Source code in energy_repset/diagnostics/results/pareto_scatter.py

def plot(
    self,
    search_algorithm: ObjectiveDrivenCombinatorialSearchAlgorithm,
    selected_combination: SliceCombination | None = None,
) -> go.Figure:
    """Draw every scored combination on a two-objective scatter plot.

    When the selection policy exposes a non-None ``pareto_mask``, points are
    split into up to three traces ('Infeasible', 'Dominated', 'Pareto Front');
    otherwise a single 'All Combinations' trace is drawn. A matching
    ``selected_combination`` is added on top as a star marker.

    Args:
        search_algorithm: Search algorithm after find_selection() has been called.
        selected_combination: Optional combination to highlight (e.g., result.selection).

    Returns:
        Plotly figure with scatter plot.

    Raises:
        ValueError: If either configured objective is not a column of the
            scores table (and, per the original contract, if
            find_selection() hasn't been called).
    """
    scores = search_algorithm.get_all_scores()

    # x is validated before y so the first missing objective is the one reported.
    for objective in (self.objective_x, self.objective_y):
        if objective not in scores.columns:
            raise ValueError(f"Objective '{objective}' not found in scores")

    # A policy without a pareto_mask attribute is treated like one whose mask is None.
    policy = search_algorithm.selection_policy
    pareto_mask = getattr(policy, 'pareto_mask', None)
    feasible_mask = policy.feasible_mask if pareto_mask is not None else None

    x_vals = scores[self.objective_x]
    y_vals = scores[self.objective_y]

    # Shared hover text; '<extra></extra>' suppresses plotly's secondary hover box.
    hover_axes = (
        f'{self.objective_x}: %{{x:.4f}}<br>'
        f'{self.objective_y}: %{{y:.4f}}<br>'
    )
    hover_default = hover_axes + '<extra></extra>'

    fig = go.Figure()

    if pareto_mask is None:
        fig.add_trace(go.Scatter(
            x=x_vals,
            y=y_vals,
            mode='markers',
            marker=dict(size=6, opacity=0.5),
            name='All Combinations',
            hovertemplate=hover_default,
        ))
    else:
        is_pareto = pareto_mask.values
        is_feasible = feasible_mask.values

        # Drawn in this order so the Pareto front ends up above the other points.
        layers = [
            ('Infeasible', ~is_feasible, dict(size=6, opacity=0.3)),
            ('Dominated', is_feasible & ~is_pareto, dict(size=6, opacity=0.5)),
            ('Pareto Front', is_pareto & is_feasible, dict(size=10, symbol='diamond')),
        ]
        for trace_name, mask, marker_style in layers:
            if not mask.any():
                continue  # empty categories get no trace (and no legend entry)
            fig.add_trace(go.Scatter(
                x=x_vals[mask],
                y=y_vals[mask],
                mode='markers',
                marker=marker_style,
                name=trace_name,
                hovertemplate=hover_default,
            ))

    if selected_combination is not None:
        is_selected = scores['slices'].apply(lambda combo: combo == selected_combination)
        if is_selected.any():
            # Only the first matching row is highlighted.
            fig.add_trace(go.Scatter(
                x=[x_vals[is_selected].values[0]],
                y=[y_vals[is_selected].values[0]],
                mode='markers',
                marker=dict(
                    size=15,
                    symbol='star',
                    line=dict(width=2, color='black'),
                ),
                name='Selected',
                hovertemplate=hover_axes + '<b>SELECTED</b><br>' + '<extra></extra>',
            ))

    fig.update_layout(
        xaxis_title=self.objective_x,
        yaxis_title=self.objective_y,
        hovermode='closest',
        showlegend=True,
    )

    return fig

ParetoScatterMatrix ¶

Scatter matrix of all objectives showing Pareto front.

Creates a scatter plot matrix (SPLOM) showing pairwise relationships between all objectives. Each subplot shows two objectives with Pareto front highlighted.

Parameters:

Name	Type	Description	Default
`objectives`	`list[str] \| None`	List of objective names to include (None = all objectives).	`None`

Examples:

>>> from energy_repset.diagnostics.results import ParetoScatterMatrix
>>> scatter_matrix = ParetoScatterMatrix(
...     objectives=['wasserstein', 'correlation', 'diurnal']
... )
>>> fig = scatter_matrix.plot(
...     search_algorithm=workflow.search_algorithm,
...     selected_combination=result.selection
... )
>>> fig.update_layout(title='Pareto Front: All Objectives')
>>> fig.show()

Source code in energy_repset/diagnostics/results/pareto_scatter.py

class ParetoScatterMatrix:
    """Scatter matrix of all objectives showing Pareto front.

    Creates a scatter plot matrix (SPLOM) showing pairwise relationships between
    all objectives. When the selection policy exposes a Pareto mask, points are
    colored as infeasible, dominated, or Pareto-optimal. An optional selected
    combination is overlaid as a star marker in every panel.

    Args:
        objectives: List of objective names to include (None = all objectives).

    Examples:
        >>> from energy_repset.diagnostics.results import ParetoScatterMatrix
        >>> scatter_matrix = ParetoScatterMatrix(
        ...     objectives=['wasserstein', 'correlation', 'diurnal']
        ... )
        >>> fig = scatter_matrix.plot(
        ...     search_algorithm=workflow.search_algorithm,
        ...     selected_combination=result.selection
        ... )
        >>> fig.update_layout(title='Pareto Front: All Objectives')
        >>> fig.show()
    """

    def __init__(self, objectives: list[str] | None = None):
        """Initialize Pareto scatter matrix diagnostic.

        Args:
            objectives: List of objective names to include (None = all).
        """
        self.objectives = objectives

    def plot(
        self,
        search_algorithm: ObjectiveDrivenCombinatorialSearchAlgorithm,
        selected_combination: SliceCombination | None = None,
    ) -> go.Figure:
        """Create scatter matrix of Pareto front.

        Args:
            search_algorithm: Search algorithm after find_selection() has been called.
            selected_combination: Optional combination to highlight. If it
                matches a row of the scores table, that row is drawn as a
                star marker on top of the regular points.

        Returns:
            Plotly figure with scatter matrix.

        Raises:
            ValueError: If a requested objective is not a score column, if
                fewer than two objectives are available, or (per the original
                contract) if find_selection() hasn't been called.
        """
        df = search_algorithm.get_all_scores()

        if self.objectives is None:
            # Every column except the bookkeeping ones counts as an objective.
            obj_cols = [col for col in df.columns if col not in ['slices', 'label']]
        else:
            obj_cols = self.objectives
            for obj in obj_cols:
                if obj not in df.columns:
                    raise ValueError(f"Objective '{obj}' not found in scores")

        if len(obj_cols) < 2:
            raise ValueError("Need at least 2 objectives for scatter matrix")

        # A policy without a pareto_mask attribute is treated like one whose mask is None.
        policy = search_algorithm.selection_policy
        pareto_mask = getattr(policy, 'pareto_mask', None)

        marker = dict(
            size=5,
            showscale=False,
            line=dict(width=0.5, color='white'),
        )
        if pareto_mask is not None:
            pareto = pareto_mask.values
            feasible = policy.feasible_mask.values

            df_plot = df.copy()
            df_plot['category'] = 'Dominated'
            df_plot.loc[~feasible, 'category'] = 'Infeasible'
            df_plot.loc[pareto & feasible, 'category'] = 'Pareto Front'

            marker.update(
                color=df_plot['category'].map({
                    'Infeasible': 0,
                    'Dominated': 1,
                    'Pareto Front': 2,
                }),
                colorscale=[[0, 'lightgray'], [0.5, 'steelblue'], [1, 'darkorange']],
                # Pin the color range: without it plotly rescales to the codes
                # actually present, so e.g. a run with no infeasible points
                # would paint 'Dominated' gray instead of blue.
                cmin=0,
                cmax=2,
            )

        fig = go.Figure(data=go.Splom(
            dimensions=[dict(label=obj, values=df[obj]) for obj in obj_cols],
            marker=marker,
            text=df['label'] if 'label' in df else None,
            diagonal_visible=False,
            showupperhalf=False,
        ))

        if selected_combination is not None:
            selected_idx = df['slices'].apply(lambda x: x == selected_combination)
            if selected_idx.any():
                # Overlay the first matching row as a one-point Splom. It has the
                # same dimensions and layout flags as the main trace, so it is
                # drawn on the same axes in every panel.
                selected_vals = [df.loc[selected_idx, obj].values[0] for obj in obj_cols]
                fig.add_trace(go.Splom(
                    dimensions=[
                        dict(label=obj, values=[val])
                        for obj, val in zip(obj_cols, selected_vals)
                    ],
                    marker=dict(
                        size=12,
                        symbol='star',
                        color='crimson',
                        line=dict(width=1.5, color='black'),
                    ),
                    name='Selected',
                    diagonal_visible=False,
                    showupperhalf=False,
                ))

        fig.update_layout(
            title='Scatter Matrix: All Objectives',
            height=150 * len(obj_cols),
            width=150 * len(obj_cols),
            showlegend=False,
        )

        return fig

__init__ ¶

__init__(objectives: list[str] | None = None)

Initialize Pareto scatter matrix diagnostic.

Parameters:

Name	Type	Description	Default
`objectives`	`list[str] \| None`	List of objective names to include (None = all).	`None`

Source code in energy_repset/diagnostics/results/pareto_scatter.py

def __init__(self, objectives: list[str] | None = None):
    """Initialize Pareto scatter matrix diagnostic.

    Args:
        objectives: List of objective names to include (None = all).
            The value is stored unchanged on ``self.objectives``; no
            validation is performed here.
    """
    self.objectives = objectives

plot ¶

plot(search_algorithm: ObjectiveDrivenCombinatorialSearchAlgorithm, selected_combination: SliceCombination | None = None) -> Figure

Create scatter matrix of Pareto front.

Parameters:

Name	Type	Description	Default
`search_algorithm`	`ObjectiveDrivenCombinatorialSearchAlgorithm`	Search algorithm after find_selection() has been called.	required
`selected_combination`	`SliceCombination \| None`	Optional combination to highlight.	`None`

Returns:

Type	Description
`Figure`	Plotly figure with scatter matrix.

Raises:

Type	Description
`ValueError`	If find_selection() hasn't been called.

Source code in energy_repset/diagnostics/results/pareto_scatter.py

def plot(
    self,
    search_algorithm: ObjectiveDrivenCombinatorialSearchAlgorithm,
    selected_combination: SliceCombination | None = None,
) -> go.Figure:
    """Create scatter matrix of Pareto front.

    Args:
        search_algorithm: Search algorithm after find_selection() has been called.
        selected_combination: Optional combination to highlight. If it
            matches a row of the scores table, that row is drawn as a
            star marker on top of the regular points.

    Returns:
        Plotly figure with scatter matrix.

    Raises:
        ValueError: If a requested objective is not a score column, if
            fewer than two objectives are available, or (per the original
            contract) if find_selection() hasn't been called.
    """
    df = search_algorithm.get_all_scores()

    if self.objectives is None:
        # Every column except the bookkeeping ones counts as an objective.
        obj_cols = [col for col in df.columns if col not in ['slices', 'label']]
    else:
        obj_cols = self.objectives
        for obj in obj_cols:
            if obj not in df.columns:
                raise ValueError(f"Objective '{obj}' not found in scores")

    if len(obj_cols) < 2:
        raise ValueError("Need at least 2 objectives for scatter matrix")

    # A policy without a pareto_mask attribute is treated like one whose mask is None.
    policy = search_algorithm.selection_policy
    pareto_mask = getattr(policy, 'pareto_mask', None)

    marker = dict(
        size=5,
        showscale=False,
        line=dict(width=0.5, color='white'),
    )
    if pareto_mask is not None:
        pareto = pareto_mask.values
        feasible = policy.feasible_mask.values

        df_plot = df.copy()
        df_plot['category'] = 'Dominated'
        df_plot.loc[~feasible, 'category'] = 'Infeasible'
        df_plot.loc[pareto & feasible, 'category'] = 'Pareto Front'

        marker.update(
            color=df_plot['category'].map({
                'Infeasible': 0,
                'Dominated': 1,
                'Pareto Front': 2,
            }),
            colorscale=[[0, 'lightgray'], [0.5, 'steelblue'], [1, 'darkorange']],
            # Pin the color range: without it plotly rescales to the codes
            # actually present, so e.g. a run with no infeasible points
            # would paint 'Dominated' gray instead of blue.
            cmin=0,
            cmax=2,
        )

    fig = go.Figure(data=go.Splom(
        dimensions=[dict(label=obj, values=df[obj]) for obj in obj_cols],
        marker=marker,
        text=df['label'] if 'label' in df else None,
        diagonal_visible=False,
        showupperhalf=False,
    ))

    if selected_combination is not None:
        selected_idx = df['slices'].apply(lambda x: x == selected_combination)
        if selected_idx.any():
            # Overlay the first matching row as a one-point Splom. It has the
            # same dimensions and layout flags as the main trace, so it is
            # drawn on the same axes in every panel.
            selected_vals = [df.loc[selected_idx, obj].values[0] for obj in obj_cols]
            fig.add_trace(go.Splom(
                dimensions=[
                    dict(label=obj, values=[val])
                    for obj, val in zip(obj_cols, selected_vals)
                ],
                marker=dict(
                    size=12,
                    symbol='star',
                    color='crimson',
                    line=dict(width=1.5, color='black'),
                ),
                name='Selected',
                diagonal_visible=False,
                showupperhalf=False,
            ))

    fig.update_layout(
        title='Scatter Matrix: All Objectives',
        height=150 * len(obj_cols),
        width=150 * len(obj_cols),
        showlegend=False,
    )

    return fig

ParetoParallelCoordinates ¶

Parallel coordinates plot of Pareto front.

Visualizes multi-objective trade-offs using parallel coordinates where each vertical axis represents one objective. Lines connecting axes show individual solutions, with Pareto-optimal solutions highlighted.

Parameters:

Name	Type	Description	Default
`objectives`	`list[str] \| None`	List of objective names to include (None = all objectives).	`None`

Examples:

>>> from energy_repset.diagnostics.results import ParetoParallelCoordinates
>>> parallel = ParetoParallelCoordinates()
>>> fig = parallel.plot(search_algorithm=workflow.search_algorithm)
>>> fig.update_layout(title='Pareto Front: Parallel Coordinates')
>>> fig.show()

Source code in energy_repset/diagnostics/results/pareto_parallel_coords.py

class ParetoParallelCoordinates:
    """Parallel-coordinates view of the scored combinations.

    Each vertical axis is one objective and each line is one scored
    combination. When the selection policy exposes a non-None
    ``pareto_mask``, lines are colored by status (infeasible, dominated,
    Pareto); otherwise all lines share a single color.

    Args:
        objectives: List of objective names to include (None = all objectives).

    Examples:
        >>> from energy_repset.diagnostics.results import ParetoParallelCoordinates
        >>> parallel = ParetoParallelCoordinates()
        >>> fig = parallel.plot(search_algorithm=workflow.search_algorithm)
        >>> fig.update_layout(title='Pareto Front: Parallel Coordinates')
        >>> fig.show()
    """

    def __init__(self, objectives: list[str] | None = None):
        """Store the objectives to draw.

        Args:
            objectives: List of objective names to include (None = all).
        """
        self.objectives = objectives

    def plot(
        self,
        search_algorithm: ObjectiveDrivenCombinatorialSearchAlgorithm,
    ) -> go.Figure:
        """Build the parallel-coordinates figure.

        Args:
            search_algorithm: Search algorithm after find_selection() has been called.

        Returns:
            Plotly figure with parallel coordinates plot.

        Raises:
            ValueError: If a requested objective is not a score column, if
                fewer than two objectives are available, or (per the original
                contract) if find_selection() hasn't been called.
        """
        scores = search_algorithm.get_all_scores()

        if self.objectives is not None:
            axis_names = self.objectives
            for name in axis_names:
                if name not in scores.columns:
                    raise ValueError(f"Objective '{name}' not found in scores")
        else:
            # Every column except the bookkeeping ones counts as an objective.
            axis_names = [col for col in scores.columns if col not in ['slices', 'label']]

        if len(axis_names) < 2:
            raise ValueError("Need at least 2 objectives for parallel coordinates")

        # A policy without a pareto_mask attribute is treated like one whose mask is None.
        policy = search_algorithm.selection_policy
        pareto_mask = getattr(policy, 'pareto_mask', None)
        feasible_mask = policy.feasible_mask if pareto_mask is not None else None

        axes = [dict(label=name, values=scores[name]) for name in axis_names]

        if pareto_mask is None:
            line_style = dict(
                color='steelblue',
                showscale=False,
            )
        else:
            is_pareto = pareto_mask.values
            is_feasible = feasible_mask.values

            # Status code per row: 0 = infeasible, 2 = Pareto, 1 = dominated.
            status_codes = [
                0 if not is_feasible[row] else (2 if is_pareto[row] else 1)
                for row in range(len(scores))
            ]
            line_style = dict(
                color=status_codes,
                colorscale=[
                    [0, 'lightgray'],
                    [0.5, 'steelblue'],
                    [1, 'darkorange'],
                ],
                showscale=True,
                cmin=0,
                cmax=2,
                colorbar=dict(
                    title='Status',
                    tickvals=[0, 1, 2],
                    ticktext=['Infeasible', 'Dominated', 'Pareto'],
                ),
            )

        fig = go.Figure(data=go.Parcoords(dimensions=axes, line=line_style))
        fig.update_layout(
            title='Parallel Coordinates: All Objectives',
            height=500,
        )

        return fig

__init__ ¶

__init__(objectives: list[str] | None = None)

Initialize parallel coordinates diagnostic.

Parameters:

Name	Type	Description	Default
`objectives`	`list[str] \| None`	List of objective names to include (None = all).	`None`

Source code in energy_repset/diagnostics/results/pareto_parallel_coords.py

def __init__(self, objectives: list[str] | None = None):
    """Initialize parallel coordinates diagnostic.

    Args:
        objectives: List of objective names to include (None = all).
            The value is stored unchanged on ``self.objectives``; no
            validation is performed here.
    """
    self.objectives = objectives

plot ¶

plot(search_algorithm: ObjectiveDrivenCombinatorialSearchAlgorithm) -> Figure

Create parallel coordinates plot of Pareto front.

Parameters:

Name	Type	Description	Default
`search_algorithm`	`ObjectiveDrivenCombinatorialSearchAlgorithm`	Search algorithm after find_selection() has been called.	required

Returns:

Type	Description
`Figure`	Plotly figure with parallel coordinates plot.

Raises:

Type	Description
`ValueError`	If find_selection() hasn't been called.

Source code in energy_repset/diagnostics/results/pareto_parallel_coords.py

def plot(
    self,
    search_algorithm: ObjectiveDrivenCombinatorialSearchAlgorithm,
) -> go.Figure:
    """Build the parallel-coordinates figure.

    Each objective becomes one vertical axis. When the selection policy
    exposes a non-None ``pareto_mask``, lines are colored by status
    (infeasible, dominated, Pareto); otherwise all lines share one color.

    Args:
        search_algorithm: Search algorithm after find_selection() has been called.

    Returns:
        Plotly figure with parallel coordinates plot.

    Raises:
        ValueError: If a requested objective is not a score column, if
            fewer than two objectives are available, or (per the original
            contract) if find_selection() hasn't been called.
    """
    scores = search_algorithm.get_all_scores()

    if self.objectives is not None:
        axis_names = self.objectives
        for name in axis_names:
            if name not in scores.columns:
                raise ValueError(f"Objective '{name}' not found in scores")
    else:
        # Every column except the bookkeeping ones counts as an objective.
        axis_names = [col for col in scores.columns if col not in ['slices', 'label']]

    if len(axis_names) < 2:
        raise ValueError("Need at least 2 objectives for parallel coordinates")

    # A policy without a pareto_mask attribute is treated like one whose mask is None.
    policy = search_algorithm.selection_policy
    pareto_mask = getattr(policy, 'pareto_mask', None)
    feasible_mask = policy.feasible_mask if pareto_mask is not None else None

    axes = [dict(label=name, values=scores[name]) for name in axis_names]

    if pareto_mask is None:
        line_style = dict(
            color='steelblue',
            showscale=False,
        )
    else:
        is_pareto = pareto_mask.values
        is_feasible = feasible_mask.values

        # Status code per row: 0 = infeasible, 2 = Pareto, 1 = dominated.
        status_codes = [
            0 if not is_feasible[row] else (2 if is_pareto[row] else 1)
            for row in range(len(scores))
        ]
        line_style = dict(
            color=status_codes,
            colorscale=[
                [0, 'lightgray'],
                [0.5, 'steelblue'],
                [1, 'darkorange'],
            ],
            showscale=True,
            cmin=0,
            cmax=2,
            colorbar=dict(
                title='Status',
                tickvals=[0, 1, 2],
                ticktext=['Infeasible', 'Dominated', 'Pareto'],
            ),
        )

    fig = go.Figure(data=go.Parcoords(dimensions=axes, line=line_style))
    fig.update_layout(
        title='Parallel Coordinates: All Objectives',
        height=500,
    )

    return fig

ScoreContributionBars ¶

Bar chart showing final scores from each objective component.

Visualizes the contribution of each score component to understand which objectives were most influential in the final selection. Can display absolute scores or normalized as fractions of total.

Examples:

>>> from energy_repset.diagnostics.results import ScoreContributionBars
>>> contrib = ScoreContributionBars()
>>> fig = contrib.plot(result.scores, normalize=True)
>>> fig.update_layout(title='Score Component Contributions')
>>> fig.show()

Source code in energy_repset/diagnostics/results/score_contribution_bars.py

class ScoreContributionBars:
    """Bar chart of the final score of each objective component.

    One bar is drawn per entry of the scores dictionary, either with the raw
    values or, when normalization is requested, with each value divided by
    the sum of all values.

    Examples:
        >>> from energy_repset.diagnostics.results import ScoreContributionBars
        >>> contrib = ScoreContributionBars()
        >>> fig = contrib.plot(result.scores, normalize=True)
        >>> fig.update_layout(title='Score Component Contributions')
        >>> fig.show()
    """

    def plot(
        self,
        scores: Dict[str, float],
        normalize: bool = False,
    ) -> go.Figure:
        """Render the score components as a bar chart.

        Args:
            scores: Dictionary of scores from each component (from result.scores).
            normalize: If True, show as fractions of total score.

        Returns:
            Plotly figure with bar chart.

        Raises:
            ValueError: If ``scores`` is empty, or if ``normalize`` is True
                and the scores sum to zero.
        """
        if not scores:
            raise ValueError("Scores dictionary is empty")

        labels = list(scores.keys())
        heights = list(scores.values())
        axis_label = 'Score Value'

        if normalize:
            denominator = sum(heights)
            if denominator == 0:
                raise ValueError("Cannot normalize: total score is zero")
            heights = [height / denominator for height in heights]
            axis_label = 'Normalized Score (fraction)'

        bars = go.Bar(
            x=labels,
            y=heights,
            # Value printed on each bar, matching what is plotted.
            text=[f'{height:.4f}' for height in heights],
            textposition='auto',
        )
        fig = go.Figure(data=[bars])

        fig.update_layout(
            xaxis_title='Score Component',
            yaxis_title=axis_label,
            showlegend=False,
            hovermode='x',
        )

        return fig

plot ¶

plot(scores: dict[str, float], normalize: bool = False) -> Figure

Create bar chart of score component contributions.

Parameters:

Name	Type	Description	Default
`scores`	`dict[str, float]`	Dictionary of scores from each component (from result.scores).	required
`normalize`	`bool`	If True, show as fractions of total score.	`False`

Returns:

Type	Description
`Figure`	Plotly figure with bar chart.

Source code in energy_repset/diagnostics/results/score_contribution_bars.py

def plot(
    self,
    scores: Dict[str, float],
    normalize: bool = False,
) -> go.Figure:
    """Render the score components as a bar chart.

    One bar is drawn per entry of ``scores``. With ``normalize`` set, each
    value is divided by the sum of all values before plotting.

    Args:
        scores: Dictionary of scores from each component (from result.scores).
        normalize: If True, show as fractions of total score.

    Returns:
        Plotly figure with bar chart.

    Raises:
        ValueError: If ``scores`` is empty, or if ``normalize`` is True
            and the scores sum to zero.
    """
    if not scores:
        raise ValueError("Scores dictionary is empty")

    labels = list(scores.keys())
    heights = list(scores.values())
    axis_label = 'Score Value'

    if normalize:
        denominator = sum(heights)
        if denominator == 0:
            raise ValueError("Cannot normalize: total score is zero")
        heights = [height / denominator for height in heights]
        axis_label = 'Normalized Score (fraction)'

    bars = go.Bar(
        x=labels,
        y=heights,
        # Value printed on each bar, matching what is plotted.
        text=[f'{height:.4f}' for height in heights],
        textposition='auto',
    )
    fig = go.Figure(data=[bars])

    fig.update_layout(
        xaxis_title='Score Component',
        yaxis_title=axis_label,
        showlegend=False,
        hovermode='x',
    )

    return fig

Diagnostics¶

Feature Space¶

FeatureSpaceScatter2D ¶

__init__ ¶

plot ¶

FeatureSpaceScatter3D ¶

__init__ ¶

plot ¶

FeatureSpaceScatterMatrix ¶

__init__ ¶

plot ¶

PCAVarianceExplained ¶

__init__ ¶

plot ¶

FeatureCorrelationHeatmap ¶

__init__ ¶

plot ¶

FeatureDistributions ¶

__init__ ¶

plot ¶

Score Components¶

DistributionOverlayECDF ¶

__init__ ¶

plot ¶

DistributionOverlayHistogram ¶

__init__ ¶

plot ¶

CorrelationDifferenceHeatmap ¶

__init__ ¶

plot ¶

DiurnalProfileOverlay ¶

__init__ ¶

plot ¶

Results¶

ResponsibilityBars ¶

__init__ ¶

plot ¶

ParetoScatter2D ¶

__init__ ¶

plot ¶

ParetoScatterMatrix ¶

__init__ ¶

plot ¶

ParetoParallelCoordinates ¶

__init__ ¶

plot ¶

ScoreContributionBars ¶

plot ¶

init ¶

init ¶

init ¶

init ¶

init ¶

init ¶

init ¶

init ¶

init ¶

init ¶

init ¶

init ¶

init ¶

init ¶