Skip to content

Representation Models

RepresentationModel

Bases: ABC

Base class for representation models (Pillar R).

Defines how selected representative periods represent the full dataset by calculating responsibility weights. The model is first fitted to learn about the entire dataset, then the weigh() method calculates weights for specific selections.

Different models implement different weighting strategies: - Uniform: Equal weights (e.g., 365/k for yearly data) - Cluster-based: Weights proportional to cluster sizes - Blended: Soft assignment where each period is a weighted mix of representatives

Examples:

>>> class UniformWeights(RepresentationModel):
...     def fit(self, context: ProblemContext):
...         self.n_total = len(context.slicer.slices)
...
...     def weigh(self, combination: SliceCombination) -> Dict[Hashable, float]:
...         weight = self.n_total / len(combination)
...         return {slice_id: weight for slice_id in combination}
...
>>> model = UniformWeights()
>>> model.fit(context)
>>> weights = model.weigh((0, 3, 6, 9))
>>> print(weights)  # {0: 91.25, 3: 91.25, 6: 91.25, 9: 91.25} for 365 days, k=4
>>> class ClusterSizeWeights(RepresentationModel):
...     def fit(self, context: ProblemContext):
...         from sklearn.cluster import KMeans
...         self.kmeans = KMeans(n_clusters=4)
...         self.kmeans.fit(context.df_features)
...
...     def weigh(self, combination: SliceCombination) -> Dict[Hashable, float]:
...         labels = self.kmeans.labels_
...         weights = {}
...         for i, slice_id in enumerate(combination):
...             cluster_size = (labels == i).sum()
...             weights[slice_id] = cluster_size
...         return weights
...
>>> model = ClusterSizeWeights()
>>> model.fit(context)
>>> weights = model.weigh((0, 3, 6, 9))
>>> print(weights)  # Weights proportional to cluster membership
Source code in energy_repset/representation/representation.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
class RepresentationModel(ABC):
    """Base class for representation models (Pillar R).

    Defines how selected representative periods represent the full dataset by
    calculating responsibility weights. The model is first fitted to learn about
    the entire dataset, then the weigh() method calculates weights for specific
    selections. Implementations typically persist what they learn in fit() as
    instance attributes for later use by weigh().

    Different models implement different weighting strategies:
    - Uniform: Equal weights (e.g., 365/k for yearly data)
    - Cluster-based: Weights proportional to cluster sizes
    - Blended: Soft assignment where each period is a weighted mix of representatives

    Examples:
        >>> class UniformWeights(RepresentationModel):
        ...     def fit(self, context: ProblemContext):
        ...         self.n_total = len(context.slicer.slices)
        ...
        ...     def weigh(self, combination: SliceCombination) -> Dict[Hashable, float]:
        ...         weight = self.n_total / len(combination)
        ...         return {slice_id: weight for slice_id in combination}
        ...
        >>> model = UniformWeights()
        >>> model.fit(context)
        >>> weights = model.weigh((0, 3, 6, 9))
        >>> print(weights)  # {0: 91.25, 3: 91.25, 6: 91.25, 9: 91.25} for 365 days, k=4

        >>> # Illustrative only: this sketch assumes the order of `combination`
        >>> # happens to match the KMeans cluster labels, and it returns raw
        >>> # (unnormalized) cluster sizes.
        >>> class ClusterSizeWeights(RepresentationModel):
        ...     def fit(self, context: ProblemContext):
        ...         from sklearn.cluster import KMeans
        ...         self.kmeans = KMeans(n_clusters=4)
        ...         self.kmeans.fit(context.df_features)
        ...
        ...     def weigh(self, combination: SliceCombination) -> Dict[Hashable, float]:
        ...         labels = self.kmeans.labels_
        ...         weights = {}
        ...         for i, slice_id in enumerate(combination):
        ...             cluster_size = (labels == i).sum()
        ...             weights[slice_id] = cluster_size
        ...         return weights
        ...
        >>> model = ClusterSizeWeights()
        >>> model.fit(context)
        >>> weights = model.weigh((0, 3, 6, 9))
        >>> print(weights)  # Weights proportional to cluster membership
    """

    @abstractmethod
    def fit(self, context: 'ProblemContext'):
        """Fit the representation model to the full dataset.

        This method performs any necessary pre-computation based on the full set
        of candidate slices (e.g., storing the feature matrix, fitting clustering
        models, computing distance matrices).

        Args:
            context: The problem context with df_features populated. Feature
                engineering must be run before calling this method.
        """
        ...  # abstract: concrete subclasses provide the fitting logic

    @abstractmethod
    def weigh(
        self,
        combination: SliceCombination
    ) -> Union[Dict[Hashable, float], pd.DataFrame]:
        """Calculate representation weights for a given selection.

        This method should only be called after the model has been fitted; it
        relies on state learned during fit().

        Args:
            combination: Tuple of selected slice identifiers for which to
                calculate representation weights.

        Returns:
            The calculated weights, either as a dictionary mapping each selected
            slice to its weight, or as a DataFrame for more complex weight
            structures (e.g., blended models where each original period has
            weights across multiple representatives).
        """
        ...  # abstract: concrete subclasses implement the weighting strategy

fit abstractmethod

fit(context: 'ProblemContext')

Fit the representation model to the full dataset.

This method performs any necessary pre-computation based on the full set of candidate slices (e.g., storing the feature matrix, fitting clustering models, computing distance matrices).

Parameters:

Name Type Description Default
context 'ProblemContext'

The problem context with df_features populated. Feature engineering must be run before calling this method.

required
Source code in energy_repset/representation/representation.py
60
61
62
63
64
65
66
67
68
69
70
71
72
@abstractmethod
def fit(self, context: 'ProblemContext'):
    """Fit the representation model to the full dataset.

    This method performs any necessary pre-computation based on the full set
    of candidate slices (e.g., storing the feature matrix, fitting clustering
    models, computing distance matrices). Implementations typically persist
    the learned state as instance attributes for later use by weigh().

    Args:
        context: The problem context with df_features populated. Feature
            engineering must be run before calling this method.
    """
    ...  # abstract: concrete subclasses provide the fitting logic

weigh abstractmethod

weigh(combination: SliceCombination) -> dict[Hashable, float] | DataFrame

Calculate representation weights for a given selection.

This method should only be called after the model has been fitted.

Parameters:

Name Type Description Default
combination SliceCombination

Tuple of selected slice identifiers for which to calculate representation weights.

required

Returns:

Type Description
dict[Hashable, float] | DataFrame

The calculated weights, either as a dictionary mapping each selected

dict[Hashable, float] | DataFrame

slice to its weight, or as a DataFrame for more complex weight

dict[Hashable, float] | DataFrame

structures (e.g., blended models where each original period has

dict[Hashable, float] | DataFrame

weights across multiple representatives).

Source code in energy_repset/representation/representation.py
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
@abstractmethod
def weigh(
    self,
    combination: SliceCombination
) -> Union[Dict[Hashable, float], pd.DataFrame]:
    """Calculate representation weights for a given selection.

    This method should only be called after the model has been fitted; it
    relies on state learned during fit().

    Args:
        combination: Tuple of selected slice identifiers for which to
            calculate representation weights.

    Returns:
        The calculated weights, either as a dictionary mapping each selected
        slice to its weight, or as a DataFrame for more complex weight
        structures (e.g., blended models where each original period has
        weights across multiple representatives).
    """
    ...  # abstract: concrete subclasses implement the weighting strategy

UniformRepresentationModel

Bases: RepresentationModel

Assigns equal weights to all selected representatives.

The simplest representation model where each selected period gets weight 1/k. This is appropriate when you want each representative to contribute equally to downstream modeling, regardless of how many original periods it represents.

Examples:

>>> model = UniformRepresentationModel()
>>> model.fit(context)
>>> weights = model.weigh((0, 3, 6, 9))
>>> print(weights)
    {0: 0.25, 3: 0.25, 6: 0.25, 9: 0.25}

>>> # For yearly data with k=4 months, each month represents ~91 days
>>> # Weights sum to 1.0 for normalized analysis
Source code in energy_repset/representation/uniform.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
class UniformRepresentationModel(RepresentationModel):
    """Weights every selected representative identically.

    The simplest representation model: each of the k selected periods gets
    weight 1/k, so the weights always sum to 1. Use it when every
    representative should count equally in downstream modeling, no matter how
    many original periods it stands in for.

    Examples:

        >>> model = UniformRepresentationModel()
        >>> model.fit(context)
        >>> weights = model.weigh((0, 3, 6, 9))
        >>> print(weights)
            {0: 0.25, 3: 0.25, 6: 0.25, 9: 0.25}

        >>> # For yearly data with k=4 months, each month represents ~91 days
        >>> # Weights sum to 1.0 for normalized analysis
    """

    def fit(self, context: ProblemContext):
        """Do nothing: uniform weighting needs no knowledge of the dataset.

        Args:
            context: Problem context (unused but required by protocol).
        """
        pass

    def weigh(self, combination: SliceCombination) -> Dict[Hashable, float]:
        """Assign each selected period the same weight, 1/k.

        Args:
            combination: Tuple of selected slice identifiers.

        Returns:
            Dictionary mapping each slice ID to its weight (1/k).
        """
        if len(combination) == 0:
            return {}
        return dict.fromkeys(combination, 1.0 / len(combination))

fit

fit(context: ProblemContext)

No fitting required for uniform weighting.

Parameters:

Name Type Description Default
context ProblemContext

Problem context (unused but required by protocol).

required
Source code in energy_repset/representation/uniform.py
32
33
34
35
36
37
38
def fit(self, context: ProblemContext):
    """Do nothing: uniform weighting needs no knowledge of the dataset.

    Args:
        context: Problem context (unused but required by protocol).
    """
    pass

weigh

weigh(combination: SliceCombination) -> dict[Hashable, float]

Calculate uniform weights (1/k for each selected period).

Parameters:

Name Type Description Default
combination SliceCombination

Tuple of selected slice identifiers.

required

Returns:

Type Description
dict[Hashable, float]

Dictionary mapping each slice ID to its weight (1/k).

Source code in energy_repset/representation/uniform.py
40
41
42
43
44
45
46
47
48
49
50
51
52
def weigh(self, combination: SliceCombination) -> Dict[Hashable, float]:
    """Assign each selected period the same weight, 1/k.

    Args:
        combination: Tuple of selected slice identifiers.

    Returns:
        Dictionary mapping each slice ID to its weight (1/k).
    """
    if len(combination) == 0:
        return {}
    return dict.fromkeys(combination, 1.0 / len(combination))

KMedoidsClustersizeRepresentation

Bases: RepresentationModel

Assigns weights based on k-medoids cluster sizes (hard assignment).

This representation model performs virtual k-medoids clustering where the selected periods are enforced as medoids (cluster centers). Each candidate period is assigned to its nearest medoid, and weights are calculated as the proportion of periods assigned to each medoid.

The weights reflect how many original periods each representative is responsible for, making this appropriate when representatives should be weighted by their "sphere of influence" in feature space.

Attributes:

Name Type Description
all_features_

Feature matrix for all candidate periods (set during fit).

all_slice_labels_

Labels for all candidate periods (set during fit).

Examples:

>>> model = KMedoidsClustersizeRepresentation()
>>> model.fit(context)  # context has 12 monthly candidates
>>> weights = model.weigh((Period('2024-01', 'M'), Period('2024-06', 'M')))
>>> print(weights)
    {Period('2024-01', 'M'): 0.583, Period('2024-06', 'M'): 0.417}
>>> # Jan represents 7 months, Jun represents 5 months
Source code in energy_repset/representation/k_medoids_clustersize.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
class KMedoidsClustersizeRepresentation(RepresentationModel):
    """Assigns weights based on k-medoids cluster sizes (hard assignment).

    Treats the selected periods as fixed medoids of a virtual k-medoids
    clustering: every candidate period is attached to its nearest medoid in
    feature space, and each medoid's weight is the fraction of candidates
    attached to it.

    The weights thus measure how many original periods each representative is
    responsible for — its "sphere of influence" in feature space.

    Attributes:
        all_features_: Feature matrix for all candidate periods (set during fit).
        all_slice_labels_: Labels for all candidate periods (set during fit).

    Examples:

        >>> model = KMedoidsClustersizeRepresentation()
        >>> model.fit(context)  # context has 12 monthly candidates
        >>> weights = model.weigh((Period('2024-01', 'M'), Period('2024-06', 'M')))
        >>> print(weights)
            {Period('2024-01', 'M'): 0.583, Period('2024-06', 'M'): 0.417}
        >>> # Jan represents 7 months, Jun represents 5 months
    """

    def fit(self, context: ProblemContext):
        """Capture the feature matrix and candidate labels for weigh().

        Args:
            context: Problem context containing df_features and candidates.
        """
        self.all_features_ = context.df_features
        self.all_slice_labels_ = context.slicer.unique_slices(context.df_raw.index)

    def weigh(self, combination: SliceCombination) -> Dict[Hashable, float]:
        """Calculate weights based on cluster sizes from hard assignment.

        Performs virtual k-medoids clustering where:
        1. Selected periods are enforced as medoids
        2. Each candidate is assigned to its nearest medoid (Euclidean distance)
        3. Weight = (cluster size) / (total candidates)

        Args:
            combination: Tuple of selected slice identifiers.

        Returns:
            Dictionary mapping each slice ID to its weight (proportion of
            candidates assigned to it).

        Raises:
            ValueError: If combination contains slices not in the feature matrix.
        """
        if len(combination) == 0:
            return {}

        labels = self.all_slice_labels_

        # Positional rows of the selected medoids within the feature matrix.
        medoid_rows = []
        for selected in combination:
            if selected not in labels:
                raise ValueError(f"Slice {selected} not found in candidates")
            medoid_rows.append(labels.index(selected))

        # (n_candidates, k) Euclidean distance matrix: candidates vs. medoids.
        dist_matrix = cdist(
            self.all_features_.values,
            self.all_features_.iloc[medoid_rows].values,
            metric='euclidean',
        )

        # Hard-assign each candidate to its closest medoid, then count members.
        nearest = dist_matrix.argmin(axis=1)
        member_counts = np.bincount(nearest, minlength=len(combination))

        # Normalize cluster sizes into proportions of all candidates.
        n_candidates = len(labels)
        return {
            selected: float(member_counts[pos]) / n_candidates
            for pos, selected in enumerate(combination)
        }

fit

fit(context: ProblemContext)

Store the full feature matrix for later clustering.

Parameters:

Name Type Description Default
context ProblemContext

Problem context containing df_features and candidates.

required
Source code in energy_repset/representation/k_medoids_clustersize.py
41
42
43
44
45
46
47
48
def fit(self, context: ProblemContext):
    """Store the full feature matrix for later clustering.

    Args:
        context: Problem context containing df_features and candidates.
    """
    # NOTE(review): weigh() pairs rows of all_features_ with entries of
    # all_slice_labels_ positionally (label position -> iloc row), so this
    # assumes df_features rows follow the order of slicer.unique_slices(...);
    # confirm that alignment upstream.
    self.all_features_ = context.df_features
    self.all_slice_labels_ = context.slicer.unique_slices(context.df_raw.index)

weigh

weigh(combination: SliceCombination) -> dict[Hashable, float]

Calculate weights based on cluster sizes from hard assignment.

Performs virtual k-medoids clustering where: 1. Selected periods are enforced as medoids 2. Each candidate is assigned to its nearest medoid (Euclidean distance) 3. Weight = (cluster size) / (total candidates)

Parameters:

Name Type Description Default
combination SliceCombination

Tuple of selected slice identifiers.

required

Returns:

Type Description
dict[Hashable, float]

Dictionary mapping each slice ID to its weight (proportion of

dict[Hashable, float]

candidates assigned to it).

Raises:

Type Description
ValueError

If combination contains slices not in the feature matrix.

Source code in energy_repset/representation/k_medoids_clustersize.py
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
def weigh(self, combination: SliceCombination) -> Dict[Hashable, float]:
    """Calculate weights based on cluster sizes from hard assignment.

    Performs virtual k-medoids clustering where:
    1. Selected periods are enforced as medoids
    2. Each candidate is assigned to its nearest medoid (Euclidean distance)
    3. Weight = (cluster size) / (total candidates)

    Args:
        combination: Tuple of selected slice identifiers.

    Returns:
        Dictionary mapping each slice ID to its weight (proportion of
        candidates assigned to it).

    Raises:
        ValueError: If combination contains slices not in the feature matrix.
    """
    if len(combination) == 0:
        return {}

    labels = self.all_slice_labels_

    # Positional rows of the selected medoids within the feature matrix.
    medoid_rows = []
    for selected in combination:
        if selected not in labels:
            raise ValueError(f"Slice {selected} not found in candidates")
        medoid_rows.append(labels.index(selected))

    # (n_candidates, k) Euclidean distance matrix: candidates vs. medoids.
    dist_matrix = cdist(
        self.all_features_.values,
        self.all_features_.iloc[medoid_rows].values,
        metric='euclidean',
    )

    # Hard-assign each candidate to its closest medoid, then count members.
    nearest = dist_matrix.argmin(axis=1)
    member_counts = np.bincount(nearest, minlength=len(combination))

    # Normalize cluster sizes into proportions of all candidates.
    n_candidates = len(labels)
    return {
        selected: float(member_counts[pos]) / n_candidates
        for pos, selected in enumerate(combination)
    }

BlendedRepresentationModel

Bases: RepresentationModel

Assigns weights using a blended representation (R_soft).

Each original slice in the full dataset is represented as a unique weighted combination of all the selected representatives. This is found by solving a small optimization problem for each original slice.

The output is a DataFrame where rows are the original slice labels, columns are the selected representative labels, and values are the weights.

Source code in energy_repset/representation/blended.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
class BlendedRepresentationModel(RepresentationModel):
    """
    Assigns weights using a blended representation (R_soft).

    Every original slice in the full dataset is expressed as its own weighted
    combination of all the selected representatives, obtained by solving one
    small constrained least-squares problem per original slice.

    The result is a DataFrame whose rows are the original slice labels, whose
    columns are the selected representative labels, and whose values are the
    blend weights.
    """

    def __init__(self, blend_type: str = 'convex'):
        """
        Parameters
        ----------
        blend_type : str, optional
            The type of blend to perform. 'convex' is the most common,
            ensuring weights are non-negative and sum to 1.
            (default is 'convex')
        """
        if blend_type != 'convex':
            raise NotImplementedError("Only 'convex' blend type is currently supported.")
        self.blend_type = blend_type

    def fit(self, context: 'ProblemContext'):
        """Stores the full feature matrix for later use."""
        self.all_features_ = context.df_features

    def weigh(self, combination: SliceCombination) -> pd.DataFrame:
        """Solve one convex blend per original slice against the selection.

        Args:
            combination: Tuple of selected representative slice labels.

        Returns:
            DataFrame of blend weights: rows = original slice labels,
            columns = selected representative labels.
        """
        if not combination:
            return pd.DataFrame()

        feats = self.all_features_
        # Feature rows of the selected representatives, hoisted out of the loop.
        rep_matrix = feats.loc[list(combination)].values
        k = len(combination)

        # Convex-blend feasible set: weights in [0, 1] summing to exactly 1.
        sum_to_one = {'type': 'eq', 'fun': lambda w: np.sum(w) - 1.0}
        box = [(0, 1) for _ in range(k)]

        per_slice = {}
        for slice_label, feature_row in feats.iterrows():
            target = feature_row.values

            # Objective: squared error between the original feature vector and
            # the weighted mix of representative vectors.
            def squared_error(w):
                return np.sum((target - np.dot(w, rep_matrix)) ** 2)

            # Start from a uniform blend and solve with SLSQP.
            fitted = minimize(
                squared_error,
                np.ones(k) / k,
                method='SLSQP',
                bounds=box,
                constraints=sum_to_one
            )

            per_slice[slice_label] = fitted.x

        # Assemble per-slice weight vectors into the final DataFrame.
        return pd.DataFrame.from_dict(
            per_slice,
            orient='index',
            columns=combination
        )

__init__

__init__(blend_type: str = 'convex')
Parameters

blend_type : str, optional The type of blend to perform. 'convex' is the most common, ensuring weights are non-negative and sum to 1. (default is 'convex')

Source code in energy_repset/representation/blended.py
24
25
26
27
28
29
30
31
32
33
34
35
def __init__(self, blend_type: str = 'convex'):
    """Configure the blend strategy.

    Args:
        blend_type: The type of blend to perform. 'convex' is the most
            common, ensuring weights are non-negative and sum to 1.
            Defaults to 'convex'.

    Raises:
        NotImplementedError: If any blend type other than 'convex' is given.
    """
    if blend_type != 'convex':
        raise NotImplementedError("Only 'convex' blend type is currently supported.")
    self.blend_type = blend_type

fit

fit(context: 'ProblemContext')

Stores the full feature matrix for later use.

Source code in energy_repset/representation/blended.py
37
38
39
def fit(self, context: 'ProblemContext') -> None:
    """Stores the full feature matrix for later use.

    Args:
        context: Problem context with df_features populated; the cached
            matrix supplies both the blend targets and representative rows.
    """
    self.all_features_ = context.df_features