"""Instance selection strategies for benchmark execution.
This module provides functions for selecting test instances to use
in benchmark runs. Strategies range from simple random sampling to
confidence-based stratified sampling.
"""
from __future__ import annotations
from typing import TYPE_CHECKING, Literal
import warnings
import numpy as np
from tscf_eval.counterfactuals.utils import soft_predict_proba_fn
if TYPE_CHECKING:
from .config import DatasetConfig, ModelConfig
# Public names exported by ``from <module> import *``. The underscore-prefixed
# helpers defined further down are intentionally not listed.
__all__ = [
"N_CONFIDENCE_BINS",
"SelectionStrategy",
"compute_confidence_bins",
"select_instances",
]
# Strategy names recognised by ``select_instances``; any other value makes it
# raise ``ValueError`` once subsampling is actually required.
SelectionStrategy = Literal["random", "stratified_confidence"]
"""Supported instance selection strategies."""
# Used by both the stratified sampler and ``compute_confidence_bins`` so the
# two always split confidence into the same number of quantile bins.
N_CONFIDENCE_BINS = 4
"""Number of quantile-based confidence bins for stratified selection."""
[docs]
def select_instances(
    dataset: DatasetConfig,
    model: ModelConfig,
    n_instances: int | None,
    strategy: SelectionStrategy,
    random_state: int | None,
) -> tuple[np.ndarray, np.ndarray | None, np.ndarray | None]:
    """Pick the test instances a benchmark run should use.

    When no subsampling is required (``n_instances`` is ``None`` or is at
    least the size of the test set) the test data is returned untouched and
    ``strategy`` is not inspected at all. Otherwise the requested strategy
    chooses a subset of row indices, which is applied to both the instances
    and, when present, the labels.

    Parameters
    ----------
    dataset : DatasetConfig
        Provides ``X_test`` and ``y_test``.
    model : ModelConfig
        Fitted model; only forwarded to the confidence-based strategy.
    n_instances : int or None
        How many instances to keep. ``None`` keeps everything.
    strategy : {"random", "stratified_confidence"}
        Which selection strategy to apply when subsampling.
    random_state : int or None
        Seed forwarded to the sampling helpers.

    Returns
    -------
    X_test : np.ndarray
        The selected test instances.
    y_test : np.ndarray or None
        Labels matching ``X_test``, or ``None`` if the dataset has none.
    bin_indices : np.ndarray or None
        Confidence bin of every selected instance as reported by the
        stratified strategy, or ``None`` when no stratified binning took
        place (random strategy, stratified fallback, or no subsampling).

    Raises
    ------
    ValueError
        If subsampling is required and ``strategy`` is not one of the
        supported names.
    """
    features = dataset.X_test
    labels = dataset.y_test

    # Nothing to subsample: hand everything back as-is.
    if n_instances is None:
        return features, labels, None
    if n_instances >= len(features):
        return features, labels, None

    if strategy == "stratified_confidence":
        picked, picked_bins = _select_stratified_confidence(
            features,
            model,
            n_instances,
            random_state,
        )
    elif strategy == "random":
        picked = _select_random(len(features), n_instances, random_state)
        picked_bins = None
    else:
        raise ValueError(
            f"Unknown selection strategy: {strategy!r}. "
            f"Expected 'random' or 'stratified_confidence'."
        )

    subset_labels = labels if labels is None else labels[picked]
    return features[picked], subset_labels, picked_bins
def _select_random(
n_total: int,
n_instances: int,
random_state: int | None,
) -> np.ndarray:
"""Select instances uniformly at random without replacement.
Parameters
----------
n_total : int
Total number of available instances.
n_instances : int
Number of instances to select.
random_state : int or None
Random seed for reproducibility.
Returns
-------
np.ndarray
Array of selected indices.
"""
rng = np.random.default_rng(random_state)
return rng.choice(n_total, size=n_instances, replace=False)
def _select_stratified_confidence(
    X_test: np.ndarray,
    model: ModelConfig,
    n_instances: int,
    random_state: int | None,
) -> tuple[np.ndarray, np.ndarray | None]:
    """Sample instances evenly across model-confidence quantile bins.

    Per-instance confidence is obtained from ``_get_soft_confidence``, the
    test set is cut into ``N_CONFIDENCE_BINS`` quantile bins, and roughly
    ``n_instances / N_CONFIDENCE_BINS`` instances are drawn from each bin.
    A bin that cannot supply its share passes the shortfall on to the next
    bin; whatever shortfall is left after the last bin is filled from the
    instances not selected so far.

    Two situations trigger a ``UserWarning`` and a fall back to uniform
    random selection: ``n_instances`` being smaller than the number of
    bins, and no confidence being available for the model.

    Parameters
    ----------
    X_test : np.ndarray
        Test instances.
    model : ModelConfig
        Fitted model used to estimate confidence.
    n_instances : int
        Total number of instances to select.
    random_state : int or None
        Seed for the NumPy generator.

    Returns
    -------
    indices : np.ndarray
        Indices of the selected instances.
    bin_indices : np.ndarray or None
        Bin of each selected instance (bins are computed over the full
        test set), or ``None`` when random selection was used instead.
    """
    # Stratification needs at least one draw per bin.
    if n_instances < N_CONFIDENCE_BINS:
        warnings.warn(
            f"n_instances={n_instances} is less than the number of "
            f"confidence bins ({N_CONFIDENCE_BINS}). "
            f"Falling back to random selection.",
            UserWarning,
            stacklevel=3,
        )
        return _select_random(len(X_test), n_instances, random_state), None

    # Soft probabilities are used so that classifiers whose predict_proba
    # is hard 0/1 still yield a spread of confidence values to bin on.
    confidence = _get_soft_confidence(X_test, model)
    if confidence is None:
        warnings.warn(
            f"Model '{model.name}' does not support predict_proba. "
            f"Falling back to random instance selection.",
            UserWarning,
            stacklevel=3,
        )
        return _select_random(len(X_test), n_instances, random_state), None

    # Quantile edges over the whole test set; only the interior edges are
    # passed to digitize so the resulting labels are 0..N_CONFIDENCE_BINS-1.
    edges = np.quantile(confidence, np.linspace(0, 1, N_CONFIDENCE_BINS + 1))
    interior_edges = edges[1:-1]
    bin_of_instance = np.digitize(confidence, interior_edges, right=True)

    members_by_bin: list[np.ndarray] = [
        np.nonzero(bin_of_instance == label)[0]
        for label in range(N_CONFIDENCE_BINS)
    ]

    # Even split; the first `leftover` bins receive one extra slot each.
    share, leftover = divmod(n_instances, N_CONFIDENCE_BINS)
    quotas = [
        share + 1 if label < leftover else share
        for label in range(N_CONFIDENCE_BINS)
    ]

    # Draw bin by bin, in bin order, carrying any shortfall forward.
    rng = np.random.default_rng(random_state)
    picks: list[np.ndarray] = []
    carry = 0
    for quota, members in zip(quotas, members_by_bin):
        wanted = quota + carry
        n_take = min(wanted, len(members))
        carry = wanted - n_take
        if len(members) == 0:
            continue
        picks.append(rng.choice(members, size=n_take, replace=False))

    # Shortfall left after the last bin: top up from unselected instances.
    if carry > 0:
        taken = np.concatenate(picks) if picks else np.array([], dtype=int)
        unused = np.setdiff1d(np.arange(len(X_test)), taken)
        if len(unused) > 0:
            top_up = rng.choice(unused, size=min(carry, len(unused)), replace=False)
            picks.append(top_up)

    indices = np.concatenate(picks)
    return indices, bin_of_instance[indices]
def compute_confidence_bins(
    X_test: np.ndarray,
    model: ModelConfig,
) -> np.ndarray | None:
    """Assign each given instance to a confidence quantile bin.

    Confidence comes from ``_get_soft_confidence`` and is split into
    :data:`N_CONFIDENCE_BINS` quantile bins, using the same
    quantile-and-digitize recipe as
    :func:`_select_stratified_confidence`. Note that the quantiles here
    are computed over the instances passed in.

    Parameters
    ----------
    X_test : np.ndarray
        Test instances (already selected).
    model : ModelConfig
        Fitted model used to estimate confidence.

    Returns
    -------
    np.ndarray or None
        One integer bin label per instance, from ``0`` (lowest
        confidence) to ``N_CONFIDENCE_BINS - 1``. ``None`` when no
        confidence could be obtained for the model.
    """
    scores = _get_soft_confidence(X_test, model)
    if scores is not None:
        edges = np.quantile(scores, np.linspace(0, 1, N_CONFIDENCE_BINS + 1))
        # Only the interior edges are used as cut points.
        labels: np.ndarray = np.digitize(scores, edges[1:-1], right=True)
        return labels
    return None
def _get_soft_confidence(
    X_test: np.ndarray,
    model: ModelConfig,
) -> np.ndarray | None:
    """Return the per-instance maximum class probability.

    The preferred source is :func:`soft_predict_proba_fn` applied to the
    wrapped estimator (``model.model``), so that classifiers whose
    ``predict_proba`` is hard 0/1 (e.g. ROCKET, RDST with
    ``RidgeClassifierCV``) still produce graded confidences. If that path
    raises ``TypeError``, ``AttributeError`` or ``ValueError``, a
    ``UserWarning`` is issued and the wrapper's own ``predict_proba`` is
    used instead.

    Parameters
    ----------
    X_test : np.ndarray
        Test instances.
    model : ModelConfig
        Fitted model wrapper.

    Returns
    -------
    np.ndarray or None
        Maximum probability along axis 1 for each instance, or ``None``
        when the fallback ``model.predict_proba`` returns ``None``.
    """
    # Preferred path. The whole computation stays inside the try so that a
    # failure at any step (building the function, calling it, reducing the
    # result) triggers the fallback below.
    try:
        predict_soft = soft_predict_proba_fn(model.model)
        probabilities: np.ndarray = np.asarray(predict_soft(X_test))
        return np.max(probabilities, axis=1)
    except (TypeError, AttributeError, ValueError) as exc:
        warnings.warn(
            f"soft_predict_proba_fn failed for model '{model.name}': {exc}. "
            f"Falling back to raw predict_proba for confidence estimation.",
            UserWarning,
            stacklevel=2,
        )

    # Fallback path: the wrapper's raw predict_proba.
    raw = model.predict_proba(X_test)
    return None if raw is None else np.max(raw, axis=1)