# Source code for tscf_eval.benchmark.selection

"""Instance selection strategies for benchmark execution.

This module provides functions for selecting test instances to use
in benchmark runs. Strategies range from simple random sampling to
confidence-based stratified sampling.
"""

from __future__ import annotations

from typing import TYPE_CHECKING, Literal
import warnings

import numpy as np

from tscf_eval.counterfactuals.utils import soft_predict_proba_fn

if TYPE_CHECKING:
    from .config import DatasetConfig, ModelConfig

__all__ = [
    "N_CONFIDENCE_BINS",
    "SelectionStrategy",
    "compute_confidence_bins",
    "select_instances",
]

SelectionStrategy = Literal["random", "stratified_confidence"]
"""Supported instance selection strategies."""

N_CONFIDENCE_BINS = 4
"""Number of quantile-based confidence bins for stratified selection."""


def select_instances(
    dataset: DatasetConfig,
    model: ModelConfig,
    n_instances: int | None,
    strategy: SelectionStrategy,
    random_state: int | None,
) -> tuple[np.ndarray, np.ndarray | None, np.ndarray | None]:
    """Pick the test instances a benchmark run should use.

    Parameters
    ----------
    dataset : DatasetConfig
        Dataset whose ``X_test`` / ``y_test`` are sampled from.
    model : ModelConfig
        Fitted model; only forwarded to the confidence-based strategy.
    n_instances : int or None
        How many instances to keep. ``None`` (or a value at least as large
        as the test set) keeps everything.
    strategy : {"random", "stratified_confidence"}
        Instance selection strategy.
    random_state : int or None
        Random seed for reproducibility.

    Returns
    -------
    X_test : np.ndarray
        Selected test instances.
    y_test : np.ndarray or None
        Labels aligned with ``X_test``, or ``None`` if the dataset has none.
    bin_indices : np.ndarray or None
        Confidence bin of each selected instance (bins are computed over the
        full test set). ``None`` whenever stratified binning did not happen:
        random strategy, no subsampling, or the stratified strategy fell
        back to random selection.

    Raises
    ------
    ValueError
        If subsampling is required and ``strategy`` is not a known strategy.
    """
    features = dataset.X_test
    labels = dataset.y_test
    n_available = len(features)

    # Nothing to subsample: hand back the full test set untouched. Note that
    # the strategy is deliberately not inspected on this path.
    if n_instances is None or n_instances >= n_available:
        return features, labels, None

    bin_indices: np.ndarray | None = None
    if strategy == "stratified_confidence":
        chosen, bin_indices = _select_stratified_confidence(
            features,
            model,
            n_instances,
            random_state,
        )
    elif strategy == "random":
        chosen = _select_random(n_available, n_instances, random_state)
    else:
        raise ValueError(
            f"Unknown selection strategy: {strategy!r}. "
            f"Expected 'random' or 'stratified_confidence'."
        )

    picked_features = features[chosen]
    picked_labels = None if labels is None else labels[chosen]
    return picked_features, picked_labels, bin_indices
def _select_random(
    n_total: int,
    n_instances: int,
    random_state: int | None,
) -> np.ndarray:
    """Select instances uniformly at random without replacement.

    Parameters
    ----------
    n_total : int
        Total number of available instances.
    n_instances : int
        Number of instances to select.
    random_state : int or None
        Random seed for reproducibility.

    Returns
    -------
    np.ndarray
        Array of selected indices.
    """
    rng = np.random.default_rng(random_state)
    return rng.choice(n_total, size=n_instances, replace=False)


def _quantile_bin_indices(confidence: np.ndarray, n_bins: int) -> np.ndarray:
    """Assign each confidence value to one of ``n_bins`` quantile bins.

    Single source of truth for the binning shared by
    :func:`_select_stratified_confidence` and
    :func:`compute_confidence_bins`, so both always agree.

    Parameters
    ----------
    confidence : np.ndarray
        One confidence value per instance.
    n_bins : int
        Number of equal-frequency bins.

    Returns
    -------
    np.ndarray
        Integer bin index per instance in ``{0, ..., n_bins - 1}``, where 0
        is the lowest-confidence bin. Empty input yields an empty array.
    """
    confidence = np.asarray(confidence)
    # Quantiles of an empty array are undefined; there is nothing to bin.
    if confidence.size == 0:
        return np.zeros(0, dtype=np.intp)

    quantiles = np.linspace(0, 1, n_bins + 1)
    bin_edges = np.quantile(confidence, quantiles)
    # Only the interior edges are used, so results map to 0..n_bins-1.
    # right=True places a value equal to an edge in the lower bin.
    return np.digitize(confidence, bin_edges[1:-1], right=True)


def _select_stratified_confidence(
    X_test: np.ndarray,
    model: ModelConfig,
    n_instances: int,
    random_state: int | None,
) -> tuple[np.ndarray, np.ndarray | None]:
    """Select instances stratified by model confidence.

    Computes model confidence (max predicted probability) for each test
    instance, divides instances into :data:`N_CONFIDENCE_BINS` quantile-based
    bins, and samples from each bin. This ensures the benchmark covers
    instances the model is both very confident and very uncertain about.

    Falls back to random selection (with a ``UserWarning``) if no confidence
    can be obtained from the model or if ``n_instances`` is smaller than the
    number of bins.

    Parameters
    ----------
    X_test : np.ndarray
        Test instances.
    model : ModelConfig
        Fitted model with optional ``predict_proba`` support.
    n_instances : int
        Total number of instances to select.
    random_state : int or None
        Random seed for reproducibility.

    Returns
    -------
    indices : np.ndarray
        Array of selected indices, grouped by bin in ascending bin order,
        followed by any indices drawn to fill a remaining deficit.
    bin_indices : np.ndarray or None
        Confidence bin assignment for each selected instance (computed over
        the full test set), or ``None`` when falling back to random
        selection.
    """
    # Guard: need at least one instance per bin for stratification
    if n_instances < N_CONFIDENCE_BINS:
        warnings.warn(
            f"n_instances={n_instances} is less than the number of "
            f"confidence bins ({N_CONFIDENCE_BINS}). "
            f"Falling back to random selection.",
            UserWarning,
            stacklevel=3,
        )
        return _select_random(len(X_test), n_instances, random_state), None

    # Get soft probabilities for confidence estimation.
    # Some classifiers (ROCKET, RDST) use RidgeClassifierCV internally and
    # return hard 0/1 from predict_proba, making all confidences identical.
    # soft_predict_proba_fn converts decision_function outputs to smooth
    # probabilities via sigmoid/softmax, giving meaningful confidence spread.
    # For classifiers with native smooth predict_proba (LR, MLP, deep
    # learning) it falls through to predict_proba unchanged.
    confidence = _get_soft_confidence(X_test, model)
    if confidence is None:
        warnings.warn(
            f"Model '{model.name}' does not support predict_proba. "
            f"Falling back to random instance selection.",
            UserWarning,
            stacklevel=3,
        )
        return _select_random(len(X_test), n_instances, random_state), None

    # Bin every test instance (over the full test set), then group the
    # instance indices by bin.
    bin_indices_full = _quantile_bin_indices(confidence, N_CONFIDENCE_BINS)
    bins: list[np.ndarray] = [
        np.where(bin_indices_full == b)[0] for b in range(N_CONFIDENCE_BINS)
    ]

    # Even split across bins; the first `remainder` bins get one extra.
    base_per_bin, remainder = divmod(n_instances, N_CONFIDENCE_BINS)
    allocations = [
        base_per_bin + (1 if b < remainder else 0)
        for b in range(N_CONFIDENCE_BINS)
    ]

    # Sample from each bin; whatever a bin cannot supply is carried over to
    # the next bin as a deficit.
    rng = np.random.default_rng(random_state)
    selected: list[np.ndarray] = []
    deficit = 0
    for members, allocation in zip(bins, allocations):
        desired = allocation + deficit
        take = min(desired, len(members))
        deficit = desired - take
        if take:
            selected.append(rng.choice(members, size=take, replace=False))

    # np.concatenate rejects an empty list, so handle "nothing selected".
    indices = np.concatenate(selected) if selected else np.array([], dtype=int)

    # If a deficit survives the last bin, fill it from any unselected
    # instances regardless of bin.
    if deficit > 0:
        remaining = np.setdiff1d(np.arange(len(X_test)), indices)
        if len(remaining) > 0:
            extra = rng.choice(
                remaining,
                size=min(deficit, len(remaining)),
                replace=False,
            )
            indices = np.concatenate([indices, extra])

    return indices, bin_indices_full[indices]


def compute_confidence_bins(
    X_test: np.ndarray,
    model: ModelConfig,
) -> np.ndarray | None:
    """Compute confidence quantile bin assignments for given instances.

    Uses the same quantile-based binning as
    :func:`_select_stratified_confidence`: confidence is the maximum
    predicted probability per instance, then split into
    :data:`N_CONFIDENCE_BINS` equal-frequency bins. The quantiles are taken
    over the instances passed in, not over any larger test set.

    Parameters
    ----------
    X_test : np.ndarray
        Test instances (already selected).
    model : ModelConfig
        Fitted model with optional ``predict_proba`` support.

    Returns
    -------
    np.ndarray or None
        Integer array of shape ``(len(X_test),)`` with values in
        ``{0, 1, ..., N_CONFIDENCE_BINS - 1}`` where 0 is the
        lowest-confidence bin. Returns ``None`` if no confidence could be
        obtained from the model.
    """
    confidence = _get_soft_confidence(X_test, model)
    if confidence is None:
        return None
    return _quantile_bin_indices(confidence, N_CONFIDENCE_BINS)


def _get_soft_confidence(
    X_test: np.ndarray,
    model: ModelConfig,
) -> np.ndarray | None:
    """Compute per-instance confidence using soft probabilities.

    Uses :func:`soft_predict_proba_fn` so that classifiers with hard 0/1
    ``predict_proba`` (e.g. ROCKET, RDST with ``RidgeClassifierCV``) are
    converted to smooth probabilities via their ``decision_function``.
    Classifiers with native smooth ``predict_proba`` (logistic regression,
    MLP, deep learning) pass through unchanged.

    If that raises ``TypeError``, ``AttributeError`` or ``ValueError``, a
    ``UserWarning`` is emitted and ``model.predict_proba`` is used instead.

    Parameters
    ----------
    X_test : np.ndarray
        Test instances.
    model : ModelConfig
        Fitted model wrapper.

    Returns
    -------
    np.ndarray or None
        Max predicted probability per instance, or ``None`` if the fallback
        ``model.predict_proba`` returns ``None``.
    """
    # Try soft_predict_proba_fn first (handles ROCKET/RDST decision_function)
    try:
        soft_proba = soft_predict_proba_fn(model.model)
        proba_arr: np.ndarray = np.asarray(soft_proba(X_test))
        confidence: np.ndarray = np.max(proba_arr, axis=1)
        return confidence
    except (TypeError, AttributeError, ValueError) as exc:
        warnings.warn(
            f"soft_predict_proba_fn failed for model '{model.name}': {exc}. "
            f"Falling back to raw predict_proba for confidence estimation.",
            UserWarning,
            stacklevel=2,
        )

    # Fallback to raw predict_proba
    raw_proba = model.predict_proba(X_test)
    if raw_proba is None:
        return None
    # Coerce like the primary path so list-like outputs behave the same.
    fallback_confidence: np.ndarray = np.max(np.asarray(raw_proba), axis=1)
    return fallback_confidence