# Source code for tscf_eval.evaluator.metrics.model

"""Model-based counterfactual evaluation metrics.

This module provides metrics that require a fitted model to evaluate
counterfactuals: Confidence and Controllability.

Classes
-------
Confidence
    Reports model confidence statistics for instances.
Controllability
    Assesses how easily a counterfactual can be reverted.
"""

from __future__ import annotations

import numpy as np

from ..base import Metric
from ._utils import ensure_array


class Confidence(Metric):
    """Summarise classifier confidence on originals and counterfactuals.

    Confidence is taken to be the largest predicted class probability of
    each instance. The metric reports its mean over the original instances,
    its mean over the counterfactual instances, and the mean per-instance
    difference between the two.

    See Le et al. (2023) for details.
    """

    direction = "maximize"  # Higher confidence in CF prediction is better

    def name(self) -> str:
        """Return the identifier of this metric.

        Returns
        -------
        str
            ``'confidence'``.
        """
        return "confidence"

    def compute(
        self,
        X: np.ndarray,
        X_cf: np.ndarray,
        model=None,
        **kwargs,
    ) -> dict[str, float]:
        """Compute mean maximum-probability statistics.

        Parameters
        ----------
        X : np.ndarray
            Original instances of shape ``(M, ...)``.
        X_cf : np.ndarray
            Counterfactual instances of shape ``(M, ...)``.
        model : object
            Classifier with a ``predict_proba`` method.
        **kwargs
            Optional pre-computed probabilities. ``_cached_proba_X`` and
            ``_cached_proba_X_cf`` are used in place of calling
            ``model.predict_proba`` on ``X`` / ``X_cf`` when they are
            present and not ``None``. Other keys are ignored.

        Returns
        -------
        dict
            Dictionary with keys:

            - ``mean_conf_orig``: Mean max probability for original instances.
            - ``mean_conf_cf``: Mean max probability for counterfactuals.
            - ``mean_conf_delta``: Mean difference (cf - orig).

        Raises
        ------
        ValueError
            If ``model`` is ``None`` (checked even when cached
            probabilities are supplied).
        """
        if model is None:
            raise ValueError("Confidence requires a `model` with predict_proba.")

        X = ensure_array(X)
        X_cf = ensure_array(X_cf)

        # Prefer probabilities handed in by the caller; only query the model
        # for whichever side is missing.
        proba_before = kwargs.get("_cached_proba_X")
        proba_after = kwargs.get("_cached_proba_X_cf")
        if proba_before is None:
            proba_before = model.predict_proba(X)
        if proba_after is None:
            proba_after = model.predict_proba(X_cf)

        # Per-instance confidence = highest class probability in each row.
        conf_before = np.max(proba_before, axis=1)
        conf_after = np.max(proba_after, axis=1)
        conf_shift = conf_after - conf_before

        summary = {}
        summary["mean_conf_orig"] = float(np.mean(conf_before))
        summary["mean_conf_cf"] = float(np.mean(conf_after))
        summary["mean_conf_delta"] = float(np.mean(conf_shift))
        return summary
class Controllability(Metric):
    """How easily a counterfactual can be reverted by partial controlled edits.

    For each counterfactual, this metric reverts random subsets of changed
    features at several fraction levels and checks whether the original
    prediction is restored. The per-instance score is the fraction of revert
    attempts that succeed; the metric is the mean of those scores over all
    instances.

    Parameters
    ----------
    revert_fractions : list of float, optional
        Fractions of changed features to revert at each probe level.
        Default is ``[0.1, 0.2, 0.3, 0.4, 0.5]``.
    n_samples : int, optional
        Number of random subsets to draw per fraction per instance.
        Default is ``10``.
    random_state : int or None, optional
        Seed for reproducibility. Default is ``None``.

    See Verma et al. (2024) for details.
    """

    direction = "maximize"  # Higher controllability is better

    # Probe levels used when the caller supplies no (or empty) fractions.
    _DEFAULT_REVERT_FRACTIONS = (0.1, 0.2, 0.3, 0.4, 0.5)

    def __init__(
        self,
        revert_fractions: list[float] | None = None,
        n_samples: int = 10,
        random_state: int | None = None,
    ):
        """Initialize the Controllability metric.

        Parameters
        ----------
        revert_fractions : list of float, optional
            Fractions of changed features to revert at each probe level.
            Any array-like of numbers is accepted and stored as a flat list
            of floats. ``None`` or an empty sequence selects the default
            ``[0.1, 0.2, 0.3, 0.4, 0.5]``.
        n_samples : int, default 10
            Number of random subsets to draw per fraction per instance.
        random_state : int or None, default None
            Seed for reproducibility.
        """
        fractions: list[float] = []
        if revert_fractions is not None:
            # Normalise through NumPy so that lists, tuples and arrays are all
            # accepted; a bare ``revert_fractions or default`` raises on
            # multi-element arrays because their truth value is ambiguous.
            fractions = np.asarray(revert_fractions, dtype=float).ravel().tolist()
        self.revert_fractions = fractions or list(self._DEFAULT_REVERT_FRACTIONS)
        self.n_samples = n_samples
        self.random_state = random_state

    def name(self) -> str:
        """Return the metric name.

        Returns
        -------
        str
            ``'controllability'``.
        """
        return "controllability"

    def _revert_candidates(self, flat_x, flat_xf, changed_idx, rng):
        """Build flattened probes that partially revert one counterfactual.

        For every fraction in ``revert_fractions``, ``n_samples`` random
        subsets of ``changed_idx`` are drawn without replacement and the
        selected positions of the counterfactual are reset to their original
        values.

        Parameters
        ----------
        flat_x : np.ndarray
            Flattened original instance.
        flat_xf : np.ndarray
            Flattened counterfactual instance.
        changed_idx : np.ndarray
            Flat indices at which the two instances differ (non-empty).
        rng : np.random.Generator
            Random generator used to draw the subsets.

        Returns
        -------
        list of np.ndarray
            Flattened candidate instances; empty when ``n_samples <= 0``.
        """
        n_changed = changed_idx.size
        candidates = []
        for frac in self.revert_fractions:
            # At least one feature is reverted, and never more than actually
            # changed: sampling without replacement fails for larger sizes.
            n_revert = min(n_changed, max(1, int(round(frac * n_changed))))
            for _ in range(self.n_samples):
                subset = rng.choice(changed_idx, size=n_revert, replace=False)
                cand = flat_xf.copy()
                cand[subset] = flat_x[subset]
                candidates.append(cand)
        return candidates

    def compute(
        self,
        X: np.ndarray,
        X_cf: np.ndarray,
        model=None,
        **kwargs,
    ) -> float:
        """Compute controllability score via random subset reverts.

        For each instance the method identifies which features changed, then
        for every fraction in ``revert_fractions`` it draws ``n_samples``
        random subsets of that size from the changed features, reverts them
        to their original values, and checks whether the model prediction is
        restored.

        An instance scores ``0.0`` without probing when the counterfactual
        has the same predicted label as the original, when no feature
        differs (per ``np.isclose``), or when no probes can be generated
        (``n_samples <= 0``).

        Parameters
        ----------
        X : np.ndarray
            Original instances of shape ``(M, ...)``.
        X_cf : np.ndarray
            Counterfactual instances of shape ``(M, ...)``.
        model : object
            Classifier with a ``predict`` method.
        **kwargs
            Optional pre-computed predictions. ``_cached_y_pred`` and
            ``_cached_y_cf_pred`` are used in place of calling
            ``model.predict`` on ``X`` / ``X_cf`` when they are present and
            not ``None``. Other keys are ignored.

        Returns
        -------
        float
            Mean controllability score in ``[0, 1]``. Higher values indicate
            that counterfactuals can be more easily reverted. ``nan`` is
            returned when there are no instances to score.

        Raises
        ------
        ValueError
            If ``model`` is ``None``.
        """
        if model is None:
            raise ValueError("Controllability requires a `model` to probe reverts.")

        X = ensure_array(X)
        X_cf = ensure_array(X_cf)
        M = X.shape[0]
        if M == 0:
            # The mean over zero instances is undefined; return NaN directly
            # instead of relying on np.mean([]) and its RuntimeWarning.
            return float("nan")

        rng = np.random.default_rng(self.random_state)

        # Use cached predictions if available, otherwise batch predict once
        all_orig_labels = kwargs.get("_cached_y_pred")
        all_cf_labels = kwargs.get("_cached_y_cf_pred")
        if all_orig_labels is None:
            all_orig_labels = np.asarray(model.predict(X))
        if all_cf_labels is None:
            all_cf_labels = np.asarray(model.predict(X_cf))

        scores = []
        for i in range(M):
            xi = X[i]
            xfi = X_cf[i]
            orig_label = int(all_orig_labels[i])
            cf_label = int(all_cf_labels[i])

            # A counterfactual that did not flip the prediction has nothing
            # to revert.
            if orig_label == cf_label:
                scores.append(0.0)
                continue

            flat_x = xi.reshape(-1)
            flat_xf = xfi.reshape(-1)
            changed_idx = np.nonzero(~np.isclose(flat_x, flat_xf))[0]
            if changed_idx.size == 0:
                scores.append(0.0)
                continue

            candidates = self._revert_candidates(flat_x, flat_xf, changed_idx, rng)
            if not candidates:
                # No probes were generated, so no revert was observed.
                scores.append(0.0)
                continue

            # One predict call per instance over all of its probes, restored
            # to the instance's original shape.
            cand_batch = np.stack(candidates).reshape((len(candidates),) + xi.shape)
            cand_preds = np.asarray(model.predict(cand_batch))
            n_reverting = int(np.sum(cand_preds == orig_label))
            scores.append(n_reverting / float(len(candidates)))

        return float(np.mean(scores))