Source code for tscf_eval.data_loader.ucr

"""UCR Time Series Classification Archive loader.

This module provides the :class:`UCRLoader` class for loading datasets from
the UCR Time Series Classification Archive. The loader delegates to the
``aeon`` library for data retrieval and caching.

The UCR archive contains over 100 univariate and multivariate time-series
classification datasets commonly used for benchmarking. Datasets are
automatically downloaded and cached on first access.

Classes
-------
UCRLoader
    Loader for UCR archive datasets. Wraps ``aeon.datasets.load_classification``
    and returns data in the :class:`TSCData` container format.

Examples
--------
>>> from tscf_eval.data_loader import UCRLoader
>>>
>>> # Load the ItalyPowerDemand dataset
>>> loader = UCRLoader("ItalyPowerDemand")
>>> train_data = loader.load("train")
>>> test_data = loader.load("test")
>>>
>>> print(train_data.describe())
{'name': 'ItalyPowerDemand', 'split': 'train', ...}
>>>
>>> # Load both splits at once
>>> train, test = loader.load_both()

References
----------
.. [1] Dau, H. A., et al. (2019). The UCR time series archive.
       IEEE/CAA Journal of Automatica Sinica, 6(6), 1293-1305.
       https://www.cs.ucr.edu/~eamonn/time_series_data_2018/

See Also
--------
tscf_eval.data_loader.FileLoader : For loading custom file-based datasets.
tscf_eval.data_loader.TSCData : Data container returned by loaders.
"""

from __future__ import annotations

from aeon.datasets import load_classification
import numpy as np

from .base import DataLoader
from .tsc_data import Split, TSCData


class UCRLoader(DataLoader):
    """Loader for UCR time-series classification datasets from the UCR archive.

    This loader delegates to the ``aeon`` library's dataset utilities
    (``aeon.datasets.load_classification``). The ``aeon`` package must be
    installed for this loader to work.

    Parameters
    ----------
    dataset_name : str
        Name of the UCR dataset (e.g., 'ItalyPowerDemand', 'GunPoint').
    """

    def __init__(self, dataset_name: str):
        """Create a loader for a named UCR dataset.

        Parameters
        ----------
        dataset_name : str
            Name of the UCR dataset (e.g., 'ItalyPowerDemand').
        """
        self.dataset_name = dataset_name

    @staticmethod
    def _unpack_result(res):
        """Extract ``(X, y)`` from whatever ``load_classification`` returned.

        Accepted shapes of ``res``:

        - a tuple ``(X, y)`` or ``(X, y, meta)`` (the trailing element is
          ignored; aeon appends metadata when ``return_metadata=True``),
        - an object exposing non-``None`` ``data`` and ``target`` attributes,
        - an object exposing non-``None`` ``X`` and ``y`` attributes.

        Raises
        ------
        RuntimeError
            If ``res`` matches none of the shapes above.
        """
        if isinstance(res, tuple) and len(res) in (2, 3):
            return res[0], res[1]

        # Try the attribute-style containers in the same order as before:
        # (data, target) first, then (X, y).
        for x_attr, y_attr in (("data", "target"), ("X", "y")):
            x_val = getattr(res, x_attr, None)
            y_val = getattr(res, y_attr, None)
            if x_val is not None and y_val is not None:
                return x_val, y_val

        raise RuntimeError("Unexpected return value from aeon.datasets.load_classification")

    def load(self, split: Split, **kwargs) -> TSCData:
        """Load a split ('train' or 'test') of the dataset using aeon.

        Parameters
        ----------
        split : {'train', 'test'}
            Which split to load.
        **kwargs
            Additional arguments forwarded to
            ``aeon.datasets.load_classification``. ``name`` and ``split`` are
            supplied by this method and must not be passed again.

        Returns
        -------
        TSCData
            Dataset container with feature arrays ``X`` and labels ``y``.
            For univariate datasets, ``X`` has shape ``(N, T)``.
            For multivariate datasets, ``X`` has shape ``(N, C, T)`` where
            ``C`` is the number of channels/dimensions.

        Raises
        ------
        RuntimeError
            If aeon returns a value from which ``X`` and ``y`` cannot be
            extracted.
        """
        # aeon versions differ in what they return (tuple vs. object with
        # attributes); _unpack_result normalises all supported variants.
        res = load_classification(name=self.dataset_name, split=split, **kwargs)
        X_raw, y_raw = self._unpack_result(res)

        return TSCData.from_arrays(
            name=self.dataset_name,
            split=split,
            X=np.asarray(X_raw),
            y=np.asarray(y_raw),
            squeeze_univariate=True,
        )

    def describe(self) -> dict:
        """Return a compact description for the dataset.

        The description contains per-split metadata (from
        :meth:`TSCData.describe`) and an overall summary (currently the
        combined number of classes observed across splits). Both splits are
        loaded to compute it.

        Returns
        -------
        dict
            Dictionary with keys:

            - ``'name'``: Dataset name.
            - ``'splits'``: Dict mapping 'train'/'test' to their descriptions.
            - ``'overall'``: Dict with ``'n_classes'`` (total unique classes).
        """
        tr = self.load("train")
        te = self.load("test")

        # Count distinct labels over the union of both splits so a class that
        # appears in only one split is still included.
        n_classes = int(np.unique(np.concatenate([tr.y, te.y])).size)

        return {
            "name": self.dataset_name,
            "splits": {"train": tr.describe(), "test": te.describe()},
            "overall": {"n_classes": n_classes},
        }