Module tsflex.features.segmenter.strided_rolling_factory

Factory class for creating the proper StridedRolling instances.

TODO

Also create a SegmenterFactory interface, which the StridedRollingFactory would then implement.

Expand source code
"""
Factory class for creating the proper StridedRolling instances.

.. TODO::
    Also create a (SegmenterFactory) which the StridedRollingFactory implements

"""

__author__ = "Jonas Van Der Donckt"

from typing import List, Optional, Union

import pandas as pd

from ...utils.attribute_parsing import AttributeParser, DataType
from .strided_rolling import (
    SequenceStridedRolling,
    StridedRolling,
    TimeIndexSampleStridedRolling,
    TimeStridedRolling,
)


class StridedRollingFactory:
    """Factory that selects and builds the matching StridedRolling segmenter."""

    # Segmenter class to use when the data index type and the window/stride
    # argument type agree (or when no window is given).
    _datatype_to_stroll = {
        DataType.TIME: TimeStridedRolling,
        DataType.SEQUENCE: SequenceStridedRolling,
    }

    @staticmethod
    def get_segmenter(  # type: ignore[no-untyped-def]
        data: Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]],
        window: Union[int, float, pd.Timedelta],
        strides: Optional[List[Union[int, float, pd.Timedelta]]],
        **kwargs,
    ) -> StridedRolling:
        """Build the StridedRolling instance that fits the passed data.

        The concrete class is selected by comparing the type of the data (as
        reported by `AttributeParser.determine_type`) with the type of the
        window / stride arguments.

        Parameters
        ----------
        data : Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]]
            The data to segment.
        window : Union[int, float, pd.Timedelta]
            The window size to use for the segmentation.
        strides : Union[List[Union[int, float, pd.Timedelta]], None]
            The stride(s) to use for the segmentation. When None, only `window`
            is used to determine the argument type.
        **kwargs : dict, optional
            Additional keyword arguments which are forwarded unchanged to the
            constructor of the selected segmenter class.

        .. Note::
            When passing `time-based` data, but **int**-based window & stride
            params, the strided rolling will be `TimeIndexSampleStridedRolling`.
            This class **assumes** that **all data series** _roughly_ have the
            **same sample frequency**, as the windows and strides are interpreted
            in terms of **number of samples**.

        Raises
        ------
        ValueError
            When incompatible data & window-stride data types are passed (e.g.
            time window-stride args on sequence data-index).
        AssertionError
            When time-based data is combined with sequence-typed arguments, but
            `window` is not an int or `strides` is not a list of ints (only
            while assertions are enabled).

        Returns
        -------
        StridedRolling
            The constructed StridedRolling instance.

        """
        index_kind = AttributeParser.determine_type(data)
        # The window and the strides are typed together; without strides, the
        # window alone determines the argument type.
        seg_params = window if strides is None else [window] + strides
        param_kind = AttributeParser.determine_type(seg_params)

        # Matching types (or no window at all): look up the segmenter class.
        if window is None or index_kind.value == param_kind.value:
            segmenter_cls = StridedRollingFactory._datatype_to_stroll[index_kind]
            return segmenter_cls(data, window, strides, **kwargs)

        # Time-indexed data with sample-based args.
        # Note: this is very niche and thus requires advanced knowledge
        if index_kind == DataType.TIME and param_kind == DataType.SEQUENCE:
            assert isinstance(window, int)
            assert strides is None or (
                isinstance(strides, list)
                and all(isinstance(stride, int) for stride in strides)
            )
            return TimeIndexSampleStridedRolling(data, window, strides, **kwargs)

        if index_kind == DataType.SEQUENCE and param_kind == DataType.TIME:
            raise ValueError("Cannot segment a sequence-series with a time window")

        # This should never happen
        raise ValueError(
            f"Cannot segment data of type {index_kind} with window-stride of type {param_kind}"
        )

Classes

class StridedRollingFactory
Expand source code
class StridedRollingFactory:
    """Factory that selects and builds the matching StridedRolling segmenter."""

    # Segmenter class to use when the data index type and the window/stride
    # argument type agree (or when no window is given).
    _datatype_to_stroll = {
        DataType.TIME: TimeStridedRolling,
        DataType.SEQUENCE: SequenceStridedRolling,
    }

    @staticmethod
    def get_segmenter(  # type: ignore[no-untyped-def]
        data: Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]],
        window: Union[int, float, pd.Timedelta],
        strides: Optional[List[Union[int, float, pd.Timedelta]]],
        **kwargs,
    ) -> StridedRolling:
        """Build the StridedRolling instance that fits the passed data.

        The concrete class is selected by comparing the type of the data (as
        reported by `AttributeParser.determine_type`) with the type of the
        window / stride arguments.

        Parameters
        ----------
        data : Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]]
            The data to segment.
        window : Union[int, float, pd.Timedelta]
            The window size to use for the segmentation.
        strides : Union[List[Union[int, float, pd.Timedelta]], None]
            The stride(s) to use for the segmentation. When None, only `window`
            is used to determine the argument type.
        **kwargs : dict, optional
            Additional keyword arguments which are forwarded unchanged to the
            constructor of the selected segmenter class.

        .. Note::
            When passing `time-based` data, but **int**-based window & stride
            params, the strided rolling will be `TimeIndexSampleStridedRolling`.
            This class **assumes** that **all data series** _roughly_ have the
            **same sample frequency**, as the windows and strides are interpreted
            in terms of **number of samples**.

        Raises
        ------
        ValueError
            When incompatible data & window-stride data types are passed (e.g.
            time window-stride args on sequence data-index).
        AssertionError
            When time-based data is combined with sequence-typed arguments, but
            `window` is not an int or `strides` is not a list of ints (only
            while assertions are enabled).

        Returns
        -------
        StridedRolling
            The constructed StridedRolling instance.

        """
        index_kind = AttributeParser.determine_type(data)
        # The window and the strides are typed together; without strides, the
        # window alone determines the argument type.
        seg_params = window if strides is None else [window] + strides
        param_kind = AttributeParser.determine_type(seg_params)

        # Matching types (or no window at all): look up the segmenter class.
        if window is None or index_kind.value == param_kind.value:
            segmenter_cls = StridedRollingFactory._datatype_to_stroll[index_kind]
            return segmenter_cls(data, window, strides, **kwargs)

        # Time-indexed data with sample-based args.
        # Note: this is very niche and thus requires advanced knowledge
        if index_kind == DataType.TIME and param_kind == DataType.SEQUENCE:
            assert isinstance(window, int)
            assert strides is None or (
                isinstance(strides, list)
                and all(isinstance(stride, int) for stride in strides)
            )
            return TimeIndexSampleStridedRolling(data, window, strides, **kwargs)

        if index_kind == DataType.SEQUENCE and param_kind == DataType.TIME:
            raise ValueError("Cannot segment a sequence-series with a time window")

        # This should never happen
        raise ValueError(
            f"Cannot segment data of type {index_kind} with window-stride of type {param_kind}"
        )

Factory class for creating the appropriate StridedRolling segmenter.

Static methods

def get_segmenter(data, window, strides, **kwargs)
Expand source code
@staticmethod
def get_segmenter(  # type: ignore[no-untyped-def]
    data: Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]],
    window: Union[int, float, pd.Timedelta],
    strides: Optional[List[Union[int, float, pd.Timedelta]]],
    **kwargs,
) -> StridedRolling:
    """Build the StridedRolling instance that fits the passed data.

    The concrete class is selected by comparing the type of the data (as
    reported by `AttributeParser.determine_type`) with the type of the
    window / stride arguments.

    Parameters
    ----------
    data : Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]]
        The data to segment.
    window : Union[int, float, pd.Timedelta]
        The window size to use for the segmentation.
    strides : Union[List[Union[int, float, pd.Timedelta]], None]
        The stride(s) to use for the segmentation. When None, only `window`
        is used to determine the argument type.
    **kwargs : dict, optional
        Additional keyword arguments which are forwarded unchanged to the
        constructor of the selected segmenter class.

    .. Note::
        When passing `time-based` data, but **int**-based window & stride
        params, the strided rolling will be `TimeIndexSampleStridedRolling`.
        This class **assumes** that **all data series** _roughly_ have the
        **same sample frequency**, as the windows and strides are interpreted
        in terms of **number of samples**.

    Raises
    ------
    ValueError
        When incompatible data & window-stride data types are passed (e.g.
        time window-stride args on sequence data-index).
    AssertionError
        When time-based data is combined with sequence-typed arguments, but
        `window` is not an int or `strides` is not a list of ints (only
        while assertions are enabled).

    Returns
    -------
    StridedRolling
        The constructed StridedRolling instance.

    """
    index_kind = AttributeParser.determine_type(data)
    # The window and the strides are typed together; without strides, the
    # window alone determines the argument type.
    seg_params = window if strides is None else [window] + strides
    param_kind = AttributeParser.determine_type(seg_params)

    # Matching types (or no window at all): look up the segmenter class.
    if window is None or index_kind.value == param_kind.value:
        segmenter_cls = StridedRollingFactory._datatype_to_stroll[index_kind]
        return segmenter_cls(data, window, strides, **kwargs)

    # Time-indexed data with sample-based args.
    # Note: this is very niche and thus requires advanced knowledge
    if index_kind == DataType.TIME and param_kind == DataType.SEQUENCE:
        assert isinstance(window, int)
        assert strides is None or (
            isinstance(strides, list)
            and all(isinstance(stride, int) for stride in strides)
        )
        return TimeIndexSampleStridedRolling(data, window, strides, **kwargs)

    if index_kind == DataType.SEQUENCE and param_kind == DataType.TIME:
        raise ValueError("Cannot segment a sequence-series with a time window")

    # This should never happen
    raise ValueError(
        f"Cannot segment data of type {index_kind} with window-stride of type {param_kind}"
    )

Get the appropriate StridedRolling instance for the passed data.

The returned instance is determined by the index type of the data.

Parameters

data : Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]]
The data to segment.
window : Union[int, float, pd.Timedelta]
The window size to use for the segmentation.
strides : Union[List[Union[int, float, pd.Timedelta]], None]
The stride(s) to use for the segmentation.
**kwargs : dict, optional
Additional keyword arguments; see the StridedRolling documentation for more info.

Note

When passing time-based data, but int-based window & stride params, the strided rolling will be TimeIndexSampleStridedRolling. This class assumes that all data series roughly have the same sample frequency, as the windows and strides are interpreted in terms of number of samples.

Raises

ValueError
When incompatible data & window-stride data types are passed (e.g. time window-stride args on sequence data-index).

Returns

StridedRolling
The constructed StridedRolling instance.