Module tsflex.features.segmenter.strided_rolling_factory
Factory class for creating the proper StridedRolling instances.
TODO
Also create a (SegmenterFactory) which the StridedRollingFactory implements
Expand source code
"""
Factory class for creating the proper StridedRolling instances.
.. TODO::
Also create a (SegmenterFactory) which the StridedRollingFactory implements
"""
__author__ = "Jonas Van Der Donckt"
from typing import List, Optional, Union
import pandas as pd
from ...utils.attribute_parsing import AttributeParser, DataType
from .strided_rolling import (
SequenceStridedRolling,
StridedRolling,
TimeIndexSampleStridedRolling,
TimeStridedRolling,
)
class StridedRollingFactory:
"""Factory class for creating the appropriate StridedRolling segmenter."""
_datatype_to_stroll = {
DataType.TIME: TimeStridedRolling,
DataType.SEQUENCE: SequenceStridedRolling,
}
@staticmethod
def get_segmenter( # type: ignore[no-untyped-def]
data: Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]],
window: Union[int, float, pd.Timedelta],
strides: Optional[List[Union[int, float, pd.Timedelta]]],
**kwargs,
) -> StridedRolling:
"""Get the appropriate StridedRolling instance for the passed data.
The returned instance will be determined by the data its index type
Parameters
----------
data : Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]]
The data to segment.
window : Union[int, float, pd.Timedelta]
The window size to use for the segmentation.
strides : Union[List[Union[int, float, pd.Timedelta]], None]
The stride(s) to use for the segmentation.
**kwargs : dict, optional
Additional keyword arguments, see the `StridedRolling` its documentation
for more info.
.. Note::
When passing `time-based` data, but **int**-based window & stride params,
the strided rolling will be `TimeIndexSampleStridedRolling`. This class
**assumes** that **all data series** _roughly_ have the
**same sample frequency**, as the windows and strides are interpreted in
terms of **number of samples**.
Raises
------
ValueError
When incompatible data & window-stride data types are passed (e.g. time
window-stride args on sequence data-index).
Returns
-------
StridedRolling
The constructed StridedRolling instance.
"""
data_dtype = AttributeParser.determine_type(data)
if strides is None:
args_dtype = AttributeParser.determine_type(window)
else:
args_dtype = AttributeParser.determine_type([window] + strides)
if window is None or data_dtype.value == args_dtype.value:
return StridedRollingFactory._datatype_to_stroll[data_dtype](
data, window, strides, **kwargs
)
elif data_dtype == DataType.TIME and args_dtype == DataType.SEQUENCE:
# Note: this is very niche and thus requires advanced knowledge
assert isinstance(window, int)
if strides is not None:
assert isinstance(strides, list) and all(
isinstance(s, int) for s in strides
)
return TimeIndexSampleStridedRolling(data, window, strides, **kwargs)
elif data_dtype == DataType.SEQUENCE and args_dtype == DataType.TIME:
raise ValueError("Cannot segment a sequence-series with a time window")
# This should never happen
raise ValueError(
f"Cannot segment data of type {data_dtype} with window-stride of type {args_dtype}"
)
Classes
class StridedRollingFactory
-
Expand source code
class StridedRollingFactory: """Factory class for creating the appropriate StridedRolling segmenter.""" _datatype_to_stroll = { DataType.TIME: TimeStridedRolling, DataType.SEQUENCE: SequenceStridedRolling, } @staticmethod def get_segmenter( # type: ignore[no-untyped-def] data: Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]], window: Union[int, float, pd.Timedelta], strides: Optional[List[Union[int, float, pd.Timedelta]]], **kwargs, ) -> StridedRolling: """Get the appropriate StridedRolling instance for the passed data. The returned instance will be determined by the data its index type Parameters ---------- data : Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]] The data to segment. window : Union[int, float, pd.Timedelta] The window size to use for the segmentation. strides : Union[List[Union[int, float, pd.Timedelta]], None] The stride(s) to use for the segmentation. **kwargs : dict, optional Additional keyword arguments, see the `StridedRolling` its documentation for more info. .. Note:: When passing `time-based` data, but **int**-based window & stride params, the strided rolling will be `TimeIndexSampleStridedRolling`. This class **assumes** that **all data series** _roughly_ have the **same sample frequency**, as the windows and strides are interpreted in terms of **number of samples**. Raises ------ ValueError When incompatible data & window-stride data types are passed (e.g. time window-stride args on sequence data-index). Returns ------- StridedRolling The constructed StridedRolling instance. """ data_dtype = AttributeParser.determine_type(data) if strides is None: args_dtype = AttributeParser.determine_type(window) else: args_dtype = AttributeParser.determine_type([window] + strides) if window is None or data_dtype.value == args_dtype.value: return StridedRollingFactory._datatype_to_stroll[data_dtype]( data, window, strides, **kwargs ) elif data_dtype == DataType.TIME and args_dtype == DataType.SEQUENCE: # Note: this is very niche and thus requires advanced knowledge assert isinstance(window, int) if strides is not None: assert isinstance(strides, list) and all( isinstance(s, int) for s in strides ) return TimeIndexSampleStridedRolling(data, window, strides, **kwargs) elif data_dtype == DataType.SEQUENCE and args_dtype == DataType.TIME: raise ValueError("Cannot segment a sequence-series with a time window") # This should never happen raise ValueError( f"Cannot segment data of type {data_dtype} with window-stride of type {args_dtype}" )
Factory class for creating the appropriate StridedRolling segmenter.
Static methods
def get_segmenter(data, window, strides, **kwargs)
-
Expand source code
@staticmethod def get_segmenter( # type: ignore[no-untyped-def] data: Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]], window: Union[int, float, pd.Timedelta], strides: Optional[List[Union[int, float, pd.Timedelta]]], **kwargs, ) -> StridedRolling: """Get the appropriate StridedRolling instance for the passed data. The returned instance will be determined by the data its index type Parameters ---------- data : Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]] The data to segment. window : Union[int, float, pd.Timedelta] The window size to use for the segmentation. strides : Union[List[Union[int, float, pd.Timedelta]], None] The stride(s) to use for the segmentation. **kwargs : dict, optional Additional keyword arguments, see the `StridedRolling` its documentation for more info. .. Note:: When passing `time-based` data, but **int**-based window & stride params, the strided rolling will be `TimeIndexSampleStridedRolling`. This class **assumes** that **all data series** _roughly_ have the **same sample frequency**, as the windows and strides are interpreted in terms of **number of samples**. Raises ------ ValueError When incompatible data & window-stride data types are passed (e.g. time window-stride args on sequence data-index). Returns ------- StridedRolling The constructed StridedRolling instance. """ data_dtype = AttributeParser.determine_type(data) if strides is None: args_dtype = AttributeParser.determine_type(window) else: args_dtype = AttributeParser.determine_type([window] + strides) if window is None or data_dtype.value == args_dtype.value: return StridedRollingFactory._datatype_to_stroll[data_dtype]( data, window, strides, **kwargs ) elif data_dtype == DataType.TIME and args_dtype == DataType.SEQUENCE: # Note: this is very niche and thus requires advanced knowledge assert isinstance(window, int) if strides is not None: assert isinstance(strides, list) and all( isinstance(s, int) for s in strides ) return TimeIndexSampleStridedRolling(data, window, strides, **kwargs) elif data_dtype == DataType.SEQUENCE and args_dtype == DataType.TIME: raise ValueError("Cannot segment a sequence-series with a time window") # This should never happen raise ValueError( f"Cannot segment data of type {data_dtype} with window-stride of type {args_dtype}" )
Get the appropriate StridedRolling instance for the passed data.
The returned instance will be determined by the data its index type
Parameters
data
:Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]]
- The data to segment.
window
:Union[int, float, pd.Timedelta]
- The window size to use for the segmentation.
strides
:Union[List[Union[int, float, pd.Timedelta]], None]
- The stride(s) to use for the segmentation.
**kwargs
:dict
, optional- Additional keyword arguments, see the
StridedRolling
its documentation for more info.
Note
When passing
time-based
data, but int-based window & stride params, the strided rolling will beTimeIndexSampleStridedRolling
. This class assumes that all data series roughly have the same sample frequency, as the windows and strides are interpreted in terms of number of samples.Raises
ValueError
- When incompatible data & window-stride data types are passed (e.g. time window-stride args on sequence data-index).
Returns
StridedRolling
- The constructed StridedRolling instance.