Module tsflex.features.function_wrapper
FuncWrapper class for object-oriented representation of a function.
Expand source code
"""FuncWrapper class for object-oriented representation of a function."""
__author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost"
import functools
from typing import Any, Callable, List, Optional, Union
import numpy as np
import pandas as pd
from .. import __pdoc__
from ..utils.classes import FrozenClass
from ..utils.data import SUPPORTED_STROLL_TYPES
# Register ``FuncWrapper.__call__`` with the value True in the package-level
# ``__pdoc__`` mapping. NOTE(review): presumably this opts the dunder method into
# the pdoc-generated API docs, which would otherwise skip it -- confirm against
# the pdoc version in use.
__pdoc__["FuncWrapper.__call__"] = True
def _get_name(func: Callable) -> str:
"""Get the name of the function.
Parameters
----------
func: Callable
The function whose name has to be returned, should be either a function or an
object that is callable.
Returns
-------
str
The name of ``func`` in case of a function, or
- the name of the wrapped function in case of functools.partial.
- the name of the class in case of a callable object.
"""
assert callable(func), f"The given argument {func} is not callable!"
try:
return func.__name__
except AttributeError:
if isinstance(func, functools.partial):
return func.func.__name__
return type(func).__name__
class FuncWrapper(FrozenClass):
    """Function wrapper.

    A function wrapper which takes a numpy array / pandas series as input and returns
    one or multiple values. It also defines the names of the function outputs, and
    stores the keyword arguments of the function.

    Parameters
    ----------
    func : Callable
        The wrapped function.
    output_names : Union[List[str], str], optional
        The name of the outputs of the function, by default None. A single string
        is wrapped in a list; when omitted, the name of `func` is used.
    input_type: Union[np.ndarray, pd.Series], optional
        The input type that the function requires (either np.ndarray or pd.Series),
        by default np.ndarray. Passing np.array is still accepted for backwards
        compatibility and is treated as np.ndarray.

        .. Note::
            Make sure to only set this argument to pd.Series if the function requires
            a pd.Series, since pd.Series strided-rolling is significantly less
            efficient. For a np.ndarray it is possible to create very efficient
            views, but there is no such thing as a pd.Series view. Thus, for each
            stroll, a new series is created.
    vectorized: bool, optional
        Flag indicating whether `func` should be executed vectorized over all the
        segmented windows, by default False.

        .. Info::
            A vectorized function should take one or multiple series that each have
            the shape (nb. segmented windows, window size).
            For example a vectorized version of `np.max` is
            ``FuncWrapper(np.max, vectorized=True, axis=1)``.

        .. Note::
            * A function can only be applied in vectorized manner when the required
              series are REGULARLY sampled (and have the same index in case of
              multiple required series).
            * The `input_type` should be `np.ndarray` when `vectorized` is True. It
              does not make sense to use a `pd.Series`, as the index should be
              regularly sampled (see requirement above).
    **kwargs: dict, optional
        Keyword arguments which will also be passed to the `function`.

    Raises
    ------
    TypeError
        Raised when the `output_names` cannot be set.
    AssertionError
        Raised when `input_type` is not a supported type, or when `vectorized` is
        truthy while `input_type` is not np.ndarray.

    """

    def __init__(  # type: ignore[no-untyped-def]
        self,
        func: Callable,
        output_names: Optional[Union[List[str], str]] = None,
        input_type: Union[np.ndarray, pd.Series] = np.ndarray,
        vectorized: bool = False,
        **kwargs,
    ):
        """Create FuncWrapper instance."""
        self.func = func
        self.kwargs: dict = kwargs

        # Normalize `output_names` to a list of names.
        if isinstance(output_names, list):
            self.output_names = output_names
        elif isinstance(output_names, str):
            self.output_names = [output_names]
        elif not output_names:
            # Nothing given -> fall back to the name of the wrapped function.
            self.output_names = [_get_name(func)]
        else:
            raise TypeError(f"`output_names` is unexpected type {type(output_names)}")

        # for backwards compatibility
        input_type = np.ndarray if input_type is np.array else input_type
        assert input_type in SUPPORTED_STROLL_TYPES, "Invalid input_type!"
        # Logical `and` instead of bitwise `&`: with `&`, a truthy non-bool flag
        # (e.g. `vectorized=2`, where `2 & True == 0`) would skip this check.
        assert not (
            vectorized and input_type is not np.ndarray
        ), "The input_type must be np.ndarray if vectorized is True!"
        self.input_type = input_type
        self.vectorized = vectorized

        # Last statement of the constructor, after all attributes are assigned.
        self._freeze()

    def __repr__(self) -> str:
        """Return repr string."""
        return (
            f"{self.__class__.__name__}({_get_name(self.func)}, {self.output_names},"
            f" {self.kwargs})"
        )

    def __call__(self, *series: Union[np.ndarray, pd.Series]) -> Any:
        """Call wrapped function with passed data.

        Parameters
        ----------
        *series : Union[np.ndarray, pd.Series]
            The (multiple) input series for the function.

        Returns
        -------
        Any
            The function output for the passed series.

        """
        return self.func(*series, **self.kwargs)
Classes
class FuncWrapper (func, output_names=None, input_type=numpy.ndarray, vectorized=False, **kwargs)
-
Expand source code
class FuncWrapper(FrozenClass):
    """Function wrapper.

    A function wrapper which takes a numpy array / pandas series as input and returns
    one or multiple values. It also defines the names of the function outputs, and
    stores the keyword arguments of the function.

    Parameters
    ----------
    func : Callable
        The wrapped function.
    output_names : Union[List[str], str], optional
        The name of the outputs of the function, by default None. A single string
        is wrapped in a list; when omitted, the name of `func` is used.
    input_type: Union[np.ndarray, pd.Series], optional
        The input type that the function requires (either np.ndarray or pd.Series),
        by default np.ndarray. Passing np.array is still accepted for backwards
        compatibility and is treated as np.ndarray.

        .. Note::
            Make sure to only set this argument to pd.Series if the function requires
            a pd.Series, since pd.Series strided-rolling is significantly less
            efficient. For a np.ndarray it is possible to create very efficient
            views, but there is no such thing as a pd.Series view. Thus, for each
            stroll, a new series is created.
    vectorized: bool, optional
        Flag indicating whether `func` should be executed vectorized over all the
        segmented windows, by default False.

        .. Info::
            A vectorized function should take one or multiple series that each have
            the shape (nb. segmented windows, window size).
            For example a vectorized version of `np.max` is
            ``FuncWrapper(np.max, vectorized=True, axis=1)``.

        .. Note::
            * A function can only be applied in vectorized manner when the required
              series are REGULARLY sampled (and have the same index in case of
              multiple required series).
            * The `input_type` should be `np.ndarray` when `vectorized` is True. It
              does not make sense to use a `pd.Series`, as the index should be
              regularly sampled (see requirement above).
    **kwargs: dict, optional
        Keyword arguments which will also be passed to the `function`.

    Raises
    ------
    TypeError
        Raised when the `output_names` cannot be set.
    AssertionError
        Raised when `input_type` is not a supported type, or when `vectorized` is
        truthy while `input_type` is not np.ndarray.

    """

    def __init__(  # type: ignore[no-untyped-def]
        self,
        func: Callable,
        output_names: Optional[Union[List[str], str]] = None,
        input_type: Union[np.ndarray, pd.Series] = np.ndarray,
        vectorized: bool = False,
        **kwargs,
    ):
        """Create FuncWrapper instance."""
        self.func = func
        self.kwargs: dict = kwargs

        # Normalize `output_names` to a list of names.
        if isinstance(output_names, list):
            self.output_names = output_names
        elif isinstance(output_names, str):
            self.output_names = [output_names]
        elif not output_names:
            # Nothing given -> fall back to the name of the wrapped function.
            self.output_names = [_get_name(func)]
        else:
            raise TypeError(f"`output_names` is unexpected type {type(output_names)}")

        # for backwards compatibility
        input_type = np.ndarray if input_type is np.array else input_type
        assert input_type in SUPPORTED_STROLL_TYPES, "Invalid input_type!"
        # Logical `and` instead of bitwise `&`: with `&`, a truthy non-bool flag
        # (e.g. `vectorized=2`, where `2 & True == 0`) would skip this check.
        assert not (
            vectorized and input_type is not np.ndarray
        ), "The input_type must be np.ndarray if vectorized is True!"
        self.input_type = input_type
        self.vectorized = vectorized

        # Last statement of the constructor, after all attributes are assigned.
        self._freeze()

    def __repr__(self) -> str:
        """Return repr string."""
        return (
            f"{self.__class__.__name__}({_get_name(self.func)}, {self.output_names},"
            f" {self.kwargs})"
        )

    def __call__(self, *series: Union[np.ndarray, pd.Series]) -> Any:
        """Call wrapped function with passed data.

        Parameters
        ----------
        *series : Union[np.ndarray, pd.Series]
            The (multiple) input series for the function.

        Returns
        -------
        Any
            The function output for the passed series.

        """
        return self.func(*series, **self.kwargs)
Function wrapper.
A function wrapper which takes a numpy array / pandas series as input and returns one or multiple values. It also defines the names of the function outputs, and stores the function its keyword arguments.
Parameters
func : Callable
- The wrapped function.
output_names : Union[List[str], str], optional
- The name of the outputs of the function, by default None.
input_type : Union[np.array, pd.Series], optional
- The input type that the function requires (either np.array or pd.Series), by default np.array.
Note
Make sure to only set this argument to pd.Series if the function requires a pd.Series, since pd.Series strided-rolling is significantly less efficient. For a np.array it is possible to create very efficient views, but there is no such thing as a pd.Series view. Thus, for each stroll, a new series is created.
vectorized : bool, optional
- Flag indicating whether `func` should be executed vectorized over all the segmented windows, by default False.
Info
A vectorized function should take one or multiple series that each have the shape (nb. segmented windows, window size). For example, a vectorized version of `np.max` is `FuncWrapper(np.max, vectorized=True, axis=1)`.
Note
- A function can only be applied in a vectorized manner when the required series are REGULARLY sampled (and have the same index in case of multiple required series).
- The `input_type` should be `np.ndarray` when `vectorized` is True. It does not make sense to use a `pd.Series`, as the index should be regularly sampled (see requirement above).
**kwargs : dict, optional
- Keyword arguments which will also be passed to the `function`.
Raises
TypeError
- Raised when the `output_names` cannot be set.
Create FuncWrapper instance.
Ancestors
- tsflex.utils.classes.FrozenClass
Methods
def __call__(self, *series)
-
Expand source code
def __call__(self, *series: Union[np.ndarray, pd.Series]) -> Any: """Call wrapped function with passed data. Parameters --------- *series : Union[np.ndarray, pd.Series] The (multiple) input series for the function. Returns ------- Any The function output for the passed series. """ return self.func(*series, **self.kwargs)
Call wrapped function with passed data.
Parameters
*series : Union[np.ndarray, pd.Series]
- The (multiple) input series for the function.
Returns
Any
- The function output for the passed series.