Module tsflex.features.function_wrapper

FuncWrapper class for object-oriented representation of a function.

Expand source code
"""FuncWrapper class for object-oriented representation of a function."""

__author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost"

import functools
from typing import Any, Callable, List, Optional, Union

import numpy as np
import pandas as pd

from .. import __pdoc__
from ..utils.classes import FrozenClass
from ..utils.data import SUPPORTED_STROLL_TYPES

# Flag `FuncWrapper.__call__` in the package-level `__pdoc__` mapping; presumably this
# makes pdoc document the dunder method, which it would otherwise skip (TODO confirm).
__pdoc__["FuncWrapper.__call__"] = True


def _get_name(func: Callable) -> str:
    """Get the name of the function.

    Parameters
    ----------
    func: Callable
        The function whose name has to be returned, should be either a function or an
        object that is callable.

    Returns
    -------
    str
        The name of ``func`` in case of a function, or
        - the name of the wrapped callable in case of functools.partial (resolved
          with the same rules, so a partial around a callable object yields the
          class name of that object).
        - the name of the class in case of a callable object.

    Raises
    ------
    AssertionError
        When ``func`` is not callable.

    """
    assert callable(func), f"The given argument {func} is not callable!"
    try:
        return func.__name__
    except AttributeError:
        if isinstance(func, functools.partial):
            # Recurse instead of reading `func.func.__name__` directly: the wrapped
            # callable may itself lack a `__name__` (e.g. a callable object), which
            # would otherwise raise an AttributeError here.
            return _get_name(func.func)
        return type(func).__name__


class FuncWrapper(FrozenClass):
    """Function wrapper.

    A function wrapper which takes a numpy array / pandas series as input and returns
    one or multiple values. It also defines the names of the function outputs, and
    stores the keyword arguments of the function.

    Parameters
    ----------
    func : Callable
        The wrapped function.
    output_names : Union[List[str], str], optional
        The name of the outputs of the function, by default None. When None, the
        name of `func` is used as the single output name.
    input_type: Union[np.ndarray, pd.Series], optional
        The input type that the function requires (either np.ndarray or pd.Series),
        by default np.ndarray. For backwards compatibility, `np.array` is accepted
        and treated as `np.ndarray`.
        .. Note::
            Make sure to only set this argument to pd.Series if the function requires
            a pd.Series, since pd.Series strided-rolling is significantly less efficient.
            For a np.array it is possible to create very efficient views, but there is no
            such thing as a pd.Series view. Thus, for each stroll, a new series is created.
    vectorized: bool, optional
        Flag indicating whether `func` should be executed vectorized over all the
        segmented windows, by default False.
        .. Info::
            A vectorized function should take one or multiple series that each have the
            shape (nb. segmented windows, window size).
            For example a vectorized version of `np.max` is
            ``FuncWrapper(np.max, vectorized=True, axis=1)``.
        .. Note::
            * A function can only be applied in vectorized manner when the required
              series are REGULARLY sampled (and have the same index in case of multiple
              required series).
            * The `input_type` should be `np.ndarray` when `vectorized` is True. It does
              not make sense to use a `pd.Series`, as the index should be regularly
              sampled (see requirement above).
    **kwargs: dict, optional
        Keyword arguments which will also be passed to `func` on every call.

    Raises
    ------
    TypeError
        Raised when the `output_names` cannot be set.
    AssertionError
        Raised when `input_type` is not a supported type, or when `vectorized` is
        set while `input_type` is not `np.ndarray`.

    """

    def __init__(  # type: ignore[no-untyped-def]
        self,
        func: Callable,
        output_names: Optional[Union[List[str], str]] = None,
        input_type: Union[np.ndarray, pd.Series] = np.ndarray,
        vectorized: bool = False,
        **kwargs,
    ):
        """Create FuncWrapper instance."""
        self.func = func
        self.kwargs: dict = kwargs

        if isinstance(output_names, list):
            self.output_names = output_names
        elif isinstance(output_names, str):
            self.output_names = [output_names]
        elif not output_names:
            # No names given: fall back to the name of the wrapped function.
            self.output_names = [_get_name(func)]
        else:
            raise TypeError(f"`output_names` is unexpected type {type(output_names)}")

        # for backwards compatibility
        input_type = np.ndarray if input_type is np.array else input_type
        assert input_type in SUPPORTED_STROLL_TYPES, "Invalid input_type!"
        # Use logical `and` rather than bitwise `&`: with `&`, a truthy non-bool flag
        # such as `vectorized=2` evaluates to `2 & True == 0` and would silently let
        # an unsupported (vectorized, non-ndarray) combination through.
        assert not (
            vectorized and input_type is not np.ndarray
        ), "The input_type must be np.ndarray if vectorized is True!"
        self.input_type = input_type
        self.vectorized = vectorized

        # No attribute assignments below this call (see FrozenClass).
        self._freeze()

    def __repr__(self) -> str:
        """Return repr string."""
        return (
            f"{self.__class__.__name__}({_get_name(self.func)}, {self.output_names},"
            f" {self.kwargs})"
        )

    def __call__(self, *series: Union[np.ndarray, pd.Series]) -> Any:
        """Call wrapped function with passed data.

        Parameters
        ----------
        *series : Union[np.ndarray, pd.Series]
            The (multiple) input series for the function.

        Returns
        -------
        Any
            The function output for the passed series.

        """
        return self.func(*series, **self.kwargs)

Classes

class FuncWrapper (func, output_names=None, input_type=numpy.ndarray, vectorized=False, **kwargs)
Expand source code
class FuncWrapper(FrozenClass):
    """Function wrapper.

    A function wrapper which takes a numpy array / pandas series as input and returns
    one or multiple values. It also defines the names of the function outputs, and
    stores the keyword arguments of the function.

    Parameters
    ----------
    func : Callable
        The wrapped function.
    output_names : Union[List[str], str], optional
        The name of the outputs of the function, by default None. When None, the
        name of `func` is used as the single output name.
    input_type: Union[np.ndarray, pd.Series], optional
        The input type that the function requires (either np.ndarray or pd.Series),
        by default np.ndarray. For backwards compatibility, `np.array` is accepted
        and treated as `np.ndarray`.
        .. Note::
            Make sure to only set this argument to pd.Series if the function requires
            a pd.Series, since pd.Series strided-rolling is significantly less efficient.
            For a np.array it is possible to create very efficient views, but there is no
            such thing as a pd.Series view. Thus, for each stroll, a new series is created.
    vectorized: bool, optional
        Flag indicating whether `func` should be executed vectorized over all the
        segmented windows, by default False.
        .. Info::
            A vectorized function should take one or multiple series that each have the
            shape (nb. segmented windows, window size).
            For example a vectorized version of `np.max` is
            ``FuncWrapper(np.max, vectorized=True, axis=1)``.
        .. Note::
            * A function can only be applied in vectorized manner when the required
              series are REGULARLY sampled (and have the same index in case of multiple
              required series).
            * The `input_type` should be `np.ndarray` when `vectorized` is True. It does
              not make sense to use a `pd.Series`, as the index should be regularly
              sampled (see requirement above).
    **kwargs: dict, optional
        Keyword arguments which will also be passed to `func` on every call.

    Raises
    ------
    TypeError
        Raised when the `output_names` cannot be set.
    AssertionError
        Raised when `input_type` is not a supported type, or when `vectorized` is
        set while `input_type` is not `np.ndarray`.

    """

    def __init__(  # type: ignore[no-untyped-def]
        self,
        func: Callable,
        output_names: Optional[Union[List[str], str]] = None,
        input_type: Union[np.ndarray, pd.Series] = np.ndarray,
        vectorized: bool = False,
        **kwargs,
    ):
        """Create FuncWrapper instance."""
        self.func = func
        self.kwargs: dict = kwargs

        if isinstance(output_names, list):
            self.output_names = output_names
        elif isinstance(output_names, str):
            self.output_names = [output_names]
        elif not output_names:
            # No names given: fall back to the name of the wrapped function.
            self.output_names = [_get_name(func)]
        else:
            raise TypeError(f"`output_names` is unexpected type {type(output_names)}")

        # for backwards compatibility
        input_type = np.ndarray if input_type is np.array else input_type
        assert input_type in SUPPORTED_STROLL_TYPES, "Invalid input_type!"
        # Use logical `and` rather than bitwise `&`: with `&`, a truthy non-bool flag
        # such as `vectorized=2` evaluates to `2 & True == 0` and would silently let
        # an unsupported (vectorized, non-ndarray) combination through.
        assert not (
            vectorized and input_type is not np.ndarray
        ), "The input_type must be np.ndarray if vectorized is True!"
        self.input_type = input_type
        self.vectorized = vectorized

        # No attribute assignments below this call (see FrozenClass).
        self._freeze()

    def __repr__(self) -> str:
        """Return repr string."""
        return (
            f"{self.__class__.__name__}({_get_name(self.func)}, {self.output_names},"
            f" {self.kwargs})"
        )

    def __call__(self, *series: Union[np.ndarray, pd.Series]) -> Any:
        """Call wrapped function with passed data.

        Parameters
        ----------
        *series : Union[np.ndarray, pd.Series]
            The (multiple) input series for the function.

        Returns
        -------
        Any
            The function output for the passed series.

        """
        return self.func(*series, **self.kwargs)

Function wrapper.

A function wrapper which takes a numpy array / pandas series as input and returns one or multiple values. It also defines the names of the function outputs, and stores the function's keyword arguments.

Parameters

func : Callable
The wrapped function.
output_names : Union[List[str], str], optional
The name of the outputs of the function, by default None.
input_type : Union[np.ndarray, pd.Series], optional
The input type that the function requires (either np.ndarray or pd.Series), by default np.ndarray. For backwards compatibility, np.array is also accepted and treated as np.ndarray.

Note

Make sure to only set this argument to pd.Series if the function requires a pd.Series, since pd.Series strided-rolling is significantly less efficient. For a np.array it is possible to create very efficient views, but there is no such thing as a pd.Series view. Thus, for each stroll, a new series is created.
vectorized : bool, optional

Flag indicating whether func should be executed vectorized over all the segmented windows, by default False.

Info

A vectorized function should take one or multiple series that each have the shape (nb. segmented windows, window size). For example a vectorized version of np.max is FuncWrapper(np.max, vectorized=True, axis=1).

Note

  • A function can only be applied in vectorized manner when the required series are REGULARLY sampled (and have the same index in case of multiple required series).
  • The input_type should be np.ndarray when vectorized is True. It does not make sense to use a pd.Series, as the index should be regularly sampled (see requirement above).
**kwargs : dict, optional
Keyword arguments which will also be passed to the wrapped function.

Raises

TypeError
Raised when the output_names cannot be set.

Create FuncWrapper instance.

Ancestors

  • tsflex.utils.classes.FrozenClass

Methods

def __call__(self, *series)
Expand source code
def __call__(self, *series: Union[np.ndarray, pd.Series]) -> Any:
    """Apply the wrapped function to the given data.

    Parameters
    ----------
    *series : Union[np.ndarray, pd.Series]
        One or more input series, forwarded positionally to the wrapped function.

    Returns
    -------
    Any
        Whatever the wrapped function returns for the passed series.

    Notes
    -----
    The keyword arguments stored on the wrapper are passed along on every call.

    """
    wrapped, stored_kwargs = self.func, self.kwargs
    return wrapped(*series, **stored_kwargs)

Call wrapped function with passed data.

Parameters

*series : Union[np.ndarray, pd.Series]
The (multiple) input series for the function.

Returns

Any
The function output for the passed series.