Skip to content

aggregation_interface

AbstractAggregator interface-class, subclassed by concrete aggregators.

AbstractAggregator

Bases: ABC

Source code in plotly_resampler/aggregation/aggregation_interface.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
class AbstractAggregator(ABC):
    """Interface class holding the input validation shared by aggregators.

    Stores optional dtype constraints (as lists of regexes) for the x and y
    arrays, plus the keyword arguments given at construction time, and exposes
    helpers that validate ``n_out`` and the passed arrays.
    """

    def __init__(
        self,
        x_dtype_regex_list: Optional[List[str]] = None,
        y_dtype_regex_list: Optional[List[str]] = None,
        **downsample_kwargs,
    ):
        """Constructor of AbstractAggregator.

        Parameters
        ----------
        x_dtype_regex_list: List[str], optional
            List containing the regex matching the supported datatypes for the x array,
            by default None.
        y_dtype_regex_list: List[str], optional
            List containing the regex matching the supported datatypes for the y array,
            by default None.
        downsample_kwargs: dict
            Additional kwargs passed to the downsample method.

        """
        self.x_dtype_regex_list = x_dtype_regex_list
        self.y_dtype_regex_list = y_dtype_regex_list
        self.downsample_kwargs = downsample_kwargs

    @staticmethod
    def _check_n_out(n_out: int) -> None:
        """Check if the n_out is valid (a strictly positive integer).

        Raises
        ------
        AssertionError
            If `n_out` is not an ``int`` / ``np.integer`` or is not > 0.

        """
        assert isinstance(
            n_out, (int, np.integer)
        ), f"n_out must be an integer, got {type(n_out)}"
        assert n_out > 0, f"n_out must be > 0, got {n_out}"

    @staticmethod
    def _process_args(*args) -> Tuple[Optional[np.ndarray], np.ndarray]:
        """Process the args into the x and y arrays.

        If only y is passed, x is set to None.

        Raises
        ------
        AssertionError
            If the number of positional arguments is not 1 or 2.

        """
        assert len(args) in [1, 2], "Must pass either 1 or 2 arrays"
        x, y = (None, args[0]) if len(args) == 1 else args
        return x, y

    @staticmethod
    def _check_arr(arr: np.ndarray, regex_list: Optional[List[str]] = None):
        """Check if the array is valid.

        The array must be a 1-dimensional ``np.ndarray`` whose dtype matches at
        least one regex of `regex_list` (when such a list is given).

        Raises
        ------
        AssertionError
            If `arr` is not a ``np.ndarray`` or is not 1-dimensional.
        ValueError
            If the dtype matches none of the regexes in `regex_list`.

        """
        assert isinstance(arr, np.ndarray), f"Expected np.ndarray, got {type(arr)}"
        assert arr.ndim == 1, f"Expected a 1-dimensional array, got ndim={arr.ndim}"
        AbstractAggregator._supports_dtype(arr, regex_list)

    def _check_x_y(self, x: Optional[np.ndarray], y: np.ndarray) -> None:
        """Check if the x and y arrays are valid.

        Both arrays are validated individually before their shapes are
        compared, so that a `y` which is not an array is reported by the
        array check instead of failing on the ``.shape`` attribute access.

        Raises
        ------
        AssertionError
            If an array is not a 1-dimensional ``np.ndarray``, or if x and y
            do not have the same shape.
        ValueError
            If an array its dtype is not supported.

        """
        # Check x (if not None)
        if x is not None:
            self._check_arr(x, self.x_dtype_regex_list)
        # Check y
        self._check_arr(y, self.y_dtype_regex_list)
        # Only compare the shapes once both are known to be arrays
        if x is not None:
            assert x.shape == y.shape, "x and y must have the same shape"

    @staticmethod
    def _supports_dtype(arr: np.ndarray, dtype_regex_list: Optional[List[str]] = None):
        """Verify that the dtype of `arr` matches one of the given regexes.

        The regexes are matched (anchored at the start) against ``str(arr.dtype)``.
        When `dtype_regex_list` is None, every dtype is accepted.

        Raises
        ------
        ValueError
            If no regex in `dtype_regex_list` matches the dtype.

        """
        # base case
        if dtype_regex_list is None:
            return

        dtype_name = str(arr.dtype)
        if any(re.match(pattern, dtype_name) for pattern in dtype_regex_list):
            return  # a match is found
        raise ValueError(
            f"{arr.dtype} doesn't match with any regex in {dtype_regex_list}"
        )

__init__(x_dtype_regex_list=None, y_dtype_regex_list=None, **downsample_kwargs)

Constructor of AbstractAggregator.

Parameters:

Name Type Description Default
x_dtype_regex_list Optional[List[str]]

List containing the regex matching the supported datatypes for the x array, by default None.

None
y_dtype_regex_list Optional[List[str]]

List containing the regex matching the supported datatypes for the y array, by default None.

None
downsample_kwargs

Additional kwargs passed to the downsample method.

{}
Source code in plotly_resampler/aggregation/aggregation_interface.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def __init__(
    self,
    x_dtype_regex_list: Optional[List[str]] = None,
    y_dtype_regex_list: Optional[List[str]] = None,
    **downsample_kwargs,
):
    """Store the dtype constraints and the downsample kwargs on the instance.

    Parameters
    ----------
    x_dtype_regex_list: List[str], optional
        Regexes describing the datatypes that are supported for the x array,
        by default None.
    y_dtype_regex_list: List[str], optional
        Regexes describing the datatypes that are supported for the y array,
        by default None.
    downsample_kwargs: dict
        Additional kwargs passed to the downsample method.

    """
    # All remaining keyword arguments are kept together as a single dict
    self.downsample_kwargs = downsample_kwargs
    # The dtype regex lists are stored as passed (None when not given)
    self.y_dtype_regex_list = y_dtype_regex_list
    self.x_dtype_regex_list = x_dtype_regex_list

DataAggregator

Bases: AbstractAggregator, ABC

Implementation of the AbstractAggregator interface for data aggregation.

DataAggregator differs from DataPointSelector in that it doesn’t select data points, but rather aggregates the data (e.g., mean). As such, the _aggregate method is responsible for aggregating the data, and thus returns a tuple of the aggregated x and y values.

Concrete implementations of this class must implement the _aggregate method, and have full responsibility for how they deal with other high-frequency properties, such as hovertext, marker_size, marker_color, etc.

Source code in plotly_resampler/aggregation/aggregation_interface.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
class DataAggregator(AbstractAggregator, ABC):
    """Implementation of the AbstractAggregator interface for data aggregation.

    DataAggregator differs from DataPointSelector in that it doesn't select data points,
    but rather aggregates the data (e.g., mean).
    As such, the `_aggregate` method is responsible for aggregating the data, and thus
    returns a tuple of the aggregated x and y values.

    Concrete implementations of this class must implement the `_aggregate` method, and
    have full responsibility on how they deal with other high-frequency properties, such
    as `hovertext`, `marker_size`, `marker_color`, etc ...
    """

    @abstractmethod
    def _aggregate(
        self,
        x: np.ndarray | None,
        y: np.ndarray,
        n_out: int,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Aggregate the (already validated) data; implemented by subclasses.

        Returns the aggregated x and y data, respectively.
        """
        raise NotImplementedError

    def aggregate(
        self,
        *args,
        n_out: int,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Aggregate the data.

        Parameters
        ----------
        x, y: np.ndarray
            The x and y data of the to-be-aggregated series.
            The x array is optional (i.e., if only 1 array is passed, it is assumed to
            be the y array).
            The array(s) must be 1-dimensional, and have the same length (if x is
            passed).
            These cannot be passed as keyword arguments, as they are positional-only.
        n_out: int
            The number of samples which the downsampled series should contain.
            This should be passed as a keyword argument.

        Returns
        -------
        Tuple[np.ndarray, np.ndarray]
            The aggregated x and y data, respectively.

        """
        # Check n_out
        assert n_out is not None

        # Get x and y
        # NOTE: resolved through `self` (the helper is inherited) so that this
        # class does not depend on the sibling DataPointSelector class.
        x, y = self._process_args(*args)

        # Check x and y
        self._check_x_y(x, y)

        return self._aggregate(x=x, y=y, n_out=n_out)

aggregate(*args, n_out)

Aggregate the data.

Parameters:

Name Type Description Default
x

The x and y data of the to-be-aggregated series. The x array is optional (i.e., if only 1 array is passed, it is assumed to be the y array). The array(s) must be 1-dimensional, and have the same length (if x is passed). These cannot be passed as keyword arguments, as they are positional-only.

required
y

The x and y data of the to-be-aggregated series. The x array is optional (i.e., if only 1 array is passed, it is assumed to be the y array). The array(s) must be 1-dimensional, and have the same length (if x is passed). These cannot be passed as keyword arguments, as they are positional-only.

required
n_out int

The number of samples which the downsampled series should contain. This should be passed as a keyword argument.

required

Returns:

Type Description
Tuple[ndarray, ndarray]

The aggregated x and y data, respectively.

Source code in plotly_resampler/aggregation/aggregation_interface.py
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
def aggregate(
    self,
    *args,
    n_out: int,
) -> Tuple[np.ndarray, np.ndarray]:
    """Aggregate the data.

    Parameters
    ----------
    x, y: np.ndarray
        The x and y data of the to-be-aggregated series.
        The x array is optional (i.e., if only 1 array is passed, it is assumed to
        be the y array).
        The array(s) must be 1-dimensional, and have the same length (if x is
        passed).
        These cannot be passed as keyword arguments, as they are positional-only.
    n_out: int
        The number of samples which the downsampled series should contain.
        This should be passed as a keyword argument.

    Returns
    -------
    Tuple[np.ndarray, np.ndarray]
        The aggregated x and y data, respectively.

    """
    # Check n_out
    assert n_out is not None

    # Get x and y
    # NOTE: resolved through `self` so that this method does not depend on the
    # sibling DataPointSelector class for a helper it has access to itself.
    x, y = self._process_args(*args)

    # Check x and y
    self._check_x_y(x, y)

    return self._aggregate(x=x, y=y, n_out=n_out)

DataPointSelector

Bases: AbstractAggregator, ABC

Implementation of the AbstractAggregator interface for data point selection.

DataPointSelector differs from DataAggregator in that it doesn’t aggregate the data (e.g., mean) but instead selects data points (e.g., first, last, min, max, etc.). As such, the _arg_downsample method returns the index positions of the selected data points.

This class utilizes the arg_downsample method to compute the index positions.

Source code in plotly_resampler/aggregation/aggregation_interface.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
class DataPointSelector(AbstractAggregator, ABC):
    """AbstractAggregator specialization which selects existing data points.

    Unlike DataAggregator, no new values are computed (e.g., mean); instead
    data points are picked from the input (e.g., first, last, min, max, etc ...).
    Hence, the `_arg_downsample` method returns the index positions of the
    selected data points.

    The public `arg_downsample` method validates the input and then relies on
    `_arg_downsample` to compute those index positions.
    """

    @abstractmethod
    def _arg_downsample(
        self,
        x: np.ndarray | None,
        y: np.ndarray,
        n_out: int,
    ) -> np.ndarray:
        """Return the index positions of the selected points (subclass hook)."""
        # Note: implementations can utilize the self.downsample_kwargs property
        raise NotImplementedError

    def arg_downsample(
        self,
        *args,
        n_out: int,
    ) -> np.ndarray:
        """Compute the index positions for the downsampled representation.

        Parameters
        ----------
        x, y: np.ndarray
            The x and y data of the to-be-aggregated series.
            When a single array is passed, it is treated as the y array (x is
            optional).
            The array(s) must be 1-dimensional, and must have the same length
            when x is passed.
            These are positional-only and thus cannot be passed as keyword
            arguments.
        n_out: int
            The number of samples which the downsampled series should contain.
            This should be passed as a keyword argument.

        Returns
        -------
        np.ndarray
            The index positions of the selected data points.

        """
        # Validate n_out before touching the data
        DataPointSelector._check_n_out(n_out)

        # Split the positional arguments into x (optional) and y, and validate
        x, y = DataPointSelector._process_args(*args)
        self._check_x_y(x, y)

        n_samples = len(y)
        if n_samples > n_out:
            # More samples than n_out -> perform data aggregation
            return self._arg_downsample(x=x, y=y, n_out=n_out)

        # At most n_out samples -> return all indices
        return np.arange(n_samples)

arg_downsample(*args, n_out)

Compute the index positions for the downsampled representation.

Parameters:

Name Type Description Default
x

The x and y data of the to-be-aggregated series. The x array is optional (i.e., if only 1 array is passed, it is assumed to be the y array). The array(s) must be 1-dimensional, and have the same length (if x is passed). These cannot be passed as keyword arguments, as they are positional-only.

required
y

The x and y data of the to-be-aggregated series. The x array is optional (i.e., if only 1 array is passed, it is assumed to be the y array). The array(s) must be 1-dimensional, and have the same length (if x is passed). These cannot be passed as keyword arguments, as they are positional-only.

required
n_out int

The number of samples which the downsampled series should contain. This should be passed as a keyword argument.

required

Returns:

Type Description
ndarray

The index positions of the selected data points.

Source code in plotly_resampler/aggregation/aggregation_interface.py
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
def arg_downsample(
    self,
    *args,
    n_out: int,
) -> np.ndarray:
    """Compute the index positions for the downsampled representation.

    Parameters
    ----------
    x, y: np.ndarray
        The x and y data of the to-be-aggregated series.
        When a single array is passed, it is treated as the y array (x is
        optional).
        The array(s) must be 1-dimensional, and must have the same length
        when x is passed.
        These are positional-only and thus cannot be passed as keyword
        arguments.
    n_out: int
        The number of samples which the downsampled series should contain.
        This should be passed as a keyword argument.

    Returns
    -------
    np.ndarray
        The index positions of the selected data points.

    """
    # Validate n_out before touching the data
    DataPointSelector._check_n_out(n_out)

    # Split the positional arguments into x (optional) and y, and validate
    x, y = DataPointSelector._process_args(*args)
    self._check_x_y(x, y)

    n_samples = len(y)
    if n_samples > n_out:
        # More samples than n_out -> perform data aggregation
        return self._arg_downsample(x=x, y=y, n_out=n_out)

    # At most n_out samples -> return all indices
    return np.arange(n_samples)