Bases: AbstractGapHandler
Gap handling based on the median diff of the x_agg array.
Source code in plotly_resampler/aggregation/gap_handlers.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85 | class MedDiffGapHandler(AbstractGapHandler):
"""Gap handling based on the median diff of the x_agg array."""
def _calc_med_diff(self, x_agg: np.ndarray) -> Tuple[float, np.ndarray]:
"""Calculate the median diff of the x_agg array.
As median is more robust to outliers than the mean, the median is used to define
the gap threshold.
This method performs a divide and conquer heuristic to calculate the median;
1. divide the array into `n_blocks` blocks (with `n_blocks` = 128)
2. calculate the mean of each block
3. calculate the median of the means
=> This proves to be a good approximation of the median of the full array, while
being much faster than calculating the median of the full array.
"""
# remark: thanks to the prepend -> x_diff.shape === len(s)
x_diff = np.diff(x_agg, prepend=x_agg[0])
# To do so - use an approach where we reshape the data
# into `n_blocks` blocks and calculate the mean and then the median on that
# Why use `median` instead of a global mean?
# => when you have large gaps, they will be represented by a large diff
# which will skew the mean way more than the median!
n_blocks = 128
if x_agg.shape[0] > 5 * n_blocks:
blck_size = x_diff.shape[0] // n_blocks
# convert the index series index diff into a reshaped view (i.e., sid_v)
sid_v: np.ndarray = x_diff[: blck_size * n_blocks].reshape(n_blocks, -1)
# calculate the mean fore each block and then the median of those means
med_diff = np.median(np.mean(sid_v, axis=1))
else:
med_diff = np.median(x_diff)
return med_diff, x_diff
def _get_gap_mask(self, x_agg: np.ndarray) -> Optional[np.ndarray]:
"""Get a boolean mask indicating the indices where there are gaps.
If you require custom gap handling, you can implement this method to return a
boolean mask indicating the indices where there are gaps.
Parameters
----------
x_agg: np.ndarray
The x array. This is used to determine the gaps.
Returns
-------
Optional[np.ndarray]
A boolean mask indicating the indices where there are gaps. If there are no
gaps, None is returned.
"""
med_diff, x_diff = self._calc_med_diff(x_agg)
# TODO: this 4 was revealed to me in a dream, but it seems to work well
# After some consideration, we altered this to a 4.1
gap_mask = x_diff > 4.1 * med_diff
if not any(gap_mask):
return
return gap_mask
|