Module tsflex.processing.logger
Contains the used variables and functions to provide logging functionality.
See Also
SeriesPipeline
- its
logging_file_path
of theprocess
method.
Expand source code
"""Contains the used variables and functions to provide logging functionality.
See Also
--------
SeriesPipeline : its `logging_file_path` of the `process` method.
"""
__author__ = "Jeroen Van Der Donckt, Jonas Van Der Donckt"
import logging
import re
import pandas as pd
from ..utils.logging import logging_file_to_df, remove_inner_brackets
# Package specific logger
logger = logging.getLogger("feature_processing_logger")
logger.setLevel(logging.DEBUG)
# Create logger which writes WARNING messages or higher to sys.stderr
console = logging.StreamHandler()
console.setLevel(logging.WARNING)
logger.addHandler(console)
def _parse_message(message: str) -> list:
"""Parse the message of the logged info."""
regex = r"\[(.*?)\]"
matches = re.findall(regex, remove_inner_brackets(message))
assert len(matches) == 4
func = matches[0]
series_names = matches[1].replace("'", "")
output_names = matches[2].replace("'", "")
duration_s = float(matches[3].rstrip(" seconds"))
return [func, series_names, output_names, duration_s]
def _parse_logging_execution_to_df(logging_file_path: str) -> pd.DataFrame:
"""Parse the logged messages into a DataFrame that contains execution info.
Parameters
----------
logging_file_path: str
The file path where the logged messages are stored. This is the file path that
is passed to the ``SeriesPipeline`` its ``process`` method.
Returns
-------
pd.DataFrame
A DataFrame with the processor its method, series names, output names, and
(%) calculation duration.
Note
----
This function only works when the ``logging_file_path`` used in a ``SeriesPipeline``
its ``process`` method is passed.
"""
df = logging_file_to_df(logging_file_path)
df[["function", "series_names", "output_names", "duration"]] = pd.DataFrame(
list(df["message"].apply(_parse_message)),
index=df.index,
)
df["duration %"] = (100 * (df["duration"] / df["duration"].sum())).round(2)
return df.drop(columns=["name", "log_level", "message"])
def get_processor_logs(logging_file_path: str) -> pd.DataFrame:
"""Get execution (time) info for each processor of a ``SeriesPipeline``.
Parameters
----------
logging_file_path: str
The file path where the logged messages are stored. This is the file path that
is passed to the ``SeriesPipeline`` its ``process`` method.
Returns
-------
pd.DataFrame
A DataFrame containing each processor its series names, output names, and
(%) duration.
"""
df = _parse_logging_execution_to_df(logging_file_path)
df["duration"] = pd.to_timedelta(df["duration"], unit="s")
return df
Functions
def get_processor_logs(logging_file_path)
-
Expand source code
def get_processor_logs(logging_file_path: str) -> pd.DataFrame: """Get execution (time) info for each processor of a ``SeriesPipeline``. Parameters ---------- logging_file_path: str The file path where the logged messages are stored. This is the file path that is passed to the ``SeriesPipeline`` its ``process`` method. Returns ------- pd.DataFrame A DataFrame containing each processor its series names, output names, and (%) duration. """ df = _parse_logging_execution_to_df(logging_file_path) df["duration"] = pd.to_timedelta(df["duration"], unit="s") return df
Get execution (time) info for each processor of a
SeriesPipeline
.Parameters
logging_file_path
:str
- The file path where the logged messages are stored. This is the file path that
is passed to the
SeriesPipeline
itsprocess
method.
Returns
pd.DataFrame
- A DataFrame containing each processor its series names, output names, and (%) duration.