From 5ad84a3c6222aa8909f56b811b7fc1c0e9eccfc8 Mon Sep 17 00:00:00 2001 From: Isis Garayalde Vandendriessche Date: Tue, 6 Dec 2022 15:13:12 +0100 Subject: [PATCH 01/15] first iteration update only visible --- examples/dash_apps/01_minimal_global.py | 25 ++- plotly_resampler/aggregation/aggregators.py | 115 ++++++++--- .../figure_resampler/figure_resampler.py | 65 ++++--- .../figure_resampler_interface.py | 180 +++++++++++------- 4 files changed, 252 insertions(+), 133 deletions(-) diff --git a/examples/dash_apps/01_minimal_global.py b/examples/dash_apps/01_minimal_global.py index 6ab2173e..97715563 100644 --- a/examples/dash_apps/01_minimal_global.py +++ b/examples/dash_apps/01_minimal_global.py @@ -16,18 +16,20 @@ import numpy as np import plotly.graph_objects as go from dash import Input, Output, dcc, html, Dash, no_update, callback_context +from graph_reporter import GraphReporter from plotly_resampler import FigureResampler from trace_updater import TraceUpdater # Data that will be used for the plotly-resampler figures -x = np.arange(2_000_000) +n = 500_000 +x = np.arange(n) noisy_sin = (3 + np.sin(x / 200) + np.random.randn(len(x)) / 10) * x / 1_000 - +flat = np.ones(n) # --------------------------------------Globals --------------------------------------- app = Dash(__name__) -fig: FigureResampler = FigureResampler() +fig: FigureResampler = FigureResampler(verbose=True) # NOTE: in this example, this reference to a FigureResampler is essential to preserve # throughout the whole dash app! If your dash app wants to create a new go.Figure(), # you should not construct a new FigureResampler object, but replace the figure of this @@ -40,8 +42,11 @@ html.Hr(), # The graph and it's needed components to update efficiently + dcc.Graph(id="graph-id"), TraceUpdater(id="trace-updater", gdID="graph-id"), + GraphReporter(id="graph-reporter", gId="graph-id"), + # html.Div(id='print') ] ) @@ -59,13 +64,27 @@ def plot_graph(n_clicks): # Note how the replace method is used here on the global figure object global fig fig.replace(go.Figure()) + fig._print_verbose = True fig.add_trace(go.Scattergl(name="log"), hf_x=x, hf_y=noisy_sin * .9999995 ** x) fig.add_trace(go.Scattergl(name="exp"), hf_x=x, hf_y=noisy_sin * 1.000002 ** x) + fig.add_trace(go.Scattergl(name="const"), hf_x=x, hf_y=flat) + fig.add_trace(go.Scattergl(name="poly"), hf_x=x, hf_y=noisy_sin * 1.000002 ** 2) + fig.update_layout(showlegend=True) return fig else: return no_update +# @app.callback( +# Output("print", "children"), +# Input("graph-id", "restyleData"), +# prevent_initial_call=True, +# ) +# def get_restyle_data(restyle_data): +# print(restyle_data) +# return "" +# + # Register the graph update callbacks to the layout fig.register_update_graph_callback( app=app, graph_id="graph-id", trace_updater_id="trace-updater" diff --git a/plotly_resampler/aggregation/aggregators.py b/plotly_resampler/aggregation/aggregators.py index 9357ef57..34243d73 100644 --- a/plotly_resampler/aggregation/aggregators.py +++ b/plotly_resampler/aggregation/aggregators.py @@ -15,12 +15,14 @@ import pandas as pd from ..aggregation.aggregation_interface import AbstractSeriesAggregator +# from plotly_resampler.aggregation import AbstractSeriesAggregator try: # The efficient c version of the LTTB algorithm from .algorithms.lttb_c import LTTB_core_c as LTTB_core except (ImportError, ModuleNotFoundError): import warnings + warnings.warn("Could not import lttbc; will use a (slower) python alternative.") from .algorithms.lttb_py import LTTB_core_py as LTTB_core @@ 
-76,7 +78,7 @@ def __init__(self, interleave_gaps: bool = True, nan_position="end"): interleave_gaps, nan_position, dtype_regex_list=[rf"{dtype}\d*" for dtype in ["float", "int", "uint"]] - + ["category", "bool"], + + ["category", "bool"], ) def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series: @@ -151,17 +153,17 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series: # Calculate the argmin & argmax on the reshaped view of `s` & # add the corresponding offset argmin = ( - s.values[: block_size * offset.shape[0]] - .reshape(-1, block_size) - .argmin(axis=1) - + offset + s.values[: block_size * offset.shape[0]] + .reshape(-1, block_size) + .argmin(axis=1) + + offset ) argmax = ( - s.values[argmax_offset : block_size * offset.shape[0] + argmax_offset] - .reshape(-1, block_size) - .argmax(axis=1) - + offset - + argmax_offset + s.values[argmax_offset: block_size * offset.shape[0] + argmax_offset] + .reshape(-1, block_size) + .argmax(axis=1) + + offset + + argmax_offset ) # Sort the argmin & argmax (where we append the first and last index item) # and then slice the original series on these indexes. @@ -216,16 +218,16 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series: # Calculate the argmin & argmax on the reshaped view of `s` & # add the corresponding offset argmin = ( - s.values[: block_size * offset.shape[0]] - .reshape(-1, block_size) - .argmin(axis=1) - + offset + s.values[: block_size * offset.shape[0]] + .reshape(-1, block_size) + .argmin(axis=1) + + offset ) argmax = ( - s.values[: block_size * offset.shape[0]] - .reshape(-1, block_size) - .argmax(axis=1) - + offset + s.values[: block_size * offset.shape[0]] + .reshape(-1, block_size) + .argmax(axis=1) + + offset ) # Note: the implementation below flips the array to search from @@ -278,7 +280,7 @@ def __init__(self, interleave_gaps: bool = True, nan_position="end"): interleave_gaps, nan_position, dtype_regex_list=[rf"{dtype}\d*" for dtype in ["float", "int", "uint"]] - + ["category", "bool"], + + ["category", "bool"], ) def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series: @@ -336,11 +338,11 @@ class FuncAggregator(AbstractSeriesAggregator): """ def __init__( - self, - aggregation_func, - interleave_gaps: bool = True, - nan_position="end", - dtype_regex_list=None, + self, + aggregation_func, + interleave_gaps: bool = True, + nan_position="end", + dtype_regex_list=None, ): """ Parameters @@ -383,8 +385,8 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series: # where each value is repeated based $len(s)/n_out$ times by=np.repeat(np.arange(n_out), group_size)[: len(s)] ) - .agg(self.aggregation_func) - .dropna() + .agg(self.aggregation_func) + .dropna() ) # Create an index-estimation for real-time data # Add one to the index so it's pointed at the end of the window @@ -399,3 +401,64 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series: name=str(s.name), copy=False, ) + + +class M4Aggregator(AbstractSeriesAggregator): + """Aggregation method which performs binned min-max aggregation over fully + overlapping windows. + + .. note:: + This method is rather efficient when scaling to large data sizes and can be used + as a data-reduction step before feeding it to the :class:`LTTB ` + algorithm, as :class:`EfficientLTTB ` does with the + :class:`MinMaxOverlapAggregator `. 
+
+    """
+
+    def __init__(self, interleave_gaps: bool = True, nan_position="end"):
+        """
+        Parameters
+        ----------
+        interleave_gaps: bool, optional
+            Whether None values should be added when there are gaps / irregularly
+            sampled data. A quantile-based approach is used to determine the gaps /
+            irregularly sampled data. By default, True.
+        nan_position: str, optional
+            Indicates where nans must be placed when gaps are detected. \n
+            If ``'end'``, the first point after a gap will be replaced with a
+            nan-value \n
+            If ``'begin'``, the last point before a gap will be replaced with a
+            nan-value \n
+            If ``'both'``, both the encompassing gap datapoints are replaced with
+            nan-values \n
+            .. note::
+                This parameter only has an effect when ``interleave_gaps`` is set
+                to *True*.
+        """
+        # this downsampler supports all pd.Series dtypes
+        super().__init__(interleave_gaps, nan_position, dtype_regex_list=None)
+
+    def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
+        assert n_out % 4 == 0, "n_out must be a multiple of 4"
+
+        s_i = s.index.astype(np.int64) if s.dtype.type == np.datetime64 else s.index
+
+        # Thanks to the `linspace` the data is evenly distributed over the index-range
+        # The searchsorted function returns the index positions
+        bins = np.searchsorted(s_i, np.linspace(s_i[0], s_i[-1], n_out // 4 + 1))
+
+        rel_idxs = []
+        for lower, upper in zip(bins, bins[1:]):
+            slice = s.iloc[lower:upper]
+            if not len(slice):
+                continue
+
+            # keep the first index, the index of the min & max value, and the
+            # last index of this bin (i.e., the M4 selection rule)
+            rel_idxs.append(slice.index[0])
+            rel_idxs.append(slice.idxmin())
+            rel_idxs.append(slice.idxmax())
+            rel_idxs.append(slice.index[-1])
+
+        # np.unique both de-duplicates and sorts the retained positions
+        return s.loc[np.unique(rel_idxs)]
 diff --git a/plotly_resampler/figure_resampler/figure_resampler.py b/plotly_resampler/figure_resampler/figure_resampler.py
index d6d0fc76..1ee9fbc7 100644
--- a/plotly_resampler/figure_resampler/figure_resampler.py
+++ b/plotly_resampler/figure_resampler/figure_resampler.py
@@ -21,6 +21,7 @@
 from jupyter_dash import JupyterDash
 from plotly.basedatatypes import BaseFigure
 from trace_updater import TraceUpdater
+from graph_reporter import GraphReporter
 
 from ..aggregation import AbstractSeriesAggregator, EfficientLTTB
 from .figure_resampler_interface import AbstractFigureAggregator
@@ -91,19 +92,19 @@ def _display_inline_output(self, dashboard_url, width, height):
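The selection rule that the `M4Aggregator` added above implements is easy to verify in isolation: each of the `n_out // 4` equal-width bins contributes at most four points - its first sample, its minimum, its maximum, and its last sample. Below is a minimal standalone sketch of that rule; the `m4_reduce` name is illustrative (not part of the patch), and it assumes a sorted numeric index with no gap interleaving:

```python
import numpy as np
import pandas as pd


def m4_reduce(s: pd.Series, n_out: int) -> pd.Series:
    """Keep the first, min, max, and last sample of each of the n_out // 4 bins."""
    assert n_out % 4 == 0, "n_out must be a multiple of 4"
    # Equal-width bin edges over the (sorted, numeric) index range
    edges = np.linspace(s.index[0], s.index[-1], n_out // 4 + 1)
    bins = np.searchsorted(s.index, edges)
    kept = []
    for lo, hi in zip(bins, bins[1:]):
        chunk = s.iloc[lo:hi]
        if len(chunk):
            kept += [chunk.index[0], chunk.idxmin(), chunk.idxmax(), chunk.index[-1]]
    # np.unique de-duplicates (e.g. when the min is also the first sample) and sorts
    return s.loc[np.unique(kept)]


s = pd.Series(np.random.randn(100_000).cumsum())
assert len(m4_reduce(s, n_out=1000)) <= 1000
```

Note that a bin contributes fewer than four points when they coincide (the minimum can also be the first sample), which is why the patched implementation may return fewer than `n_out` samples.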
@@ -184,19 +185,19 @@ class FigureResampler(AbstractFigureAggregator, go.Figure): """Data aggregation functionality for ``go.Figures``.""" def __init__( - self, - figure: BaseFigure | dict = None, - convert_existing_traces: bool = True, - default_n_shown_samples: int = 1000, - default_downsampler: AbstractSeriesAggregator = EfficientLTTB(), - resampled_trace_prefix_suffix: Tuple[str, str] = ( - '[R] ', - "", - ), - show_mean_aggregation_size: bool = True, - convert_traces_kwargs: dict | None = None, - verbose: bool = False, - show_dash_kwargs: dict | None = None, + self, + figure: BaseFigure | dict = None, + convert_existing_traces: bool = True, + default_n_shown_samples: int = 1000, + default_downsampler: AbstractSeriesAggregator = EfficientLTTB(), + resampled_trace_prefix_suffix: Tuple[str, str] = ( + '[R] ', + "", + ), + show_mean_aggregation_size: bool = True, + convert_traces_kwargs: dict | None = None, + verbose: bool = False, + show_dash_kwargs: dict | None = None, ): """Initialize a dynamic aggregation data mirror using a dash web app. @@ -264,7 +265,7 @@ def __init__( f._grid_ref = figure._grid_ref f.add_traces(figure.data) elif isinstance(figure, dict) and ( - "data" in figure or "layout" in figure # or "frames" in figure # TODO + "data" in figure or "layout" in figure # or "frames" in figure # TODO ): # A figure as a dict, can be; # - a plotly figure as a dict (after calling `fig.to_dict()`) @@ -324,11 +325,11 @@ def __init__( self._host: str | None = None def show_dash( - self, - mode=None, - config: dict | None = None, - graph_properties: dict | None = None, - **kwargs, + self, + mode=None, + config: dict | None = None, + graph_properties: dict | None = None, + **kwargs, ): """Registers the :func:`update_graph` callback & show the figure in a dash app. @@ -374,7 +375,7 @@ def show_dash( """ available_modes = ["external", "inline", "inline_persistent", "jupyterlab"] assert ( - mode is None or mode in available_modes + mode is None or mode in available_modes ), f"mode must be one of {available_modes}" graph_properties = {} if graph_properties is None else graph_properties assert "config" not in graph_properties.keys() # There is a param for config @@ -395,6 +396,7 @@ def show_dash( TraceUpdater( id="trace-updater", gdID="resample-figure", sequentialUpdate=False ), + GraphReporter(id='graph-reporter', gId='resample-figure'), ] ) self.register_update_graph_callback(app, "resample-figure", "trace-updater") @@ -443,7 +445,7 @@ def stop_server(self, warn: bool = True): ) def register_update_graph_callback( - self, app: dash.Dash, graph_id: str, trace_updater_id: str + self, app: dash.Dash, graph_id: str, trace_updater_id: str ): """Register the :func:`construct_update_data` method as callback function to the passed dash-app. 
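The wiring that `register_update_graph_callback` installs boils down to a plain Dash callback on the graph's `relayoutData`; this patch additionally passes the current `figure` (and its `style`) as `State` so the back-end can inspect per-trace visibility. A hedged, self-contained sketch of that pattern follows - the `sink` div and `on_relayout` handler are stand-ins for the real `TraceUpdater` target and `construct_update_data`, and the `style` State is omitted for brevity:

```python
import dash
from dash import Dash, dcc, html

app = Dash(__name__)
app.layout = html.Div([dcc.Graph(id="graph-id"), html.Div(id="sink")])


@app.callback(
    dash.dependencies.Output("sink", "children"),
    dash.dependencies.Input("graph-id", "relayoutData"),
    dash.dependencies.State("graph-id", "figure"),  # the extra State this patch adds
    prevent_initial_call=True,
)
def on_relayout(relayout_data, figure):
    # The real handler inspects figure["data"][i]["visible"] here and
    # re-aggregates only the matching traces for the new x-range.
    n_visible = sum(
        t.get("visible", True) is True for t in (figure or {}).get("data", [])
    )
    return f"{n_visible} visible traces"


if __name__ == "__main__":
    app.run_server(debug=True)
```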
@@ -463,7 +465,10 @@ def register_update_graph_callback( """ app.callback( dash.dependencies.Output(trace_updater_id, "updateData"), + # dash.dependencies.Output(trace_updater_id, "invisibleUpdateData"), dash.dependencies.Input(graph_id, "relayoutData"), + dash.dependencies.State(graph_id, "figure"), + dash.dependencies.State(graph_id, "style"), prevent_initial_call=True, )(self.construct_update_data) diff --git a/plotly_resampler/figure_resampler/figure_resampler_interface.py b/plotly_resampler/figure_resampler/figure_resampler_interface.py index c26cf925..029c752f 100644 --- a/plotly_resampler/figure_resampler/figure_resampler_interface.py +++ b/plotly_resampler/figure_resampler/figure_resampler_interface.py @@ -38,18 +38,19 @@ class AbstractFigureAggregator(BaseFigure, ABC): _high_frequency_traces = ["scatter", "scattergl"] def __init__( - self, - figure: BaseFigure, - convert_existing_traces: bool = True, - default_n_shown_samples: int = 1000, - default_downsampler: AbstractSeriesAggregator = EfficientLTTB(), - resampled_trace_prefix_suffix: Tuple[str, str] = ( - '[R] ', - "", - ), - show_mean_aggregation_size: bool = True, - convert_traces_kwargs: dict | None = None, - verbose: bool = False, + self, + figure: BaseFigure, + convert_existing_traces: bool = True, + default_n_shown_samples: int = 1000, + default_downsampler: AbstractSeriesAggregator = EfficientLTTB(), + resampled_trace_prefix_suffix: Tuple[str, str] = ( + '[R] ', + "", + ), + show_mean_aggregation_size: bool = True, + convert_traces_kwargs: dict | None = None, + verbose: bool = False, + # TODO: add c_width parameter ): """Instantiate a resampling data mirror. @@ -208,10 +209,10 @@ def _get_current_graph(self) -> dict: } def _check_update_trace_data( - self, - trace: dict, - start=None, - end=None, + self, + trace: dict, + start=None, + end=None, ) -> Optional[Union[dict, BaseTraceType]]: """Check and update the passed ``trace`` its data properties based on the slice range. @@ -312,7 +313,7 @@ def _check_update_trace_data( else: # When not resampled: trim prefix and/or suffix if necessary if len(self._prefix) and name.startswith(self._prefix): - name = name[len(self._prefix) :] + name = name[len(self._prefix):] if len(self._suffix) and trace["name"].endswith(self._suffix): name = name[: -len(self._suffix)] trace["name"] = name @@ -342,12 +343,14 @@ def _check_update_trace_data( return None def _check_update_figure_dict( - self, - figure: dict, - start: Optional[Union[float, str]] = None, - stop: Optional[Union[float, str]] = None, - xaxis_filter: str = None, - updated_trace_indices: Optional[List[int]] = None, + self, + figure: dict, + start: Optional[Union[float, str]] = None, + stop: Optional[Union[float, str]] = None, + xaxis_filter: str = None, + updated_trace_indices: Optional[List[int]] = None, + invisible_indices: Optional[List[int]] = None, + aggregate_invisible: bool = True ) -> List[int]: """Check and update the traces within the figure dict. @@ -388,9 +391,13 @@ def _check_update_figure_dict( if updated_trace_indices is None: updated_trace_indices = [] + if invisible_indices is None: + invisible_indices = [] + for idx, trace in enumerate(figure["data"]): # We skip when the trace-idx already has been updated. 
-            if idx in updated_trace_indices:
+            if idx in updated_trace_indices or (idx not in invisible_indices and not aggregate_invisible):
+                # print(f'idx {idx} was not resampled')
                 continue
 
             if xaxis_filter is not None:
@@ -409,8 +416,8 @@ def _check_update_figure_dict(
                 if x_anchor_trace is not None:
                     xaxis_matches = (
                         figure["layout"]
-                        .get("xaxis" + x_anchor_trace.lstrip("x"), {})
-                        .get("matches")
+                            .get("xaxis" + x_anchor_trace.lstrip("x"), {})
+                            .get("matches")
                     )
                 else:
                     xaxis_matches = figure["layout"].get("xaxis", {}).get("matches")
@@ -429,14 +436,14 @@ def _check_update_figure_dict(
                 # * x_axis_filter_short not in [x_anchor or xaxis matches] for
                 #   NON first rows
                 if (
-                    xaxis_filter_short == "x"
-                    and (
-                        x_anchor_trace not in [None, "x"]
-                        and xaxis_matches != xaxis_filter_short
-                    )
+                        xaxis_filter_short == "x"
+                        and (
+                                x_anchor_trace not in [None, "x"]
+                                and xaxis_matches != xaxis_filter_short
+                        )
                 ) or (
-                    xaxis_filter_short != "x"
-                    and (xaxis_filter_short not in [x_anchor_trace, xaxis_matches])
+                        xaxis_filter_short != "x"
+                        and (xaxis_filter_short not in [x_anchor_trace, xaxis_matches])
                 ):
                     continue
 
     @staticmethod
     def _slice_time(
-        hf_series: pd.Series,
-        t_start: Optional[pd.Timestamp] = None,
-        t_stop: Optional[pd.Timestamp] = None,
+            hf_series: pd.Series,
+            t_start: Optional[pd.Timestamp] = None,
+            t_stop: Optional[pd.Timestamp] = None,
     ) -> pd.Series:
         """Slice the time-indexed ``hf_series`` for the passed pd.Timestamps.
 
@@ -502,7 +512,7 @@ def _slice_time(
         """
 
         def to_same_tz(
-            ts: Union[pd.Timestamp, None], reference_tz=hf_series.index.tz
+                ts: Union[pd.Timestamp, None], reference_tz=hf_series.index.tz
         ) -> Union[pd.Timestamp, None]:
             """Adjust `ts` its timezone to the `reference_tz`."""
             if ts is None:
@@ -520,7 +530,7 @@ def to_same_tz(
         if t_start is not None and t_stop is not None:
             assert t_start.tz == t_stop.tz
 
-        return hf_series[to_same_tz(t_start) : to_same_tz(t_stop)]
+        return hf_series[to_same_tz(t_start): to_same_tz(t_stop)]
 
     @property
     def hf_data(self):
@@ -586,12 +596,12 @@ def _to_hf_series(x: np.ndarray, y: np.ndarray) -> pd.Series:
         )
 
     def _parse_get_trace_props(
-        self,
-        trace: BaseTraceType,
-        hf_x: Iterable = None,
-        hf_y: Iterable = None,
-        hf_text: Iterable = None,
-        hf_hovertext: Iterable = None,
+            self,
+            trace: BaseTraceType,
+            hf_x: Iterable = None,
+            hf_y: Iterable = None,
+            hf_text: Iterable = None,
+            hf_hovertext: Iterable = None,
     ) -> _hf_data_container:
         """Parse and capture the possibly high-frequency trace-props in a datacontainer.
 
@@ -724,12 +734,12 @@ def _parse_get_trace_props(
         return _hf_data_container(hf_x, hf_y, hf_text, hf_hovertext)
 
     def _construct_hf_data_dict(
-        self,
-        dc: _hf_data_container,
-        trace: BaseTraceType,
-        downsampler: AbstractSeriesAggregator | None,
-        max_n_samples: int | None,
-        offset=0,
+            self,
+            dc: _hf_data_container,
+            trace: BaseTraceType,
+            downsampler: AbstractSeriesAggregator | None,
+            max_n_samples: int | None,
+            offset=0,
     ) -> dict:
         """Create the `hf_data` dict which will be put in the `_hf_data` property.
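The reworked skip condition above decides which traces get re-aggregated for a given relayout round-trip. Its intended semantics (with the membership test as corrected in the follow-up commit in this series) fit in a few lines; `should_skip` is a hypothetical standalone rendering, not a function from the codebase:

```python
def should_skip(idx, updated_trace_indices, invisible_indices, aggregate_invisible):
    """Return True when trace `idx` must NOT be (re)aggregated."""
    if idx in updated_trace_indices:  # already handled in this round-trip
        return True
    if idx in invisible_indices and not aggregate_invisible:
        return True  # hidden trace, and hidden traces are deferred
    return False


# With aggregate_invisible=False, only the visible traces 0 and 2 are processed:
assert [i for i in range(4) if not should_skip(i, [], [1, 3], False)] == [0, 2]
```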
@@ -795,17 +805,17 @@ def _construct_hf_data_dict( } def add_trace( - self, - trace: Union[BaseTraceType, dict], - max_n_samples: int = None, - downsampler: AbstractSeriesAggregator = None, - limit_to_view: bool = False, - # Use these if you want some speedups (and are working with really large data) - hf_x: Iterable = None, - hf_y: Iterable = None, - hf_text: Union[str, Iterable] = None, - hf_hovertext: Union[str, Iterable] = None, - **trace_kwargs, + self, + trace: Union[BaseTraceType, dict], + max_n_samples: int = None, + downsampler: AbstractSeriesAggregator = None, + limit_to_view: bool = False, + # Use these if you want some speedups (and are working with really large data) + hf_x: Iterable = None, + hf_y: Iterable = None, + hf_text: Union[str, Iterable] = None, + hf_hovertext: Union[str, Iterable] = None, + **trace_kwargs, ): """Add a trace to the figure. @@ -968,14 +978,14 @@ def add_trace( return super(self._figure_class, self).add_trace(trace, **trace_kwargs) def add_traces( - self, - data: List[BaseTraceType | dict] | BaseTraceType | Dict, - max_n_samples: None | List[int] | int = None, - downsamplers: None - | List[AbstractSeriesAggregator] - | AbstractFigureAggregator = None, - limit_to_views: List[bool] | bool = False, - **traces_kwargs, + self, + data: List[BaseTraceType | dict] | BaseTraceType | Dict, + max_n_samples: None | List[int] | int = None, + downsamplers: None + | List[AbstractSeriesAggregator] + | AbstractFigureAggregator = None, + limit_to_views: List[bool] | bool = False, + **traces_kwargs, ): """Add traces to the figure. @@ -1057,11 +1067,11 @@ def add_traces( limit_to_views = [limit_to_views] * len(data) for i, (trace, max_out, downsampler, limit_to_view) in enumerate( - zip(data, max_n_samples, downsamplers, limit_to_views) + zip(data, max_n_samples, downsamplers, limit_to_views) ): if ( - trace.type.lower() not in self._high_frequency_traces - or self._hf_data.get(trace.uid) is not None + trace.type.lower() not in self._high_frequency_traces + or self._hf_data.get(trace.uid) is not None ): continue @@ -1151,7 +1161,7 @@ def replace(self, figure: go.Figure, convert_existing_traces: bool = True): resampled_trace_prefix_suffix=(self._prefix, self._suffix), ) - def construct_update_data(self, relayout_data: dict) -> List[dict]: + def construct_update_data(self, relayout_data: dict, figure: dict, style: dict) -> List[dict]: """Construct the to-be-updated front-end data, based on the layout change. Attention @@ -1167,6 +1177,12 @@ def construct_update_data(self, relayout_data: dict) -> List[dict]: A dict containing the ``relayout``-data (a.k.a. changed layout data) of the corresponding front-end graph. + figure: dict + A dict containing the ``figure``-data (a.k.a. all the data needed to plot traces and style them accordingly) of + the corresponding front-end graph. Used to determine the current visible state of each trace + NOTE: there should be a better way to pass ONLY the visible state of the traces to the back-end + wrap dcc.Graph? => could extract the visible data from the figure before passing it to the callback? + Returns ------- List[dict]: @@ -1178,6 +1194,19 @@ def construct_update_data(self, relayout_data: dict) -> List[dict]: in each dict. 
""" + invisible_trace_idx = [] + if figure: + + print(figure["data"][0].get('visible')) + for idx, trace in enumerate(figure["data"]): + visible = trace.get("visible", True) + if visible is not True: + invisible_trace_idx.append(idx) + print(invisible_trace_idx) + # import json + # import datetime + # with open(f'figure_{datetime.datetime.now().strftime("%H_%M")}.json', 'w') as f: + # json.dump({"data": figure['data']}, f) current_graph = self._get_current_graph() updated_trace_indices, cl_k = [], [] if relayout_data: @@ -1201,6 +1230,8 @@ def construct_update_data(self, relayout_data: dict) -> List[dict]: stop=relayout_data[t_stop_key], xaxis_filter=xaxis, updated_trace_indices=updated_trace_indices, + invisible_indices=invisible_trace_idx, + aggregate_invisible=False # set True later, False is now for testing ) # 2. The user clicked on either autorange | reset axes @@ -1266,6 +1297,7 @@ def _re_matches(regex: re.Pattern, strings: Iterable[str]) -> List[str]: m = regex.match(item) if m is not None: matches.append(m.string) + # print(f'sorted(matches): {sorted(matches)}') return sorted(matches) ## Magic methods (to use plotly.py words :grin:) From f456ef4b8fe61ffcbc638da7c95ee83fbdf7d669 Mon Sep 17 00:00:00 2001 From: Isis Garayalde Vandendriessche Date: Tue, 6 Dec 2022 15:15:02 +0100 Subject: [PATCH 02/15] include new version of M4 --- plotly_resampler/aggregation/aggregators.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/plotly_resampler/aggregation/aggregators.py b/plotly_resampler/aggregation/aggregators.py index 34243d73..d7f64b7f 100644 --- a/plotly_resampler/aggregation/aggregators.py +++ b/plotly_resampler/aggregation/aggregators.py @@ -443,7 +443,12 @@ def __init__(self, interleave_gaps: bool = True, nan_position="end"): def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series: assert n_out % 4 == 0, "n_out must be a multiple of 4" - s_i = s.index.astype(np.int64) if s.dtype.type == np.datetime64 else s.index + s_i = ( + s.index.astype(np.int64) + if s.index.dtype.type in (np.datetime64, pd.Timestamp) + else s.index + ) + print(s_i) # Thanks to the `linspace` the data is evenly distributed over the index-range # The searchsorted function returns the index positions @@ -461,4 +466,4 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series: rel_idxs.append(slice.idxmax()) rel_idxs.append(slice.index[-1]) - return s.loc[np.unique(rel_idxs)] + return s.loc[np.unique(rel_idxs)] \ No newline at end of file From e36d9df5ee5f35b2ed5862a8b06bc5f15d206781 Mon Sep 17 00:00:00 2001 From: Isis Garayalde Vandendriessche Date: Tue, 6 Dec 2022 15:19:24 +0100 Subject: [PATCH 03/15] actually working 1st iteration of update visible traces --- plotly_resampler/figure_resampler/figure_resampler_interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plotly_resampler/figure_resampler/figure_resampler_interface.py b/plotly_resampler/figure_resampler/figure_resampler_interface.py index 029c752f..8059b6d9 100644 --- a/plotly_resampler/figure_resampler/figure_resampler_interface.py +++ b/plotly_resampler/figure_resampler/figure_resampler_interface.py @@ -396,7 +396,7 @@ def _check_update_figure_dict( for idx, trace in enumerate(figure["data"]): # We skip when the trace-idx already has been updated. 
- if idx in updated_trace_indices or (idx not in invisible_indices and not aggregate_invisible): + if idx in updated_trace_indices or (idx in invisible_indices and not aggregate_invisible): # print(f'idx {idx} was not resampled') continue From c5077bd646f332840d27fca79779fb4cbccdb67e Mon Sep 17 00:00:00 2001 From: Isis Garayalde Vandendriessche Date: Thu, 8 Dec 2022 22:26:36 +0100 Subject: [PATCH 04/15] unoptimized separation of visible and invisible update --- examples/dash_apps/usage.py | 41 ++++++ .../figure_resampler/figure_resampler.py | 13 +- .../figure_resampler_interface.py | 127 ++++++++++++++++-- 3 files changed, 165 insertions(+), 16 deletions(-) create mode 100644 examples/dash_apps/usage.py diff --git a/examples/dash_apps/usage.py b/examples/dash_apps/usage.py new file mode 100644 index 00000000..0302408b --- /dev/null +++ b/examples/dash_apps/usage.py @@ -0,0 +1,41 @@ +import trace_updater +import dash +import plotly.graph_objs as go +from dash import html, dcc, Input, Output +from plotly_resampler import FigureResampler +from plotly_resampler.aggregation import EveryNthPoint +import numpy as np + +# Construct a high-frequency signal +n = 1_000_000 +x = np.arange(n) +noisy_sin = (3 + np.sin(x / 200) + np.random.randn(len(x)) / 10) * x / (n / 10) + +# Construct the to-be resampled figure +fig = FigureResampler( + go.Figure(), + # show_mean_aggregation_size=False, + default_downsampler=EveryNthPoint(interleave_gaps=False), + default_n_shown_samples=4000, + resampled_trace_prefix_suffix=("", ""), +) +for i in range(100): + fig.add_trace(go.Scattergl(name=f"sine-{i}", showlegend=True), hf_x=x, hf_y=noisy_sin + 10 * i) + + +# Construct app & its layout +app = dash.Dash(__name__) + +app.layout = html.Div( + [ + dcc.Graph(id="graph-id", figure=fig), + trace_updater.TraceUpdater(id="trace-updater", gdID="graph-id"), + ] +) + +# Register the callback +fig.register_update_graph_callback(app, "graph-id", "trace-updater") + + +if __name__ == "__main__": + app.run_server(debug=True, port=8059) \ No newline at end of file diff --git a/plotly_resampler/figure_resampler/figure_resampler.py b/plotly_resampler/figure_resampler/figure_resampler.py index 1ee9fbc7..1976f33d 100644 --- a/plotly_resampler/figure_resampler/figure_resampler.py +++ b/plotly_resampler/figure_resampler/figure_resampler.py @@ -464,14 +464,21 @@ def register_update_graph_callback( """ app.callback( - dash.dependencies.Output(trace_updater_id, "updateData"), - # dash.dependencies.Output(trace_updater_id, "invisibleUpdateData"), + # dash.dependencies.Output(trace_updater_id, "updateData"), + dash.dependencies.Output(trace_updater_id, "visibleUpdateData"), dash.dependencies.Input(graph_id, "relayoutData"), dash.dependencies.State(graph_id, "figure"), - dash.dependencies.State(graph_id, "style"), prevent_initial_call=True, )(self.construct_update_data) + app.callback( + dash.dependencies.Output(trace_updater_id, "invisibleUpdateData"), + dash.dependencies.Input(trace_updater_id, "visibleUpdateData"), + dash.dependencies.State(graph_id, "relayoutData"), + dash.dependencies.State(graph_id, "figure"), + prevent_initial_call=True, + )(self.construct_invisible_update_data) + def _get_pr_props_keys(self) -> List[str]: # Add the additional plotly-resampler properties of this class return super()._get_pr_props_keys() + ["_show_dash_kwargs"] diff --git a/plotly_resampler/figure_resampler/figure_resampler_interface.py b/plotly_resampler/figure_resampler/figure_resampler_interface.py index 8059b6d9..9ddb6441 100644 --- 
a/plotly_resampler/figure_resampler/figure_resampler_interface.py
+++ b/plotly_resampler/figure_resampler/figure_resampler_interface.py
@@ -214,7 +214,7 @@ def _check_update_trace_data(
         start=None,
         end=None,
     ) -> Optional[Union[dict, BaseTraceType]]:
-        """Check and update the passed ``trace`` its data properties based on the
+        """Check and update the passed ``trace``'s data properties based on the
         slice range.
 
         Note
@@ -349,8 +349,7 @@ def _check_update_figure_dict(
         stop: Optional[Union[float, str]] = None,
         xaxis_filter: str = None,
         updated_trace_indices: Optional[List[int]] = None,
-        invisible_indices: Optional[List[int]] = None,
-        aggregate_invisible: bool = True
+        indices_to_use: Optional[List[int]] = None,
     ) -> List[int]:
         """Check and update the traces within the figure dict.
 
@@ -391,13 +390,13 @@ def _check_update_figure_dict(
         if updated_trace_indices is None:
             updated_trace_indices = []
 
-        if invisible_indices is None:
-            invisible_indices = []
+        if indices_to_use is None:
+            indices_to_use = []
 
         for idx, trace in enumerate(figure["data"]):
             # We skip when the trace-idx already has been updated.
-            if idx in updated_trace_indices or (idx in invisible_indices and not aggregate_invisible):
-                # print(f'idx {idx} was not resampled')
+            if idx in updated_trace_indices or idx not in indices_to_use:
+                print(f'idx {idx} was not resampled')
                 continue
 
             if xaxis_filter is not None:
@@ -1161,14 +1160,14 @@ def replace(self, figure: go.Figure, convert_existing_traces: bool = True):
             resampled_trace_prefix_suffix=(self._prefix, self._suffix),
         )
 
-    def construct_update_data(self, relayout_data: dict, figure: dict, style: dict) -> List[dict]:
+    def construct_update_data(self, relayout_data: dict, figure: dict) -> List[dict]:
         """Construct the to-be-updated front-end data, based on the layout change.
 
         Attention
         ---------
         This method is tightly coupled with Dash app callbacks. It takes the
         front-end figure its ``relayoutData`` as input and returns the data which needs to be
-        sent tot the ``TraceUpdater`` its ``updateData`` property for that corresponding
+        sent to the ``TraceUpdater`` its ``visibleUpdateData`` property for that corresponding
         graph.
 
         Parameters
@@ -1194,10 +1193,110 @@ def construct_update_data(self, relayout_data: dict, figure: dict, style: dict) -> List[dict]:
             in each dict.
 
         """
+        visible_trace_idx = []
+
+        if figure:
+            for idx, trace in enumerate(figure["data"]):
+                visible = trace.get("visible", True)
+                if visible is True:
+                    visible_trace_idx.append(idx)
+            print(visible_trace_idx)
+            # import json
+            # import datetime
+            # with open(f'figure_{datetime.datetime.now().strftime("%H_%M")}.json', 'w') as f:
+            #     json.dump({"data": figure['data']}, f)
+        current_graph = self._get_current_graph()
+        updated_trace_indices, cl_k = [], []
+        if relayout_data:
+            self._print("-" * 100 + "\n", "changed layout", relayout_data)
+
+            cl_k = relayout_data.keys()
+
+            # ------------------ HF DATA aggregation ---------------------
+            # 1.
Base case - there is a x-range specified in the front-end + start_matches = self._re_matches(re.compile(r"xaxis\d*.range\[0]"), cl_k) + stop_matches = self._re_matches(re.compile(r"xaxis\d*.range\[1]"), cl_k) + if len(start_matches) and len(stop_matches): + for t_start_key, t_stop_key in zip(start_matches, stop_matches): + # Check if the xaxis part of xaxis.[0-1] matches + xaxis = t_start_key.split(".")[0] + assert xaxis == t_stop_key.split(".")[0] + # -> we want to copy the layout on the back-end + updated_trace_indices = self._check_update_figure_dict( + figure=current_graph, + start=relayout_data[t_start_key], + stop=relayout_data[t_stop_key], + xaxis_filter=xaxis, + updated_trace_indices=updated_trace_indices, + indices_to_use=visible_trace_idx + ) + print(updated_trace_indices) # only contains ints (the indices of the updated traces) + + # 2. The user clicked on either autorange | reset axes + autorange_matches = self._re_matches( + re.compile(r"xaxis\d*.autorange"), cl_k + ) + spike_matches = self._re_matches(re.compile(r"xaxis\d*.showspikes"), cl_k) + # 2.1 Reset-axes -> autorange & reset to the global data view + if len(autorange_matches) and len(spike_matches): + for autorange_key in autorange_matches: + if relayout_data[autorange_key]: + xaxis = autorange_key.split(".")[0] + updated_trace_indices = self._check_update_figure_dict( + current_graph, + xaxis_filter=xaxis, + updated_trace_indices=updated_trace_indices, + indices_to_use=visible_trace_idx + ) + # 2.1. Autorange -> do nothing, the autorange will be applied on the + # current front-end view + elif len(autorange_matches) and not len(spike_matches): + # PreventUpdate returns a 204 status code response on the + # relayout post request + return dash.no_update + + # If we do not have any traces to be updated, we will return an empty + # request response + if len(updated_trace_indices) == 0: + # PreventUpdate returns a 204 status-code response on the relayout post + # request + return dash.no_update + + # -------------------- construct callback data -------------------------- + layout_traces_list: List[dict] = [] # the data + + # 1. Create a new dict with additional layout updates for the front-end + extra_layout_updates = {} + + # 1.1. Set autorange to False for each layout item with a specified x-range + xy_matches = self._re_matches(re.compile(r"[xy]axis\d*.range\[\d+]"), cl_k) + for range_change_axis in xy_matches: + axis = range_change_axis.split(".")[0] + extra_layout_updates[f"{axis}.autorange"] = False + layout_traces_list.append(extra_layout_updates) + + # 2. 
Create the additional trace data for the frond-end + relevant_keys = ["x", "y", "text", "hovertext", "name"] # TODO - marker color + # Note that only updated trace-data will be sent to the client + for idx in updated_trace_indices: + trace = current_graph["data"][idx] + trace_reduced = {k: trace[k] for k in relevant_keys if k in trace} + + # Store the index into the corresponding to-be-sent trace-data so + # the client front-end can know which trace needs to be updated + trace_reduced.update({"index": idx}) + layout_traces_list.append(trace_reduced) + print(layout_traces_list) + return layout_traces_list + + def construct_invisible_update_data(self, visible_update_data: int, relayout_data, figure): + # print(visible_updated) + # print(f'relayout: {relayout}') invisible_trace_idx = [] + if figure: - print(figure["data"][0].get('visible')) + # print(figure["data"][0].get('visible')) for idx, trace in enumerate(figure["data"]): visible = trace.get("visible", True) if visible is not True: @@ -1225,14 +1324,14 @@ def construct_update_data(self, relayout_data: dict, figure: dict, style: dict) assert xaxis == t_stop_key.split(".")[0] # -> we want to copy the layout on the back-end updated_trace_indices = self._check_update_figure_dict( - current_graph, + figure=current_graph, start=relayout_data[t_start_key], stop=relayout_data[t_stop_key], xaxis_filter=xaxis, updated_trace_indices=updated_trace_indices, - invisible_indices=invisible_trace_idx, - aggregate_invisible=False # set True later, False is now for testing + indices_to_use=invisible_trace_idx ) + # print(updated_trace_indices) #only contains ints (the indices of the updated traces) # 2. The user clicked on either autorange | reset axes autorange_matches = self._re_matches( @@ -1248,6 +1347,7 @@ def construct_update_data(self, relayout_data: dict, figure: dict, style: dict) current_graph, xaxis_filter=xaxis, updated_trace_indices=updated_trace_indices, + indices_to_use=invisible_trace_idx ) # 2.1. 
Autorange -> do nothing, the autorange will be applied on the # current front-end view @@ -1287,6 +1387,7 @@ def construct_update_data(self, relayout_data: dict, figure: dict, style: dict) # the client front-end can know which trace needs to be updated trace_reduced.update({"index": idx}) layout_traces_list.append(trace_reduced) + # print(layout_traces_list) return layout_traces_list @staticmethod From e1fb02950a38f917c392d5cbebf9fcccef6a1713 Mon Sep 17 00:00:00 2001 From: Isis Garayalde Vandendriessche Date: Fri, 10 Feb 2023 15:10:54 +0100 Subject: [PATCH 05/15] Added: client-side callback to efficiently fetch trace visibility --- examples/basic_example.ipynb | 431 ++++++++++++++---- examples/dash_apps/01_minimal_global.py | 20 +- examples/dash_apps/usage.py | 10 +- node_modules/.package-lock.json | 13 + package-lock.json | 27 ++ package.json | 7 + .../figure_resampler/figure_resampler.py | 71 ++- .../figure_resampler_interface.py | 39 +- 8 files changed, 492 insertions(+), 126 deletions(-) create mode 100644 node_modules/.package-lock.json create mode 100644 package-lock.json create mode 100644 package.json diff --git a/examples/basic_example.ipynb b/examples/basic_example.ipynb index 8e2c86f3..3ca5f225 100644 --- a/examples/basic_example.ipynb +++ b/examples/basic_example.ipynb @@ -4,7 +4,11 @@ "cell_type": "code", "execution_count": 1, "id": "6a8b1d54-106c-4f89-842d-c1aa0b136a42", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "%load_ext autoreload\n", @@ -15,7 +19,11 @@ "cell_type": "code", "execution_count": 2, "id": "e42a2898-75c8-4e0a-af91-14a7d497e603", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import pandas as pd\n", @@ -35,7 +43,11 @@ { "cell_type": "markdown", "id": "45398797", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "# Adding dynamic aggregation to your plotly Figure" ] @@ -44,7 +56,11 @@ "cell_type": "code", "execution_count": 3, "id": "9c5d1e2f", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Some dummy data that will be used throughout the examples\n", @@ -54,22 +70,27 @@ ] }, { - "cell_type": "markdown", - "id": "fa10538c", + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "## **auto mode**: `register_plotly_resampler`" + ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - }, - "source": [ - "## **auto mode**: `register_plotly_resampler`" - ] + } }, { "cell_type": "markdown", "id": "ffdf4c2b", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Once `register_plotly_resampler` method is called, it will automatically convert all new defined plotly graph objects into a `FigureResampler` or `FigureWidgetResampler` object. 
The `mode` parameter of this method allows to define which type of the aforementioned resampling objects is used.\n", "\n", @@ -86,7 +107,11 @@ "cell_type": "code", "execution_count": null, "id": "b81403ca", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from plotly_resampler import register_plotly_resampler, unregister_plotly_resampler\n" @@ -96,7 +121,11 @@ "cell_type": "code", "execution_count": null, "id": "0371d1f3", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# by default, 1,000 samples per trace are shown\n", @@ -107,7 +136,11 @@ "cell_type": "code", "execution_count": null, "id": "0f4656be", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# auto mode: when working in an IPython environment, this will automatically be a\n", @@ -126,7 +159,11 @@ { "cell_type": "markdown", "id": "e20add02", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "❗ `.show()` always returns a static html view" ] @@ -135,7 +172,11 @@ "cell_type": "code", "execution_count": null, "id": "0f4656be", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# this outputs a static html view of the figure, which can be serialized within notebooks\n", @@ -145,7 +186,11 @@ { "cell_type": "markdown", "id": "382d04ec", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### pro tip: `register_plotly_resampler` + pandas plotting backend = 🔥" ] @@ -153,7 +198,11 @@ { "cell_type": "markdown", "id": "04a5919e", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "The combo below allows to conveniently visualize large time_series data" ] @@ -162,7 +211,11 @@ "cell_type": "code", "execution_count": null, "id": "88556465", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "register_plotly_resampler(mode=\"auto\", default_n_shown_samples=1500)\n", @@ -173,7 +226,11 @@ "cell_type": "code", "execution_count": null, "id": "08dc25ff", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "df = pd.DataFrame(data={\"sine\": noisy_sine, \"neg-sine\": -noisy_sine}, copy=False)\n", @@ -184,7 +241,11 @@ "cell_type": "code", "execution_count": null, "id": "f10cd857", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# TODO: for some reason pandas plotting backend + datetime index work really slow\n", @@ -195,7 +256,11 @@ "cell_type": "code", "execution_count": null, "id": "67251df5", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "df.plot(backend=\"plotly\")\n" @@ -205,7 +270,11 @@ "cell_type": "code", "execution_count": null, "id": "e5c7b3e6", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "unregister_plotly_resampler()" @@ -214,7 +283,11 @@ { "cell_type": "markdown", "id": "54079ce0", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## **manual mode**: Basic sine example\n" ] @@ -222,7 +295,11 @@ { "cell_type": "markdown", "id": "3312e7e5", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "\n", "✅ **advantages**:\n", @@ -239,7 +316,11 @@ "cell_type": 
"code", "execution_count": null, "id": "ef449619", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# 1. Wrap the figure with the FigureResampler class\n", @@ -261,7 +342,11 @@ { "cell_type": "markdown", "id": "7d828610", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "For the `FigureWidgetResampler` use case, you only need to chance way of displaying the figure" ] @@ -270,7 +355,11 @@ "cell_type": "code", "execution_count": null, "id": "ef449619", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Same content as above, but using the FigureWidgetResampler class\n", @@ -287,7 +376,11 @@ "cell_type": "code", "execution_count": null, "id": "ef449619", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Same content as above, but using the FigureWidgetResampler class\n", @@ -302,7 +395,11 @@ { "cell_type": "markdown", "id": "9bfc2726", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "# Advanced stuff" ] @@ -310,7 +407,11 @@ { "cell_type": "markdown", "id": "f4dfa7df", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## `plotly-resampler` notebook figure retention|serialization" ] @@ -318,7 +419,11 @@ { "cell_type": "markdown", "id": "e5f559f9", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "It is often useful to still retain a figure after your notebook is disconnected from your python kernel, e.g., when exported to html.\n", "\n", @@ -333,7 +438,11 @@ { "cell_type": "markdown", "id": "b3768f13", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "
How is this implemented?\n", "We create a JupyterDash subclass which extends the \"inline\" visualization capabilities with the functionality described above.
\n", @@ -363,7 +472,11 @@ "cell_type": "code", "execution_count": 4, "id": "b9c1aedd", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -454,7 +567,11 @@ { "cell_type": "markdown", "id": "0a3761fd", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "When you:\n", "* restart the kernel; and reopen this notebook\n", @@ -466,7 +583,11 @@ { "cell_type": "markdown", "id": "b997c178", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Adjusting the data of your plotly-resampler figure at runtime" ] @@ -474,7 +595,11 @@ { "cell_type": "markdown", "id": "28f10919", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "The `hf_data` property of the `FigureResampler` / `FigureWidgetResampler` can be used to change figure data at runtime" ] @@ -482,7 +607,11 @@ { "cell_type": "markdown", "id": "d0ce62a4", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### `FigureResampler`" ] @@ -491,7 +620,11 @@ "cell_type": "code", "execution_count": null, "id": "499ac9f3", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "fig = FigureResampler(go.Figure())\n", @@ -503,7 +636,11 @@ { "cell_type": "markdown", "id": "6bffa11a", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Now we adjust the figure data \n", "**Note**: after running the cell below, we need to manually trigger a graph update (by for example zooming / resetting the axes) to ensure that the new data is shown." @@ -513,7 +650,11 @@ "cell_type": "code", "execution_count": null, "id": "94127fae", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "fig.hf_data[0][\"y\"] = -10 * noisy_sine\n", @@ -523,7 +664,11 @@ { "cell_type": "markdown", "id": "784d8464", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### `FigureWidgetResampler`" ] @@ -532,7 +677,11 @@ "cell_type": "code", "execution_count": null, "id": "9a0a0870", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "fig = FigureWidgetResampler(go.Figure())\n", @@ -544,7 +693,11 @@ { "cell_type": "markdown", "id": "22687274", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Now we adjust the figure data \n", "**Note**: after running the cell below, we need to manually trigger a graph update (by for example zooming / resetting the axes) to ensure that the new data is shown." 
@@ -554,7 +707,11 @@ "cell_type": "code", "execution_count": null, "id": "850abc7c", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "fig.hf_data[0][\"y\"] = 10 * noisy_sine**2\n" @@ -563,7 +720,11 @@ { "cell_type": "markdown", "id": "f4130fac", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "**Pro tip**: `FigureWidgetResampler` has the `reload_data` and `reset_axes` methods to do this automatically" ] @@ -572,7 +733,11 @@ "cell_type": "code", "execution_count": null, "id": "726f0c08", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "fig.hf_data[0][\"y\"] = -10 * noisy_sine**2\n", @@ -584,7 +749,11 @@ "cell_type": "code", "execution_count": null, "id": "8cbfca3d", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "fig.hf_data[0][\"y\"] = 10 * noisy_sine**.1\n", @@ -595,7 +764,11 @@ { "cell_type": "markdown", "id": "0846eaa3", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Different downsampler & number of shown samples per trace" ] @@ -603,7 +776,11 @@ { "cell_type": "markdown", "id": "2c0f1aff", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "To achieve this, you only need to adjust the `max_n_samples` per trace; see the example below" ] @@ -612,7 +789,11 @@ "cell_type": "code", "execution_count": null, "id": "193e26f5", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "fig = FigureResampler(go.Figure())\n", @@ -636,7 +817,11 @@ { "cell_type": "markdown", "id": "74e64a79", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "---" ] @@ -644,7 +829,11 @@ { "cell_type": "markdown", "id": "192a74fb", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "# Various other examples" ] @@ -652,7 +841,11 @@ { "cell_type": "markdown", "id": "c7962301", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "The visualizations below shows how you plotly-resampler is used for various visualization configurations." 
] @@ -661,7 +854,10 @@ "cell_type": "markdown", "id": "62535963", "metadata": { - "tags": [] + "tags": [], + "pycharm": { + "name": "#%% md\n" + } }, "source": [ "## The example `.gif` from the docs/README" @@ -670,7 +866,11 @@ { "cell_type": "markdown", "id": "d2f83be8", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Note how:\n", "* The example figure withholds both time-indexed data and numeric-indexed data\n", @@ -683,7 +883,11 @@ "cell_type": "code", "execution_count": null, "id": "7384c081-a733-41e5-a80c-99b7b31d0520", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# ------------ loading the data -----------\n", @@ -701,7 +905,10 @@ "execution_count": null, "id": "c1f1d9a2-63a1-484e-a346-ae6b04c997b6", "metadata": { - "tags": [] + "tags": [], + "pycharm": { + "name": "#%%\n" + } }, "outputs": [], "source": [ @@ -774,7 +981,11 @@ { "cell_type": "markdown", "id": "28b02cff", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Converting a `go.Figure`, with its traces, into a `FigureResampler`" ] @@ -782,7 +993,11 @@ { "cell_type": "markdown", "id": "20b9cef4", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "This example first creates the `.gif` figure (with less data, otherwise the graph construction time would be too long) and then uses the `convert_existing_traces` argument of the FigureResampler constructor to convert this into a FigureResampler figure." ] @@ -791,7 +1006,11 @@ "cell_type": "code", "execution_count": null, "id": "4e239eda", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from plotly_resampler import unregister_plotly_resampler\n", @@ -802,7 +1021,11 @@ "cell_type": "code", "execution_count": null, "id": "0941f11a", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "df_gusb = pd.read_parquet(f\"data/df_gusb.parquet\")\n", @@ -875,7 +1098,11 @@ { "cell_type": "markdown", "id": "b863e42c", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Note how the `data` property shape is the raw data size" ] @@ -884,7 +1111,11 @@ "cell_type": "code", "execution_count": null, "id": "ad0a05d3", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "fig.data[1][\"x\"].shape\n" @@ -894,7 +1125,11 @@ "cell_type": "code", "execution_count": null, "id": "c38f7f52", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Convert the figure into a figurResampler figure by decorating it\n", @@ -906,7 +1141,11 @@ "cell_type": "code", "execution_count": null, "id": "dbfa7ef8", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "print(\"aggregated data \", fr_fig.data[1][\"x\"].shape)\n", @@ -919,7 +1158,11 @@ { "cell_type": "markdown", "id": "f46d0691", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "**Note**:\n", "* the data size of `fr_fig` is reduced to `default_n_shown_samples` (_500_) but the original `fig` data is still equal to `110_000`.\n", @@ -930,7 +1173,11 @@ "cell_type": "code", "execution_count": null, "id": "d7c10446", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# We flip and take the sqarquere 
root of the data\n", @@ -943,7 +1190,10 @@ "cell_type": "markdown", "id": "dd77941a", "metadata": { - "tags": [] + "tags": [], + "pycharm": { + "name": "#%% md\n" + } }, "source": [ "## Skin conductance example" @@ -952,7 +1202,11 @@ { "cell_type": "markdown", "id": "2ea4e74a", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "This example is especially interesting as it **uses a _background-color_ to indicate a signal quality**.\n", "\n", @@ -963,7 +1217,11 @@ "cell_type": "code", "execution_count": null, "id": "3cc6fef1", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "df_gsr = pd.read_parquet(\"data/processed_gsr.parquet\")\n" @@ -973,7 +1231,11 @@ "cell_type": "code", "execution_count": null, "id": "71758f03", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "fig = FigureResampler(\n", @@ -1062,7 +1324,10 @@ "cell_type": "markdown", "id": "2cfd9b5f-58c8-4711-9dc0-aa326a81fef9", "metadata": { - "tags": [] + "tags": [], + "pycharm": { + "name": "#%% md\n" + } }, "source": [ "## Categorical series - box & histogram" @@ -1071,7 +1336,11 @@ { "cell_type": "markdown", "id": "d75f128f", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "This example highlights how `plotly-resampler` supports combining high-frequency trace-subplots \n", "with non-scatterlike traces such as a histogram & a boxplot." @@ -1081,7 +1350,11 @@ "cell_type": "code", "execution_count": null, "id": "fe548e0a-8f20-4b01-beec-ded7de144995", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Create a categorical series, with mostly a's, but a few sparse b's and c's\n", @@ -1103,7 +1376,11 @@ "cell_type": "code", "execution_count": null, "id": "38b1f6b7-896e-48a3-a4b2-91a2bfa2f58b", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "base_fig = make_subplots(\n", @@ -1168,4 +1445,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/examples/dash_apps/01_minimal_global.py b/examples/dash_apps/01_minimal_global.py index 97715563..8ac3f2e4 100644 --- a/examples/dash_apps/01_minimal_global.py +++ b/examples/dash_apps/01_minimal_global.py @@ -29,7 +29,7 @@ # --------------------------------------Globals --------------------------------------- app = Dash(__name__) -fig: FigureResampler = FigureResampler(verbose=True) +fig: FigureResampler = FigureResampler(verbose=False) # NOTE: in this example, this reference to a FigureResampler is essential to preserve # throughout the whole dash app! 
If your dash app wants to create a new go.Figure(), # you should not construct a new FigureResampler object, but replace the figure of this @@ -42,7 +42,7 @@ html.Hr(), # The graph and it's needed components to update efficiently - + dcc.Store(id="visible-indices", data={"visible": [], "invisible": []}), dcc.Graph(id="graph-id"), TraceUpdater(id="trace-updater", gdID="graph-id"), GraphReporter(id="graph-reporter", gId="graph-id"), @@ -55,6 +55,7 @@ # The callback used to construct and store the graph's data on the serverside @app.callback( Output("graph-id", "figure"), + # Output("visible-indices", "data"), Input("plot-button", "n_clicks"), prevent_initial_call=True, ) @@ -72,22 +73,11 @@ def plot_graph(n_clicks): fig.update_layout(showlegend=True) return fig else: - return no_update - - -# @app.callback( -# Output("print", "children"), -# Input("graph-id", "restyleData"), -# prevent_initial_call=True, -# ) -# def get_restyle_data(restyle_data): -# print(restyle_data) -# return "" -# + return no_update, no_update # Register the graph update callbacks to the layout fig.register_update_graph_callback( - app=app, graph_id="graph-id", trace_updater_id="trace-updater" + app=app, graph_id="graph-id", trace_updater_id="trace-updater", store_id="visible-indices" ) # --------------------------------- Running the app --------------------------------- diff --git a/examples/dash_apps/usage.py b/examples/dash_apps/usage.py index 0302408b..e63d98c9 100644 --- a/examples/dash_apps/usage.py +++ b/examples/dash_apps/usage.py @@ -5,6 +5,7 @@ from plotly_resampler import FigureResampler from plotly_resampler.aggregation import EveryNthPoint import numpy as np +from trace_updater import TraceUpdater # Construct a high-frequency signal n = 1_000_000 @@ -15,7 +16,7 @@ fig = FigureResampler( go.Figure(), # show_mean_aggregation_size=False, - default_downsampler=EveryNthPoint(interleave_gaps=False), + # default_downsampler=EveryNthPoint(interleave_gaps=False), default_n_shown_samples=4000, resampled_trace_prefix_suffix=("", ""), ) @@ -28,14 +29,15 @@ app.layout = html.Div( [ + dcc.Store(id="visible-indices", data={"visible": [], "invisible": []}), dcc.Graph(id="graph-id", figure=fig), - trace_updater.TraceUpdater(id="trace-updater", gdID="graph-id"), + TraceUpdater(id="trace-updater", gdID="graph-id"), ] ) # Register the callback -fig.register_update_graph_callback(app, "graph-id", "trace-updater") +fig.register_update_graph_callback(app, "graph-id", "trace-updater", "visible-indices") if __name__ == "__main__": - app.run_server(debug=True, port=8059) \ No newline at end of file + app.run_server(debug=True, port=8050) \ No newline at end of file diff --git a/node_modules/.package-lock.json b/node_modules/.package-lock.json new file mode 100644 index 00000000..b4295f14 --- /dev/null +++ b/node_modules/.package-lock.json @@ -0,0 +1,13 @@ +{ + "name": "plotly-resampler", + "version": "1.0.0", + "lockfileVersion": 2, + "requires": true, + "packages": { + "node_modules/lodash": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" + } + } +} diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 00000000..6d19a7b1 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,27 @@ +{ + "name": "plotly-resampler", + "version": "1.0.0", + "lockfileVersion": 2, + "requires": true, + "packages": { + "": { + "name": "plotly-resampler", + "version": "1.0.0", + 
"dependencies": { + "lodash": "^4.17.21" + } + }, + "node_modules/lodash": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" + } + }, + "dependencies": { + "lodash": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 00000000..5573e938 --- /dev/null +++ b/package.json @@ -0,0 +1,7 @@ +{ + "name": "plotly-resampler", + "version": "1.0.0", + "dependencies": { + "lodash": "^4.17.21" + } +} diff --git a/plotly_resampler/figure_resampler/figure_resampler.py b/plotly_resampler/figure_resampler/figure_resampler.py index 1976f33d..b62e2a1e 100644 --- a/plotly_resampler/figure_resampler/figure_resampler.py +++ b/plotly_resampler/figure_resampler/figure_resampler.py @@ -444,8 +444,10 @@ def stop_server(self, warn: bool = True): + "\t- the dash-server wasn't started with 'show_dash'" ) + # TODO: check if i should put the clientside callback to fill the store here or in a different function + # for now, here def register_update_graph_callback( - self, app: dash.Dash, graph_id: str, trace_updater_id: str + self, app: dash.Dash, graph_id: str, trace_updater_id: str, store_id: str ): """Register the :func:`construct_update_data` method as callback function to the passed dash-app. @@ -461,21 +463,80 @@ def register_update_graph_callback( The id of the ``TraceUpdater`` component. This component is leveraged by ``FigureResampler`` to efficiently POST the to-be-updated data to the front-end. + store_id + The id of the ``dcc.Store`` component which holds the indices of the visible + traces in the client. Leveraged to efficiently perform the asynchronous update of + the visible and invisible traces of the ``Graph``. 
""" + # Callback triggers when a stylistic change is made to the graph + # this includes hiding traces or making them visible again, which is the + # desired use-case + app.clientside_callback( + ''' + function(restyleData, gdID) { + // HELPER FUNCTIONS + + function getGraphDiv(gdID){ + // see this link for more information https://stackoverflow.com/a/34002028 + let graphDiv = document?.querySelectorAll('div[id*="' + gdID + '"][class*="dash-graph"]'); + if (graphDiv.length > 1) { + throw new SyntaxError("UpdateStore: multiple graphs with ID=" + gdID + " found; n=" + graphDiv.length + " (either multiple graphs with same ID's or current ID is a str-subset of other graph IDs)"); + } else if (graphDiv.length < 1) { + throw new SyntaxError("UpdateStore: no graphs with ID=" + gdID + " found"); + } + graphDiv = graphDiv?.[0]?.getElementsByClassName('js-plotly-plot')?.[0]; + const isDOMElement = el => el instanceof HTMLElement + if (!isDOMElement) { + throw new Error(`Invalid gdID '${gdID}'`); + } + return graphDiv; + } + + //MAIN CALLBACK + let storeData = {'visible':[], 'invisible':[]}; + if (restyleData) { + let graphDiv = getGraphDiv(gdID); + + //console.log("restyleData:"); + //console.log(restyleData); + //console.log("\tgraph data -> visibility of traces: "); + + let visible_traces = []; + let invisible_traces = []; + graphDiv.data.forEach((trace, index) => { + //console.log('\tvisible: ' + trace.visible); + if (trace.visible == true || trace.visible == undefined) { + visible_traces.push(index); + } else { + invisible_traces.push(index); + } + }); + storeData = {'visible':visible_traces, 'invisible':invisible_traces}; + } + //console.log(storeData); + return storeData; + } + ''', + dash.dependencies.Output(store_id, "data"), + dash.dependencies.Input(graph_id, "restyleData"), + dash.dependencies.State(graph_id, "id") + ) + app.callback( - # dash.dependencies.Output(trace_updater_id, "updateData"), dash.dependencies.Output(trace_updater_id, "visibleUpdateData"), dash.dependencies.Input(graph_id, "relayoutData"), - dash.dependencies.State(graph_id, "figure"), + # dash.dependencies.State(graph_id, "restyleData"), + dash.dependencies.State(store_id, "data"), prevent_initial_call=True, )(self.construct_update_data) app.callback( dash.dependencies.Output(trace_updater_id, "invisibleUpdateData"), - dash.dependencies.Input(trace_updater_id, "visibleUpdateData"), + # dash.dependencies.Input(trace_updater_id, "visibleUpdateData"), + dash.dependencies.Input(trace_updater_id, "visibleUpdate"), dash.dependencies.State(graph_id, "relayoutData"), - dash.dependencies.State(graph_id, "figure"), + dash.dependencies.State(store_id, "data"), prevent_initial_call=True, )(self.construct_invisible_update_data) diff --git a/plotly_resampler/figure_resampler/figure_resampler_interface.py b/plotly_resampler/figure_resampler/figure_resampler_interface.py index 9ddb6441..715e201b 100644 --- a/plotly_resampler/figure_resampler/figure_resampler_interface.py +++ b/plotly_resampler/figure_resampler/figure_resampler_interface.py @@ -394,9 +394,9 @@ def _check_update_figure_dict( indices_to_use = [] for idx, trace in enumerate(figure["data"]): - # We skip when the trace-idx already has been updated. + # We skip when the trace-idx already has been updated or when it's not due for an update. 
if idx in updated_trace_indices or idx not in indices_to_use: - print(f'idx {idx} was not resampled') + # print(f'idx {idx} was not resampled') continue if xaxis_filter is not None: @@ -1160,7 +1160,7 @@ def replace(self, figure: go.Figure, convert_existing_traces: bool = True): resampled_trace_prefix_suffix=(self._prefix, self._suffix), ) - def construct_update_data(self, relayout_data: dict, figure: dict) -> List[dict]: + def construct_update_data(self, relayout_data: dict, trace_visibility: dict) -> List[dict]: """Construct the to-be-updated front-end data, based on the layout change. Attention @@ -1193,14 +1193,12 @@ def construct_update_data(self, relayout_data: dict, figure: dict) -> List[dict] in each dict. """ - visible_trace_idx = [] - - if figure: - for idx, trace in enumerate(figure["data"]): - visible = trace.get("visible", True) - if visible is True: - visible_trace_idx.append(idx) - print(visible_trace_idx) + + if len(trace_visibility["visible"]) == 0 and len(trace_visibility["invisible"]) == 0: + visible_trace_idx = [i for i, trace in enumerate(self._data)] + else: + visible_trace_idx = trace_visibility["visible"] + # import json # import datetime # with open(f'figure_{datetime.datetime.now().strftime("%H_%M")}.json', 'w') as f: @@ -1286,22 +1284,13 @@ def construct_update_data(self, relayout_data: dict, figure: dict) -> List[dict] # the client front-end can know which trace needs to be updated trace_reduced.update({"index": idx}) layout_traces_list.append(trace_reduced) - print(layout_traces_list) + # print(layout_traces_list) return layout_traces_list - def construct_invisible_update_data(self, visible_update_data: int, relayout_data, figure): - # print(visible_updated) - # print(f'relayout: {relayout}') - invisible_trace_idx = [] - - if figure: - - # print(figure["data"][0].get('visible')) - for idx, trace in enumerate(figure["data"]): - visible = trace.get("visible", True) - if visible is not True: - invisible_trace_idx.append(idx) - print(invisible_trace_idx) + def construct_invisible_update_data(self, visible_update: int, relayout_data, + trace_visibility: dict): + invisible_trace_idx = trace_visibility["invisible"] + print(f'invisible_trace_idx: {invisible_trace_idx}') # import json # import datetime # with open(f'figure_{datetime.datetime.now().strftime("%H_%M")}.json', 'w') as f: From 5ccb488433b5596a5ca4168a6142d795722d2a1c Mon Sep 17 00:00:00 2001 From: Isis Garayalde Vandendriessche Date: Mon, 13 Feb 2023 12:37:08 +0100 Subject: [PATCH 06/15] 1st iteration visible/invisible trace update (with formatting) --- Makefile | 31 +++ examples/dash_apps/01_minimal_global.py | 17 +- examples/dash_apps/02_minimal_cache.py | 5 +- .../dash_apps/03_minimal_cache_dynamic.py | 5 +- examples/dash_apps/11_sine_generator.py | 7 +- examples/dash_apps/12_file_selector.py | 9 +- examples/dash_apps/13_coarse_fine.py | 13 +- examples/dash_apps/usage.py | 15 +- examples/dash_apps/utils/callback_helpers.py | 8 +- examples/other_apps/streamlit_app.py | 15 +- .../aggregation/aggregation_interface.py | 8 +- plotly_resampler/aggregation/aggregators.py | 57 +++--- .../aggregation/algorithms/lttb_c.py | 1 + plotly_resampler/figure_resampler/__init__.py | 1 - .../figure_resampler/figure_resampler.py | 76 ++++---- .../figure_resampler_interface.py | 183 +++++++++--------- .../figurewidget_resampler.py | 2 +- plotly_resampler/figure_resampler/utils.py | 5 +- plotly_resampler/registering.py | 7 +- tests/conftest.py | 21 +- tests/fr_selenium.py | 20 +- tests/test_aggregators.py | 22 ++- 
tests/test_composability.py | 3 +- tests/test_figure_resampler.py | 13 +- tests/test_figure_resampler_selenium.py | 4 +- tests/test_figurewidget_resampler.py | 12 +- tests/test_registering.py | 9 +- tests/test_serialization.py | 53 ++--- tests/test_utils.py | 7 +- 29 files changed, 355 insertions(+), 274 deletions(-) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..018d1361 --- /dev/null +++ b/Makefile @@ -0,0 +1,31 @@ +black = black plotly_resampler examples tests +isort = isort plotly_resampler examples tests + +.PHONY: format +format: + $(isort) + $(black) + +.PHONY: lint +lint: + poetry run ruff plotly_resampler tests + poetry run $(isort) --check-only --df + poetry run $(black) --check --diff + +.PHONY: test +test: + poetry run pytest --cov-report term-missing --cov=plotly_resampler tests + +.PHONY: clean +clean: + rm -rf `find . -name __pycache__` + rm -rf .cache + rm -rf .pytest_cache + rm -rf *.egg-info + rm -f .coverage + rm -rf build + + rm -f `find . -type f -name '*.py[co]' ` + rm -f `find . -type f -name '*~' ` + rm -f `find . -type f -name '.*~' ` + rm -f `find . -type f -name '*.cpython-*' ` diff --git a/examples/dash_apps/01_minimal_global.py b/examples/dash_apps/01_minimal_global.py index 8ac3f2e4..c9f03f16 100644 --- a/examples/dash_apps/01_minimal_global.py +++ b/examples/dash_apps/01_minimal_global.py @@ -15,11 +15,11 @@ import numpy as np import plotly.graph_objects as go -from dash import Input, Output, dcc, html, Dash, no_update, callback_context +from dash import Dash, Input, Output, callback_context, dcc, html, no_update from graph_reporter import GraphReporter +from trace_updater import TraceUpdater from plotly_resampler import FigureResampler -from trace_updater import TraceUpdater # Data that will be used for the plotly-resampler figures n = 500_000 @@ -40,7 +40,6 @@ html.H1("plotly-resampler global variable", style={"textAlign": "center"}), html.Button("plot chart", id="plot-button", n_clicks=0), html.Hr(), - # The graph and it's needed components to update efficiently dcc.Store(id="visible-indices", data={"visible": [], "invisible": []}), dcc.Graph(id="graph-id"), @@ -66,18 +65,22 @@ def plot_graph(n_clicks): global fig fig.replace(go.Figure()) fig._print_verbose = True - fig.add_trace(go.Scattergl(name="log"), hf_x=x, hf_y=noisy_sin * .9999995 ** x) - fig.add_trace(go.Scattergl(name="exp"), hf_x=x, hf_y=noisy_sin * 1.000002 ** x) + fig.add_trace(go.Scattergl(name="log"), hf_x=x, hf_y=noisy_sin * 0.9999995**x) + fig.add_trace(go.Scattergl(name="exp"), hf_x=x, hf_y=noisy_sin * 1.000002**x) fig.add_trace(go.Scattergl(name="const"), hf_x=x, hf_y=flat) - fig.add_trace(go.Scattergl(name="poly"), hf_x=x, hf_y=noisy_sin * 1.000002 ** 2) + fig.add_trace(go.Scattergl(name="poly"), hf_x=x, hf_y=noisy_sin * 1.000002**2) fig.update_layout(showlegend=True) return fig else: return no_update, no_update + # Register the graph update callbacks to the layout fig.register_update_graph_callback( - app=app, graph_id="graph-id", trace_updater_id="trace-updater", store_id="visible-indices" + app=app, + graph_id="graph-id", + trace_updater_id="trace-updater", + store_id="visible-indices", ) # --------------------------------- Running the app --------------------------------- diff --git a/examples/dash_apps/02_minimal_cache.py b/examples/dash_apps/02_minimal_cache.py index 532e2e00..e8b9b4be 100644 --- a/examples/dash_apps/02_minimal_cache.py +++ b/examples/dash_apps/02_minimal_cache.py @@ -11,15 +11,16 @@ import numpy as np import 
plotly.graph_objects as go -from dash import Input, Output, State, dcc, html, no_update, callback_context +from dash import Input, Output, State, callback_context, dcc, html, no_update from dash_extensions.enrich import ( DashProxy, ServersideOutput, ServersideOutputTransform, ) -from plotly_resampler import FigureResampler from trace_updater import TraceUpdater +from plotly_resampler import FigureResampler + # Data that will be used for the plotly-resampler figures x = np.arange(2_000_000) noisy_sin = (3 + np.sin(x / 200) + np.random.randn(len(x)) / 10) * x / 1_000 diff --git a/examples/dash_apps/03_minimal_cache_dynamic.py b/examples/dash_apps/03_minimal_cache_dynamic.py index f59fe44c..ed0216e5 100644 --- a/examples/dash_apps/03_minimal_cache_dynamic.py +++ b/examples/dash_apps/03_minimal_cache_dynamic.py @@ -13,8 +13,8 @@ """ -from uuid import uuid4 from typing import List +from uuid import uuid4 import numpy as np import plotly.graph_objects as go @@ -26,9 +26,10 @@ Trigger, TriggerTransform, ) -from plotly_resampler import FigureResampler from trace_updater import TraceUpdater +from plotly_resampler import FigureResampler + # Data that will be used for the plotly-resampler figures x = np.arange(2_000_000) noisy_sin = (3 + np.sin(x / 200) + np.random.randn(len(x)) / 10) * x / 1_000 diff --git a/examples/dash_apps/11_sine_generator.py b/examples/dash_apps/11_sine_generator.py index fcbae17a..69b5f5c6 100644 --- a/examples/dash_apps/11_sine_generator.py +++ b/examples/dash_apps/11_sine_generator.py @@ -14,10 +14,10 @@ from uuid import uuid4 +import dash_bootstrap_components as dbc import numpy as np import plotly.graph_objects as go -import dash_bootstrap_components as dbc -from dash import MATCH, Input, Output, State, dcc, html, no_update, callback_context +from dash import MATCH, Input, Output, State, callback_context, dcc, html, no_update from dash_extensions.enrich import ( DashProxy, ServersideOutput, @@ -25,9 +25,10 @@ Trigger, TriggerTransform, ) -from plotly_resampler import FigureResampler from trace_updater import TraceUpdater +from plotly_resampler import FigureResampler + # --------------------------------------Globals --------------------------------------- app = DashProxy( __name__, diff --git a/examples/dash_apps/12_file_selector.py b/examples/dash_apps/12_file_selector.py index 08cab771..120abe70 100644 --- a/examples/dash_apps/12_file_selector.py +++ b/examples/dash_apps/12_file_selector.py @@ -12,19 +12,18 @@ import dash_bootstrap_components as dbc import plotly.graph_objects as go -from dash import Input, Output, State, dcc, html, no_update, callback_context - +from dash import Input, Output, State, callback_context, dcc, html, no_update from dash_extensions.enrich import ( DashProxy, ServersideOutput, ServersideOutputTransform, ) -from plotly_resampler import FigureResampler from trace_updater import TraceUpdater - -from utils.callback_helpers import multiple_folder_file_selector, get_selector_states +from utils.callback_helpers import get_selector_states, multiple_folder_file_selector from utils.graph_construction import visualize_multiple_files +from plotly_resampler import FigureResampler + # --------------------------------------Globals --------------------------------------- app = DashProxy( __name__, diff --git a/examples/dash_apps/13_coarse_fine.py b/examples/dash_apps/13_coarse_fine.py index 61648497..4d945a5b 100644 --- a/examples/dash_apps/13_coarse_fine.py +++ b/examples/dash_apps/13_coarse_fine.py @@ -13,24 +13,23 @@ __author__ = "Jonas Van Der Donckt" 
-import dash_bootstrap_components as dbc -import plotly.graph_objects as go from pathlib import Path from typing import List -from dash import Input, Output, State, dcc, html, no_update, callback_context +import dash_bootstrap_components as dbc +import plotly.graph_objects as go +from dash import Input, Output, State, callback_context, dcc, html, no_update from dash_extensions.enrich import ( DashProxy, ServersideOutput, ServersideOutputTransform, ) - -from plotly_resampler import FigureResampler from trace_updater import TraceUpdater - -from utils.callback_helpers import multiple_folder_file_selector, get_selector_states +from utils.callback_helpers import get_selector_states, multiple_folder_file_selector from utils.graph_construction import visualize_multiple_files +from plotly_resampler import FigureResampler + # --------------------------------------Globals --------------------------------------- app = DashProxy( __name__, diff --git a/examples/dash_apps/usage.py b/examples/dash_apps/usage.py index e63d98c9..51d7eaf2 100644 --- a/examples/dash_apps/usage.py +++ b/examples/dash_apps/usage.py @@ -1,11 +1,12 @@ -import trace_updater import dash +import numpy as np import plotly.graph_objs as go -from dash import html, dcc, Input, Output +import trace_updater +from dash import Input, Output, dcc, html +from trace_updater import TraceUpdater + from plotly_resampler import FigureResampler from plotly_resampler.aggregation import EveryNthPoint -import numpy as np -from trace_updater import TraceUpdater # Construct a high-frequency signal n = 1_000_000 @@ -21,7 +22,9 @@ resampled_trace_prefix_suffix=("", ""), ) for i in range(100): - fig.add_trace(go.Scattergl(name=f"sine-{i}", showlegend=True), hf_x=x, hf_y=noisy_sin + 10 * i) + fig.add_trace( + go.Scattergl(name=f"sine-{i}", showlegend=True), hf_x=x, hf_y=noisy_sin + 10 * i + ) # Construct app & its layout @@ -40,4 +43,4 @@ if __name__ == "__main__": - app.run_server(debug=True, port=8050) \ No newline at end of file + app.run_server(debug=True, port=8050) diff --git a/examples/dash_apps/utils/callback_helpers.py b/examples/dash_apps/utils/callback_helpers.py index dbb53bfb..5bcfb9b0 100644 --- a/examples/dash_apps/utils/callback_helpers.py +++ b/examples/dash_apps/utils/callback_helpers.py @@ -3,10 +3,10 @@ __author__ = "Jonas Van Der Donckt" +import itertools from pathlib import Path from typing import Dict, List -import itertools import dash_bootstrap_components as dbc from dash import Input, Output, State, dcc, html from functional import seq @@ -21,8 +21,8 @@ def _update_file_widget(folder): set( list( seq(Path(folder).iterdir()) - .filter(lambda x: x.is_file() and x.name.endswith("parquet")) - .map(lambda x: x.name) + .filter(lambda x: x.is_file() and x.name.endswith("parquet")) + .map(lambda x: x.name) ) ) ) @@ -41,7 +41,7 @@ def _register_selection_callbacks(app, ids=None): def multiple_folder_file_selector( - app, name_folders_list: List[Dict[str, dict]], multi=True + app, name_folders_list: List[Dict[str, dict]], multi=True ) -> dbc.Card: """Constructs a folder user date selector diff --git a/examples/other_apps/streamlit_app.py b/examples/other_apps/streamlit_app.py index 1f282ee0..cc507005 100644 --- a/examples/other_apps/streamlit_app.py +++ b/examples/other_apps/streamlit_app.py @@ -13,30 +13,31 @@ __author__ = "Jeroen Van Der Donckt" +# 0. Create a noisy sine wave +import numpy as np import plotly.graph_objects as go + from plotly_resampler import FigureResampler -# 0. 
Create a noisy sine wave -import numpy as np x = np.arange(1_000_000) noisy_sin = (3 + np.sin(x / 200) + np.random.randn(len(x)) / 10) * x / 1_000 ### 1. Use FigureResampler fig = FigureResampler(default_n_shown_samples=2_000) -fig.add_trace(go.Scattergl(name='noisy sine', showlegend=True), hf_x=x, hf_y=noisy_sin) +fig.add_trace(go.Scattergl(name="noisy sine", showlegend=True), hf_x=x, hf_y=noisy_sin) fig.update_layout(height=700) ### 2. Run the visualization (which is a dash app) in a (sub)process on a certain port # Note: starting a process allows executing code after `.show_dash` is called from multiprocessing import Process + port = 9022 -proc = Process( - target=fig.show_dash, kwargs=dict(mode="external", port=port) -).start() +proc = Process(target=fig.show_dash, kwargs=dict(mode="external", port=port)).start() # Deleting the lines below this and running this file will result in a classic running dash app # Note: for just a dash app it is not even necessary to execute .show_dash in a (sub)process ### 3. Add as iframe component to streamlit import streamlit.components.v1 as components -components.iframe(f'http://localhost:{port}', height=700) + +components.iframe(f"http://localhost:{port}", height=700) diff --git a/plotly_resampler/aggregation/aggregation_interface.py b/plotly_resampler/aggregation/aggregation_interface.py index 80c63eaa..da7e9778 100644 --- a/plotly_resampler/aggregation/aggregation_interface.py +++ b/plotly_resampler/aggregation/aggregation_interface.py @@ -29,14 +29,14 @@ def __init__( irregularly sampled data. By default, True. nan_position: str, optional Indicates where nans must be placed when gaps are detected. \n - If ``'end'``, the first point after a gap will be replaced with a + If ``'end'``, the first point after a gap will be replaced with a nan-value \n - If ``'begin'``, the last point before a gap will be replaced with a + If ``'begin'``, the last point before a gap will be replaced with a nan-value \n - If ``'both'``, both the encompassing gap datapoints are replaced with + If ``'both'``, both the encompassing gap datapoints are replaced with nan-values \n .. note:: - This parameter only has an effect when ``interleave_gaps`` is set + This parameter only has an effect when ``interleave_gaps`` is set to *True*. dtype_regex_list: List[str], optional List containing the regex matching the supported datatypes, by default None. 
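For intuition on the gap-handling parameters documented above, a minimal sketch (the series shape, the gap location, and n_out are arbitrary; EveryNthPoint and the aggregate(series, n_out) entry point are the same ones exercised by the test-suite later in this patch):

    import numpy as np
    import pandas as pd

    from plotly_resampler.aggregation import EveryNthPoint

    # A float series with one clear sampling gap (index jumps from 99 to 10_000)
    idx = np.concatenate([np.arange(100), np.arange(10_000, 11_000)])
    s = pd.Series(np.random.randn(len(idx)), index=idx)

    # With interleave_gaps=True the irregular index step is detected; with
    # nan_position="end" the first sample after the gap should come back as a
    # NaN, so plotly draws a visual break instead of a connecting line
    out = EveryNthPoint(interleave_gaps=True, nan_position="end").aggregate(s, n_out=500)
    print(out.isna().sum())  # expected: 1 NaN, marking the single gap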
diff --git a/plotly_resampler/aggregation/aggregators.py b/plotly_resampler/aggregation/aggregators.py index d7f64b7f..158d9927 100644 --- a/plotly_resampler/aggregation/aggregators.py +++ b/plotly_resampler/aggregation/aggregators.py @@ -15,6 +15,7 @@ import pandas as pd from ..aggregation.aggregation_interface import AbstractSeriesAggregator + # from plotly_resampler.aggregation import AbstractSeriesAggregator try: @@ -78,7 +79,7 @@ def __init__(self, interleave_gaps: bool = True, nan_position="end"): interleave_gaps, nan_position, dtype_regex_list=[rf"{dtype}\d*" for dtype in ["float", "int", "uint"]] - + ["category", "bool"], + + ["category", "bool"], ) def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series: @@ -153,17 +154,17 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series: # Calculate the argmin & argmax on the reshaped view of `s` & # add the corresponding offset argmin = ( - s.values[: block_size * offset.shape[0]] - .reshape(-1, block_size) - .argmin(axis=1) - + offset + s.values[: block_size * offset.shape[0]] + .reshape(-1, block_size) + .argmin(axis=1) + + offset ) argmax = ( - s.values[argmax_offset: block_size * offset.shape[0] + argmax_offset] - .reshape(-1, block_size) - .argmax(axis=1) - + offset - + argmax_offset + s.values[argmax_offset : block_size * offset.shape[0] + argmax_offset] + .reshape(-1, block_size) + .argmax(axis=1) + + offset + + argmax_offset ) # Sort the argmin & argmax (where we append the first and last index item) # and then slice the original series on these indexes. @@ -218,16 +219,16 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series: # Calculate the argmin & argmax on the reshaped view of `s` & # add the corresponding offset argmin = ( - s.values[: block_size * offset.shape[0]] - .reshape(-1, block_size) - .argmin(axis=1) - + offset + s.values[: block_size * offset.shape[0]] + .reshape(-1, block_size) + .argmin(axis=1) + + offset ) argmax = ( - s.values[: block_size * offset.shape[0]] - .reshape(-1, block_size) - .argmax(axis=1) - + offset + s.values[: block_size * offset.shape[0]] + .reshape(-1, block_size) + .argmax(axis=1) + + offset ) # Note: the implementation below flips the array to search from @@ -280,7 +281,7 @@ def __init__(self, interleave_gaps: bool = True, nan_position="end"): interleave_gaps, nan_position, dtype_regex_list=[rf"{dtype}\d*" for dtype in ["float", "int", "uint"]] - + ["category", "bool"], + + ["category", "bool"], ) def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series: @@ -288,7 +289,7 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series: ratio_threshold = 100 # TODO -> test this with a move of the .so file - if LTTB_core.__name__ == 'LTTB_core_py': + if LTTB_core.__name__ == "LTTB_core_py": size_threshold = 1_000_000 if s.shape[0] > size_threshold and s.shape[0] / n_out > ratio_threshold: @@ -338,11 +339,11 @@ class FuncAggregator(AbstractSeriesAggregator): """ def __init__( - self, - aggregation_func, - interleave_gaps: bool = True, - nan_position="end", - dtype_regex_list=None, + self, + aggregation_func, + interleave_gaps: bool = True, + nan_position="end", + dtype_regex_list=None, ): """ Parameters @@ -385,8 +386,8 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series: # where each value is repeated based $len(s)/n_out$ times by=np.repeat(np.arange(n_out), group_size)[: len(s)] ) - .agg(self.aggregation_func) - .dropna() + .agg(self.aggregation_func) + .dropna() ) # Create an index-estimation for real-time data # Add one to the index so it's pointed at the end 
of the window @@ -466,4 +467,4 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series: rel_idxs.append(slice.idxmax()) rel_idxs.append(slice.index[-1]) - return s.loc[np.unique(rel_idxs)] \ No newline at end of file + return s.loc[np.unique(rel_idxs)] diff --git a/plotly_resampler/aggregation/algorithms/lttb_c.py b/plotly_resampler/aggregation/algorithms/lttb_c.py index 629c43d8..90395bfa 100644 --- a/plotly_resampler/aggregation/algorithms/lttb_c.py +++ b/plotly_resampler/aggregation/algorithms/lttb_c.py @@ -2,6 +2,7 @@ import numpy as np + from .lttbc import ( downsample_double_double, downsample_int_double, diff --git a/plotly_resampler/figure_resampler/__init__.py b/plotly_resampler/figure_resampler/__init__.py index f6ecd9a7..39654dc1 100644 --- a/plotly_resampler/figure_resampler/__init__.py +++ b/plotly_resampler/figure_resampler/__init__.py @@ -13,7 +13,6 @@ from .figure_resampler import FigureResampler from .figurewidget_resampler import FigureWidgetResampler - __all__ = [ "FigureResampler", "FigureWidgetResampler", diff --git a/plotly_resampler/figure_resampler/figure_resampler.py b/plotly_resampler/figure_resampler/figure_resampler.py index b62e2a1e..f9861634 100644 --- a/plotly_resampler/figure_resampler/figure_resampler.py +++ b/plotly_resampler/figure_resampler/figure_resampler.py @@ -10,18 +10,18 @@ __author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost" +import base64 +import uuid import warnings -from typing import Tuple, List +from typing import List, Tuple -import uuid -import base64 import dash import plotly.graph_objects as go from flask_cors import cross_origin +from graph_reporter import GraphReporter from jupyter_dash import JupyterDash from plotly.basedatatypes import BaseFigure from trace_updater import TraceUpdater -from graph_reporter import GraphReporter from ..aggregation import AbstractSeriesAggregator, EfficientLTTB from .figure_resampler_interface import AbstractFigureAggregator @@ -92,19 +92,19 @@ def _display_inline_output(self, dashboard_url, width, height):
@@ -185,19 +185,19 @@ class FigureResampler(AbstractFigureAggregator, go.Figure): """Data aggregation functionality for ``go.Figures``.""" def __init__( - self, - figure: BaseFigure | dict = None, - convert_existing_traces: bool = True, - default_n_shown_samples: int = 1000, - default_downsampler: AbstractSeriesAggregator = EfficientLTTB(), - resampled_trace_prefix_suffix: Tuple[str, str] = ( - '[R] ', - "", - ), - show_mean_aggregation_size: bool = True, - convert_traces_kwargs: dict | None = None, - verbose: bool = False, - show_dash_kwargs: dict | None = None, + self, + figure: BaseFigure | dict = None, + convert_existing_traces: bool = True, + default_n_shown_samples: int = 1000, + default_downsampler: AbstractSeriesAggregator = EfficientLTTB(), + resampled_trace_prefix_suffix: Tuple[str, str] = ( + '[R] ', + "", + ), + show_mean_aggregation_size: bool = True, + convert_traces_kwargs: dict | None = None, + verbose: bool = False, + show_dash_kwargs: dict | None = None, ): """Initialize a dynamic aggregation data mirror using a dash web app. @@ -265,7 +265,7 @@ def __init__( f._grid_ref = figure._grid_ref f.add_traces(figure.data) elif isinstance(figure, dict) and ( - "data" in figure or "layout" in figure # or "frames" in figure # TODO + "data" in figure or "layout" in figure # or "frames" in figure # TODO ): # A figure as a dict, can be; # - a plotly figure as a dict (after calling `fig.to_dict()`) @@ -325,11 +325,11 @@ def __init__( self._host: str | None = None def show_dash( - self, - mode=None, - config: dict | None = None, - graph_properties: dict | None = None, - **kwargs, + self, + mode=None, + config: dict | None = None, + graph_properties: dict | None = None, + **kwargs, ): """Registers the :func:`update_graph` callback & show the figure in a dash app. @@ -375,7 +375,7 @@ def show_dash( """ available_modes = ["external", "inline", "inline_persistent", "jupyterlab"] assert ( - mode is None or mode in available_modes + mode is None or mode in available_modes ), f"mode must be one of {available_modes}" graph_properties = {} if graph_properties is None else graph_properties assert "config" not in graph_properties.keys() # There is a param for config @@ -396,7 +396,7 @@ def show_dash( TraceUpdater( id="trace-updater", gdID="resample-figure", sequentialUpdate=False ), - GraphReporter(id='graph-reporter', gId='resample-figure'), + GraphReporter(id="graph-reporter", gId="resample-figure"), ] ) self.register_update_graph_callback(app, "resample-figure", "trace-updater") @@ -447,7 +447,7 @@ def stop_server(self, warn: bool = True): # TODO: check if i should put the clientside callback to fill the store here or in a different function # for now, here def register_update_graph_callback( - self, app: dash.Dash, graph_id: str, trace_updater_id: str, store_id: str + self, app: dash.Dash, graph_id: str, trace_updater_id: str, store_id: str ): """Register the :func:`construct_update_data` method as callback function to the passed dash-app. 
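For context, the app-side wiring that this (now ``store_id``-aware) registration expects mirrors the examples earlier in the patch; a minimal sketch (all component ids are arbitrary, and the store must start out with empty visible/invisible lists):

    import dash
    import numpy as np
    import plotly.graph_objects as go
    from trace_updater import TraceUpdater

    from plotly_resampler import FigureResampler

    fig = FigureResampler(go.Figure())
    fig.add_trace(go.Scattergl(name="sine"), hf_y=np.sin(np.arange(1_000_000) / 500))

    app = dash.Dash(__name__)
    app.layout = dash.html.Div(
        [
            # Filled by the clientside restyle callback that gets registered below
            dash.dcc.Store(id="visible-indices", data={"visible": [], "invisible": []}),
            dash.dcc.Graph(id="graph-id", figure=fig),
            TraceUpdater(id="trace-updater", gdID="graph-id"),
        ]
    )

    fig.register_update_graph_callback(app, "graph-id", "trace-updater", "visible-indices")

    if __name__ == "__main__":
        app.run_server(debug=True, port=8050)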
@@ -473,7 +473,7 @@ def register_update_graph_callback( # this includes hiding traces or making them visible again, which is the # desired use-case app.clientside_callback( - ''' + """ function(restyleData, gdID) { // HELPER FUNCTIONS @@ -517,10 +517,10 @@ def register_update_graph_callback( //console.log(storeData); return storeData; } - ''', + """, dash.dependencies.Output(store_id, "data"), dash.dependencies.Input(graph_id, "restyleData"), - dash.dependencies.State(graph_id, "id") + dash.dependencies.State(graph_id, "id"), ) app.callback( diff --git a/plotly_resampler/figure_resampler/figure_resampler_interface.py b/plotly_resampler/figure_resampler/figure_resampler_interface.py index 715e201b..78f6bd42 100644 --- a/plotly_resampler/figure_resampler/figure_resampler_interface.py +++ b/plotly_resampler/figure_resampler/figure_resampler_interface.py @@ -13,21 +13,20 @@ __author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost" import re +from abc import ABC +from collections import namedtuple from copy import copy from typing import Dict, Iterable, List, Optional, Tuple, Union from uuid import uuid4 -from collections import namedtuple import dash import numpy as np import pandas as pd import plotly.graph_objects as go -from plotly.basedatatypes import BaseTraceType, BaseFigure +from plotly.basedatatypes import BaseFigure, BaseTraceType from ..aggregation import AbstractSeriesAggregator, EfficientLTTB -from .utils import round_td_str, round_number_str - -from abc import ABC +from .utils import round_number_str, round_td_str _hf_data_container = namedtuple("DataContainer", ["x", "y", "text", "hovertext"]) @@ -38,19 +37,19 @@ class AbstractFigureAggregator(BaseFigure, ABC): _high_frequency_traces = ["scatter", "scattergl"] def __init__( - self, - figure: BaseFigure, - convert_existing_traces: bool = True, - default_n_shown_samples: int = 1000, - default_downsampler: AbstractSeriesAggregator = EfficientLTTB(), - resampled_trace_prefix_suffix: Tuple[str, str] = ( - '[R] ', - "", - ), - show_mean_aggregation_size: bool = True, - convert_traces_kwargs: dict | None = None, - verbose: bool = False, - # TODO: add c_width parameter + self, + figure: BaseFigure, + convert_existing_traces: bool = True, + default_n_shown_samples: int = 1000, + default_downsampler: AbstractSeriesAggregator = EfficientLTTB(), + resampled_trace_prefix_suffix: Tuple[str, str] = ( + '[R] ', + "", + ), + show_mean_aggregation_size: bool = True, + convert_traces_kwargs: dict | None = None, + verbose: bool = False, + # TODO: add c_width parameter ): """Instantiate a resampling data mirror. @@ -209,10 +208,10 @@ def _get_current_graph(self) -> dict: } def _check_update_trace_data( - self, - trace: dict, - start=None, - end=None, + self, + trace: dict, + start=None, + end=None, ) -> Optional[Union[dict, BaseTraceType]]: """Check and update the passed ``trace`` its data properties based on the slice range.
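To make the clientside callback's output concrete: hiding trace 1 via the legend fires a ``restyleData`` event of the form ``[{'visible': ['legendonly']}, [1]]``, after which the callback re-scans ``graphDiv.data`` and partitions all trace indices by visibility. A Python rendering of that partitioning rule (the JavaScript above is the authoritative implementation; this sketch only mirrors its logic for illustration):

    def partition_traces(figure_data: list) -> dict:
        # A trace counts as visible when its `visible` attribute is True or
        # simply absent (plotly's default); 'legendonly' counts as invisible
        visible, invisible = [], []
        for idx, trace in enumerate(figure_data):
            if trace.get("visible", True) is True:
                visible.append(idx)
            else:
                invisible.append(idx)
        return {"visible": visible, "invisible": invisible}

    # e.g. after trace 1 was toggled off in the legend:
    print(partition_traces([{}, {"visible": "legendonly"}, {"visible": True}]))
    # -> {'visible': [0, 2], 'invisible': [1]}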
@@ -313,7 +312,7 @@ def _check_update_trace_data( else: # When not resampled: trim prefix and/or suffix if necessary if len(self._prefix) and name.startswith(self._prefix): - name = name[len(self._prefix):] + name = name[len(self._prefix) :] if len(self._suffix) and trace["name"].endswith(self._suffix): name = name[: -len(self._suffix)] trace["name"] = name @@ -343,13 +342,13 @@ def _check_update_trace_data( return None def _check_update_figure_dict( - self, - figure: dict, - start: Optional[Union[float, str]] = None, - stop: Optional[Union[float, str]] = None, - xaxis_filter: str = None, - updated_trace_indices: Optional[List[int]] = None, - indices_to_use: Optional[List[int]] = None, + self, + figure: dict, + start: Optional[Union[float, str]] = None, + stop: Optional[Union[float, str]] = None, + xaxis_filter: str = None, + updated_trace_indices: Optional[List[int]] = None, + indices_to_use: Optional[List[int]] = None, ) -> List[int]: """Check and update the traces within the figure dict. @@ -415,8 +414,8 @@ def _check_update_figure_dict( if x_anchor_trace is not None: xaxis_matches = ( figure["layout"] - .get("xaxis" + x_anchor_trace.lstrip("x"), {}) - .get("matches") + .get("xaxis" + x_anchor_trace.lstrip("x"), {}) + .get("matches") ) else: xaxis_matches = figure["layout"].get("xaxis", {}).get("matches") @@ -435,14 +434,14 @@ def _check_update_figure_dict( # * x_axis_filter_short not in [x_anchor or xaxis matches] for # NON first rows if ( - xaxis_filter_short == "x" - and ( - x_anchor_trace not in [None, "x"] - and xaxis_matches != xaxis_filter_short - ) + xaxis_filter_short == "x" + and ( + x_anchor_trace not in [None, "x"] + and xaxis_matches != xaxis_filter_short + ) ) or ( - xaxis_filter_short != "x" - and (xaxis_filter_short not in [x_anchor_trace, xaxis_matches]) + xaxis_filter_short != "x" + and (xaxis_filter_short not in [x_anchor_trace, xaxis_matches]) ): continue @@ -482,9 +481,9 @@ def _get_figure_class(constr: type) -> type: @staticmethod def _slice_time( - hf_series: pd.Series, - t_start: Optional[pd.Timestamp] = None, - t_stop: Optional[pd.Timestamp] = None, + hf_series: pd.Series, + t_start: Optional[pd.Timestamp] = None, + t_stop: Optional[pd.Timestamp] = None, ) -> pd.Series: """Slice the time-indexed ``hf_series`` for the passed pd.Timestamps. @@ -511,7 +510,7 @@ def _slice_time( """ def to_same_tz( - ts: Union[pd.Timestamp, None], reference_tz=hf_series.index.tz + ts: Union[pd.Timestamp, None], reference_tz=hf_series.index.tz ) -> Union[pd.Timestamp, None]: """Adjust `ts` its timezone to the `reference_tz`.""" if ts is None: @@ -529,7 +528,7 @@ def to_same_tz( if t_start is not None and t_stop is not None: assert t_start.tz == t_stop.tz - return hf_series[to_same_tz(t_start): to_same_tz(t_stop)] + return hf_series[to_same_tz(t_start) : to_same_tz(t_stop)] @property def hf_data(self): @@ -595,12 +594,12 @@ def _to_hf_series(x: np.ndarray, y: np.ndarray) -> pd.Series: ) def _parse_get_trace_props( - self, - trace: BaseTraceType, - hf_x: Iterable = None, - hf_y: Iterable = None, - hf_text: Iterable = None, - hf_hovertext: Iterable = None, + self, + trace: BaseTraceType, + hf_x: Iterable = None, + hf_y: Iterable = None, + hf_text: Iterable = None, + hf_hovertext: Iterable = None, ) -> _hf_data_container: """Parse and capture the possibly high-frequency trace-props in a datacontainer. 
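Since ``_slice_time`` and its ``to_same_tz`` helper (above) exist precisely to guard label-based slicing against mixed timezones, a small sketch of the invariant they maintain (timestamps and sizes chosen arbitrarily):

    import pandas as pd

    # A tz-aware high-frequency series (UTC index)
    idx = pd.date_range("2023-01-01", periods=10_000, freq="1s", tz="UTC")
    hf_series = pd.Series(range(len(idx)), index=idx)

    # A slice boundary arriving in another timezone (e.g., from the front-end)
    t_start = pd.Timestamp("2023-01-01 01:00:00", tz="Europe/Brussels")

    # Mirroring to_same_tz: align the boundary with the index's timezone
    # before slicing, so the comparison is well-defined
    sliced = hf_series[t_start.tz_convert(idx.tz) :]
    print(sliced.index[0])  # 2023-01-01 00:00:00+00:00, i.e. the same instant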
@@ -733,12 +732,12 @@ def _parse_get_trace_props( return _hf_data_container(hf_x, hf_y, hf_text, hf_hovertext) def _construct_hf_data_dict( - self, - dc: _hf_data_container, - trace: BaseTraceType, - downsampler: AbstractSeriesAggregator | None, - max_n_samples: int | None, - offset=0, + self, + dc: _hf_data_container, + trace: BaseTraceType, + downsampler: AbstractSeriesAggregator | None, + max_n_samples: int | None, + offset=0, ) -> dict: """Create the `hf_data` dict which will be put in the `_hf_data` property. @@ -804,17 +803,17 @@ def _construct_hf_data_dict( } def add_trace( - self, - trace: Union[BaseTraceType, dict], - max_n_samples: int = None, - downsampler: AbstractSeriesAggregator = None, - limit_to_view: bool = False, - # Use these if you want some speedups (and are working with really large data) - hf_x: Iterable = None, - hf_y: Iterable = None, - hf_text: Union[str, Iterable] = None, - hf_hovertext: Union[str, Iterable] = None, - **trace_kwargs, + self, + trace: Union[BaseTraceType, dict], + max_n_samples: int = None, + downsampler: AbstractSeriesAggregator = None, + limit_to_view: bool = False, + # Use these if you want some speedups (and are working with really large data) + hf_x: Iterable = None, + hf_y: Iterable = None, + hf_text: Union[str, Iterable] = None, + hf_hovertext: Union[str, Iterable] = None, + **trace_kwargs, ): """Add a trace to the figure. @@ -977,14 +976,14 @@ def add_trace( return super(self._figure_class, self).add_trace(trace, **trace_kwargs) def add_traces( - self, - data: List[BaseTraceType | dict] | BaseTraceType | Dict, - max_n_samples: None | List[int] | int = None, - downsamplers: None - | List[AbstractSeriesAggregator] - | AbstractFigureAggregator = None, - limit_to_views: List[bool] | bool = False, - **traces_kwargs, + self, + data: List[BaseTraceType | dict] | BaseTraceType | Dict, + max_n_samples: None | List[int] | int = None, + downsamplers: None + | List[AbstractSeriesAggregator] + | AbstractFigureAggregator = None, + limit_to_views: List[bool] | bool = False, + **traces_kwargs, ): """Add traces to the figure. @@ -1066,11 +1065,11 @@ def add_traces( limit_to_views = [limit_to_views] * len(data) for i, (trace, max_out, downsampler, limit_to_view) in enumerate( - zip(data, max_n_samples, downsamplers, limit_to_views) + zip(data, max_n_samples, downsamplers, limit_to_views) ): if ( - trace.type.lower() not in self._high_frequency_traces - or self._hf_data.get(trace.uid) is not None + trace.type.lower() not in self._high_frequency_traces + or self._hf_data.get(trace.uid) is not None ): continue @@ -1160,7 +1159,9 @@ def replace(self, figure: go.Figure, convert_existing_traces: bool = True): resampled_trace_prefix_suffix=(self._prefix, self._suffix), ) - def construct_update_data(self, relayout_data: dict, trace_visibility: dict) -> List[dict]: + def construct_update_data( + self, relayout_data: dict, trace_visibility: dict + ) -> List[dict]: """Construct the to-be-updated front-end data, based on the layout change. 
Attention @@ -1194,7 +1195,10 @@ def construct_update_data(self, relayout_data: dict, trace_visibility: dict) -> """ - if len(trace_visibility["visible"]) == 0 and len(trace_visibility["invisible"]) == 0: + if ( + len(trace_visibility["visible"]) == 0 + and len(trace_visibility["invisible"]) == 0 + ): visible_trace_idx = [i for i, trace in enumerate(self._data)] else: visible_trace_idx = trace_visibility["visible"] @@ -1226,9 +1230,11 @@ def construct_update_data(self, relayout_data: dict, trace_visibility: dict) -> stop=relayout_data[t_stop_key], xaxis_filter=xaxis, updated_trace_indices=updated_trace_indices, - indices_to_use=visible_trace_idx + indices_to_use=visible_trace_idx, ) - print(updated_trace_indices) # only contains ints (the indices of the updated traces) + print( + updated_trace_indices + ) # only contains ints (the indices of the updated traces) # 2. The user clicked on either autorange | reset axes autorange_matches = self._re_matches( @@ -1244,7 +1250,7 @@ def construct_update_data(self, relayout_data: dict, trace_visibility: dict) -> current_graph, xaxis_filter=xaxis, updated_trace_indices=updated_trace_indices, - indices_to_use=visible_trace_idx + indices_to_use=visible_trace_idx, ) # 2.1. Autorange -> do nothing, the autorange will be applied on the # current front-end view @@ -1287,10 +1293,11 @@ def construct_update_data(self, relayout_data: dict, trace_visibility: dict) -> # print(layout_traces_list) return layout_traces_list - def construct_invisible_update_data(self, visible_update: int, relayout_data, - trace_visibility: dict): + def construct_invisible_update_data( + self, visible_update: int, relayout_data, trace_visibility: dict + ): invisible_trace_idx = trace_visibility["invisible"] - print(f'invisible_trace_idx: {invisible_trace_idx}') + print(f"invisible_trace_idx: {invisible_trace_idx}") # import json # import datetime # with open(f'figure_{datetime.datetime.now().strftime("%H_%M")}.json', 'w') as f: @@ -1318,7 +1325,7 @@ def construct_invisible_update_data(self, visible_update: int, relayout_data, stop=relayout_data[t_stop_key], xaxis_filter=xaxis, updated_trace_indices=updated_trace_indices, - indices_to_use=invisible_trace_idx + indices_to_use=invisible_trace_idx, ) # print(updated_trace_indices) #only contains ints (the indices of the updated traces) @@ -1336,7 +1343,7 @@ def construct_invisible_update_data(self, visible_update: int, relayout_data, current_graph, xaxis_filter=xaxis, updated_trace_indices=updated_trace_indices, - indices_to_use=invisible_trace_idx + indices_to_use=invisible_trace_idx, ) # 2.1. 
Autorange -> do nothing, the autorange will be applied on the # current front-end view diff --git a/plotly_resampler/figure_resampler/figurewidget_resampler.py b/plotly_resampler/figure_resampler/figurewidget_resampler.py index d7fe4d6e..1d3397fc 100644 --- a/plotly_resampler/figure_resampler/figurewidget_resampler.py +++ b/plotly_resampler/figure_resampler/figurewidget_resampler.py @@ -66,7 +66,7 @@ def __init__( f._grid_ref = figure._grid_ref f.add_traces(figure.data) elif isinstance(figure, dict) and ( - "data" in figure or "layout" in figure # or "frames" in figure # TODO + "data" in figure or "layout" in figure # or "frames" in figure # TODO ): # A figure as a dict, can be; # - a plotly figure as a dict (after calling `fig.to_dict()`) diff --git a/plotly_resampler/figure_resampler/utils.py b/plotly_resampler/figure_resampler/utils.py index d868ee59..648dfdb4 100644 --- a/plotly_resampler/figure_resampler/utils.py +++ b/plotly_resampler/figure_resampler/utils.py @@ -1,10 +1,11 @@ """Utility functions for the figure_resampler submodule.""" import math -import pandas as pd +import pandas as pd from plotly.basedatatypes import BaseFigure -try: # Fails when IPywidgets is not installed + +try: # Fails when IPywidgets is not installed from plotly.basewidget import BaseFigureWidget except (ImportError, ModuleNotFoundError): BaseFigureWidget = type(None) diff --git a/plotly_resampler/registering.py b/plotly_resampler/registering.py index 467c8e1a..e12a9d8b 100644 --- a/plotly_resampler/registering.py +++ b/plotly_resampler/registering.py @@ -2,13 +2,14 @@ __author__ = "Jeroen Van Der Donckt, Jonas Van Der Donckt, Emiel Deprost" +from functools import wraps + +import plotly + from plotly_resampler import FigureResampler, FigureWidgetResampler from plotly_resampler.figure_resampler.figure_resampler_interface import ( AbstractFigureAggregator, ) -from functools import wraps - -import plotly WRAPPED_PREFIX = "[Plotly-Resampler]__" PLOTLY_MODULES = [ diff --git a/tests/conftest.py b/tests/conftest.py index 777d1b4f..f106505f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,16 +1,21 @@ """Fixtures and helper functions for testing""" +import os from typing import Union -import os import numpy as np import pandas as pd import plotly.graph_objects as go import pytest from plotly.subplots import make_subplots -from plotly_resampler import FigureResampler, LTTB, EveryNthPoint, unregister_plotly_resampler +from plotly_resampler import ( + LTTB, + EveryNthPoint, + FigureResampler, + unregister_plotly_resampler, +) # hyperparameters _nb_samples = 10_000 @@ -31,6 +36,7 @@ def _remove_file(file_path): if os.path.exists(file_path): os.remove(file_path) + @pytest.fixture def pickle_figure(): FIG_PATH = "fig.pkl" @@ -41,17 +47,18 @@ def pickle_figure(): @pytest.fixture def driver(): - from seleniumwire import webdriver - from webdriver_manager.chrome import ChromeDriverManager, ChromeType + import time + from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.desired_capabilities import DesiredCapabilities + from seleniumwire import webdriver + from webdriver_manager.chrome import ChromeDriverManager, ChromeType - import time time.sleep(1) - + options = Options() d = DesiredCapabilities.CHROME - d['goog:loggingPrefs'] = {'browser': 'ALL'} + d["goog:loggingPrefs"] = {"browser": "ALL"} if not TESTING_LOCAL: if headless: options.add_argument("--headless") diff --git a/tests/fr_selenium.py b/tests/fr_selenium.py index aca41414..4e431be8 100644 --- a/tests/fr_selenium.py +++ 
b/tests/fr_selenium.py @@ -11,25 +11,25 @@ __author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt" -import sys import json +import sys import time from typing import List, Union -from seleniumwire import webdriver -from seleniumwire.request import Request from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.by import By from selenium.webdriver.firefox.options import Options from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait +from seleniumwire import webdriver +from seleniumwire.request import Request def not_on_linux(): """Return True if the current platform is not Linux. - - Note: this will be used to add more waiting time to windows & mac os tests as - - on these OS's serialization of the figure is necessary (to start the dash app in a + + Note: this will be used to add more waiting time to windows & mac os tests as + - on these OS's serialization of the figure is necessary (to start the dash app in a multiprocessing.Process) https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods - on linux, the browser (i.e., sending & getting requests) goes a lot faster @@ -159,7 +159,7 @@ def browser_independent_single_callback_request_assert( class FigureResamplerGUITests: - """Wrapper for performing figure-resampler GUI. """ + """Wrapper for performing figure-resampler GUI.""" def __init__(self, driver: webdriver, port: int): """Construct an instance of A firefox selenium driver to fetch wearable data. @@ -185,7 +185,8 @@ def go_to_page(self): time.sleep(1) self.driver.get("http://localhost:{}".format(self.port)) self.on_page = True - if not_on_linux(): time.sleep(7) # bcs serialization of multiprocessing + if not_on_linux(): + time.sleep(7) # bcs serialization of multiprocessing max_nb_tries = 3 for _ in range(max_nb_tries): try: @@ -199,7 +200,8 @@ def clear_requests(self, sleep_time_s=1): del self.driver.requests def get_requests(self, delete: bool = True): - if not_on_linux(): time.sleep(2) # bcs slower browser + if not_on_linux(): + time.sleep(2) # bcs slower browser requests = self.driver.requests if delete: self.clear_requests() diff --git a/tests/test_aggregators.py b/tests/test_aggregators.py index 1287cca5..f6d60122 100644 --- a/tests/test_aggregators.py +++ b/tests/test_aggregators.py @@ -1,16 +1,18 @@ +import numpy as np +import pandas as pd +import pytest + from plotly_resampler.aggregation import ( + LTTB, + EfficientLTTB, EveryNthPoint, FuncAggregator, - LTTB, - MinMaxOverlapAggregator, MinMaxAggregator, - EfficientLTTB, + MinMaxOverlapAggregator, ) -from plotly_resampler.aggregation.algorithms.lttb_py import LTTB_core_py from plotly_resampler.aggregation.algorithms.lttb_c import LTTB_core_c -import pandas as pd -import numpy as np -import pytest +from plotly_resampler.aggregation.algorithms.lttb_py import LTTB_core_py + # --------------------------------- EveryNthPoint ------------------------------------ def test_every_nth_point_float_time_data(float_series): @@ -90,7 +92,7 @@ def test_every_nth_point_bool_sequence_data(bool_series): def test_every_nth_point_empty_series(): - empty_series = pd.Series(name="empty", dtype='float32') + empty_series = pd.Series(name="empty", dtype="float32") out = EveryNthPoint(interleave_gaps=True).aggregate(empty_series, n_out=1_000) assert out.equals(empty_series) @@ -232,7 +234,7 @@ def test_mmo_bool_sequence_data(bool_series): def test_mmo_empty_series(): - empty_series = pd.Series(name="empty", 
dtype='float32') + empty_series = pd.Series(name="empty", dtype="float32") out = MinMaxOverlapAggregator(interleave_gaps=True).aggregate( empty_series, n_out=1_000 ) @@ -646,7 +648,7 @@ def treat_string_as_numeric_data(x): # ------------------------------- LTTB_Bindings ------------------------------- def test_lttb_bindings(): - # Test whether both algorithms produce the same results with different types of + # Test whether both algorithms produce the same results with different types of # input data n = np.random.randint(low=1_000_000, high=2_000_000) x_int = np.arange(n, dtype="int64") diff --git a/tests/test_composability.py b/tests/test_composability.py index ca5b1917..dd632006 100644 --- a/tests/test_composability.py +++ b/tests/test_composability.py @@ -1,8 +1,9 @@ from random import sample + import plotly.graph_objects as go from plotly.subplots import make_subplots -from plotly_resampler import FigureResampler, FigureWidgetResampler +from plotly_resampler import FigureResampler, FigureWidgetResampler # ----------------------- Figure as Base ----------------------- if True: diff --git a/tests/test_figure_resampler.py b/tests/test_figure_resampler.py index 4b08946b..a9814dcb 100644 --- a/tests/test_figure_resampler.py +++ b/tests/test_figure_resampler.py @@ -3,15 +3,17 @@ __author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost" -import pytest +import multiprocessing import time +from typing import List + import numpy as np import pandas as pd -import multiprocessing import plotly.graph_objects as go +import pytest from plotly.subplots import make_subplots -from plotly_resampler import FigureResampler, LTTB, EveryNthPoint -from typing import List + +from plotly_resampler import LTTB, EveryNthPoint, FigureResampler def test_add_trace_kwarg_space(float_series, bool_series, cat_series): @@ -663,10 +665,11 @@ def test_manual_jupyterdashpersistentinline(): # no need to start the app (we just need the FigureResampler object) + import dash + from plotly_resampler.figure_resampler.figure_resampler import ( JupyterDashPersistentInlineOutput, ) - import dash app = JupyterDashPersistentInlineOutput("manual_app") assert hasattr(app, "_uid") diff --git a/tests/test_figure_resampler_selenium.py b/tests/test_figure_resampler_selenium.py index 31dbc45f..4e64ff13 100644 --- a/tests/test_figure_resampler_selenium.py +++ b/tests/test_figure_resampler_selenium.py @@ -549,7 +549,7 @@ def test_shared_hover_gui(driver, shared_hover_figure): fr.drag_and_zoom("x3y2", x0=0.1, x1=0.5, y0=0.5, y1=0.5) - # First, apply some horizontal based zooms + # First, apply some horizontal based zooms fr.clear_requests(sleep_time_s=1) fr.drag_and_zoom("x3y3", x0=0.1, x1=0.5, y0=0.5, y1=0.5) time.sleep(1) @@ -628,7 +628,7 @@ def test_multi_trace_go_figure(driver, multi_trace_go_figure): fr.drag_and_zoom("xy", x0=0.1, x1=0.3, y0=0.6, y1=0.9) fr.clear_requests(sleep_time_s=3) - # First, apply some horizontal based zooms + # First, apply some horizontal based zooms fr.clear_requests(sleep_time_s=1) fr.drag_and_zoom("xy", x0=0.1, x1=0.2, y0=0.5, y1=0.5) time.sleep(3) diff --git a/tests/test_figurewidget_resampler.py b/tests/test_figurewidget_resampler.py index 52301783..a8847e48 100644 --- a/tests/test_figurewidget_resampler.py +++ b/tests/test_figurewidget_resampler.py @@ -12,6 +12,7 @@ import plotly.graph_objects as go import pytest from plotly.subplots import make_subplots + from plotly_resampler import EfficientLTTB, EveryNthPoint, FigureWidgetResampler @@ -1795,7 +1796,9 @@ def 
test_fwr_object_bool_data(bool_series): def test_fwr_object_binary_data(): - binary_series = np.array([0, 1]*20, dtype="int32") # as this is << max_n_samples -> limit_to_view + binary_series = np.array( + [0, 1] * 20, dtype="int32" + ) # as this is << max_n_samples -> limit_to_view # First try with the original non-object binary series fig = FigureWidgetResampler() @@ -1812,7 +1815,9 @@ def test_fwr_object_binary_data(): fig.add_trace({"name": "s0"}, hf_y=binary_series_o, limit_to_view=True) assert binary_series_o.dtype == object assert len(fig.hf_data) == 1 - assert (fig.hf_data[0]["y"].dtype == "int32") or (fig.hf_data[0]["y"].dtype == "int64") + assert (fig.hf_data[0]["y"].dtype == "int32") or ( + fig.hf_data[0]["y"].dtype == "int64" + ) assert str(fig.data[0]["y"].dtype).startswith("int") assert np.all(fig.data[0]["y"] == binary_series) @@ -1833,7 +1838,7 @@ def test_fwr_copy_grid(): assert fwr._grid_ref == f._grid_ref assert fwr._grid_str is not None assert fwr._grid_str == f._grid_str - + ## go.FigureWidget fw = go.FigureWidget(f) assert fw._grid_ref is not None @@ -1858,6 +1863,7 @@ def test_fwr_copy_grid(): ## FigureResampler from plotly_resampler import FigureResampler + fr = FigureResampler(f) assert fr._grid_ref is not None assert fr._grid_str is not None diff --git a/tests/test_registering.py b/tests/test_registering.py index 421872d8..b2b8ac95 100644 --- a/tests/test_registering.py +++ b/tests/test_registering.py @@ -1,19 +1,20 @@ -import plotly.graph_objects as go -import plotly.express as px +from inspect import isfunction + import numpy as np +import plotly.express as px +import plotly.graph_objects as go from plotly_resampler import FigureResampler, FigureWidgetResampler from plotly_resampler.figure_resampler.figure_resampler_interface import ( AbstractFigureAggregator, ) from plotly_resampler.registering import ( + _get_plotly_constr, register_plotly_resampler, unregister_plotly_resampler, - _get_plotly_constr, ) from .conftest import registering_cleanup -from inspect import isfunction def test_get_plotly_const(registering_cleanup): diff --git a/tests/test_serialization.py b/tests/test_serialization.py index 182ae788..50221267 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -1,26 +1,27 @@ -from hashlib import sha1 -import plotly.graph_objects as go -import plotly.express as px -import numpy as np -import pickle import copy +import pickle +from hashlib import sha1 +from inspect import isfunction +import numpy as np +import plotly.express as px +import plotly.graph_objects as go from plotly.subplots import make_subplots + from plotly_resampler import FigureResampler, FigureWidgetResampler from plotly_resampler.registering import ( + _get_plotly_constr, register_plotly_resampler, unregister_plotly_resampler, - _get_plotly_constr, ) -from .conftest import registering_cleanup, pickle_figure -from inspect import isfunction - +from .conftest import pickle_figure, registering_cleanup #### PICKLING ## Test basic pickling + def test_pickle_figure_resampler(pickle_figure): nb_traces = 3 nb_samples = 5_007 @@ -49,18 +50,21 @@ def test_pickle_figure_resampler(pickle_figure): # Test for figure with subplots (check non-pickled private properties) fig = FigureResampler( make_subplots(rows=2, cols=1, shared_xaxes=True), - default_n_shown_samples=50, show_dash_kwargs=dict(port=8051), + default_n_shown_samples=50, + show_dash_kwargs=dict(port=8051), ) for i in range(nb_traces): fig.add_trace( - go.Scattergl(name=f"trace--{i}"), hf_y=np.arange(nb_samples), - 
row=(i % 2) + 1, col=1, + go.Scattergl(name=f"trace--{i}"), + hf_y=np.arange(nb_samples), + row=(i % 2) + 1, + col=1, ) assert fig._global_n_shown_samples == 50 assert fig._show_dash_kwargs["port"] == 8051 assert fig._figure_class == go.Figure - assert fig._xaxis_list == ['xaxis', 'xaxis2'] - assert fig._yaxis_list == ['yaxis', 'yaxis2'] + assert fig._xaxis_list == ["xaxis", "xaxis2"] + assert fig._yaxis_list == ["yaxis", "yaxis2"] pickle.dump(fig, open(pickle_figure, "wb")) fig_pickle = pickle.load(open(pickle_figure, "rb")) @@ -69,8 +73,8 @@ def test_pickle_figure_resampler(pickle_figure): assert fig_pickle._global_n_shown_samples == 50 assert fig_pickle._show_dash_kwargs["port"] == 8051 assert fig_pickle._figure_class == go.Figure - assert fig_pickle._xaxis_list == ['xaxis', 'xaxis2'] - assert fig_pickle._yaxis_list == ['yaxis', 'yaxis2'] + assert fig_pickle._xaxis_list == ["xaxis", "xaxis2"] + assert fig_pickle._yaxis_list == ["yaxis", "yaxis2"] assert len(fig_pickle.data) == nb_traces assert len(fig_pickle.hf_data) == nb_traces for i in range(nb_traces): @@ -113,13 +117,15 @@ def test_pickle_figurewidget_resampler(pickle_figure): ) for i in range(nb_traces): fig.add_trace( - go.Scattergl(name=f"trace--{i}"), hf_y=np.arange(nb_samples), - row=(i % 2) + 1, col=1, + go.Scattergl(name=f"trace--{i}"), + hf_y=np.arange(nb_samples), + row=(i % 2) + 1, + col=1, ) assert fig._global_n_shown_samples == 50 assert fig._figure_class == go.FigureWidget - assert fig._xaxis_list == ['xaxis', 'xaxis2'] - assert fig._yaxis_list == ['yaxis', 'yaxis2'] + assert fig._xaxis_list == ["xaxis", "xaxis2"] + assert fig._yaxis_list == ["yaxis", "yaxis2"] pickle.dump(fig, open(pickle_figure, "wb")) fig_pickle = pickle.load(open(pickle_figure, "rb")) @@ -127,8 +133,8 @@ def test_pickle_figurewidget_resampler(pickle_figure): assert isinstance(fig_pickle, FigureWidgetResampler) assert fig_pickle._global_n_shown_samples == 50 assert fig_pickle._figure_class == go.FigureWidget - assert fig_pickle._xaxis_list == ['xaxis', 'xaxis2'] - assert fig_pickle._yaxis_list == ['yaxis', 'yaxis2'] + assert fig_pickle._xaxis_list == ["xaxis", "xaxis2"] + assert fig_pickle._yaxis_list == ["yaxis", "yaxis2"] assert len(fig_pickle.data) == nb_traces assert len(fig_pickle.hf_data) == nb_traces for i in range(nb_traces): @@ -143,6 +149,7 @@ def test_pickle_figurewidget_resampler(pickle_figure): ## Test pickling when registered + def test_pickle_figure_resampler_registered(registering_cleanup, pickle_figure): nb_traces = 4 nb_samples = 5_043 @@ -316,6 +323,7 @@ def test_pickle_figurewidget_resampler_registered(registering_cleanup, pickle_fi ## Test basic (deep)copy + def test_copy_and_deepcopy_figure_resampler(): nb_traces = 3 nb_samples = 3_243 @@ -395,6 +403,7 @@ def test_copy_and_deepcopy_figurewidget_resampler(): ## Test basic (deep)copy with PR registered + def test_copy_figure_resampler_registered(): nb_traces = 3 nb_samples = 4_069 diff --git a/tests/test_utils.py b/tests/test_utils.py index 2b5f0c96..c9983860 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,15 +1,16 @@ import pandas as pd import plotly.graph_objects as go + +from plotly_resampler import FigureResampler, FigureWidgetResampler from plotly_resampler.figure_resampler.utils import ( is_figure, is_figurewidget, is_fr, is_fwr, - timedelta_to_str, - round_td_str, round_number_str, + round_td_str, + timedelta_to_str, ) -from plotly_resampler import FigureResampler, FigureWidgetResampler def test_is_figure(): From 76e097e343ec4aeba7b236ec9853471447e75dd6 
Mon Sep 17 00:00:00 2001 From: Isis Garayalde Vandendriessche Date: Wed, 15 Mar 2023 21:15:18 +0100 Subject: [PATCH 07/15] add TraceUpdater benchmarking with selenium --- examples/dash_apps/usage.py | 46 ------ .../figure_resampler/figure_resampler.py | 12 +- .../figure_resampler_interface.py | 15 +- tests/__init__.py | 0 tests/conftest.py | 4 +- tests/fr_selenium.py | 55 +++++-- tests/minimal_variable_threads.py | 81 +++++++++++ tests/test_visual_gain_threads.py | 137 ++++++++++++++++++ 8 files changed, 274 insertions(+), 76 deletions(-) delete mode 100644 examples/dash_apps/usage.py delete mode 100644 tests/__init__.py create mode 100644 tests/minimal_variable_threads.py create mode 100644 tests/test_visual_gain_threads.py diff --git a/examples/dash_apps/usage.py b/examples/dash_apps/usage.py deleted file mode 100644 index 51d7eaf2..00000000 --- a/examples/dash_apps/usage.py +++ /dev/null @@ -1,46 +0,0 @@ -import dash -import numpy as np -import plotly.graph_objs as go -import trace_updater -from dash import Input, Output, dcc, html -from trace_updater import TraceUpdater - -from plotly_resampler import FigureResampler -from plotly_resampler.aggregation import EveryNthPoint - -# Construct a high-frequency signal -n = 1_000_000 -x = np.arange(n) -noisy_sin = (3 + np.sin(x / 200) + np.random.randn(len(x)) / 10) * x / (n / 10) - -# Construct the to-be resampled figure -fig = FigureResampler( - go.Figure(), - # show_mean_aggregation_size=False, - # default_downsampler=EveryNthPoint(interleave_gaps=False), - default_n_shown_samples=4000, - resampled_trace_prefix_suffix=("", ""), -) -for i in range(100): - fig.add_trace( - go.Scattergl(name=f"sine-{i}", showlegend=True), hf_x=x, hf_y=noisy_sin + 10 * i - ) - - -# Construct app & its layout -app = dash.Dash(__name__) - -app.layout = html.Div( - [ - dcc.Store(id="visible-indices", data={"visible": [], "invisible": []}), - dcc.Graph(id="graph-id", figure=fig), - TraceUpdater(id="trace-updater", gdID="graph-id"), - ] -) - -# Register the callback -fig.register_update_graph_callback(app, "graph-id", "trace-updater", "visible-indices") - - -if __name__ == "__main__": - app.run_server(debug=True, port=8050) diff --git a/plotly_resampler/figure_resampler/figure_resampler.py b/plotly_resampler/figure_resampler/figure_resampler.py index f9861634..045a14ac 100644 --- a/plotly_resampler/figure_resampler/figure_resampler.py +++ b/plotly_resampler/figure_resampler/figure_resampler.py @@ -6,6 +6,7 @@ """ + from __future__ import annotations __author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost" @@ -15,6 +16,7 @@ import warnings from typing import List, Tuple + import dash import plotly.graph_objects as go from flask_cors import cross_origin @@ -328,6 +330,7 @@ def show_dash( self, mode=None, config: dict | None = None, + testing: bool | None = False, graph_properties: dict | None = None, **kwargs, ): @@ -390,16 +393,17 @@ def show_dash( app = JupyterDash("local_app") app.layout = dash.html.Div( [ + dash.dcc.Store(id="visible-indices", data={"visible": [], "invisible": []}), dash.dcc.Graph( id="resample-figure", figure=self, config=config, **graph_properties ), TraceUpdater( - id="trace-updater", gdID="resample-figure", sequentialUpdate=False + id="trace-updater", gdID="resample-figure", sequentialUpdate=False, verbose=testing ), GraphReporter(id="graph-reporter", gId="resample-figure"), ] ) - self.register_update_graph_callback(app, "resample-figure", "trace-updater") + self.register_update_graph_callback(app, "resample-figure", 
"trace-updater", 'visible-indices') # 2. Run the app if mode == "inline" and "height" not in kwargs: @@ -524,7 +528,7 @@ def register_update_graph_callback( ) app.callback( - dash.dependencies.Output(trace_updater_id, "visibleUpdateData"), + dash.dependencies.Output(trace_updater_id, "updateData"), dash.dependencies.Input(graph_id, "relayoutData"), # dash.dependencies.State(graph_id, "restyleData"), dash.dependencies.State(store_id, "data"), @@ -533,7 +537,7 @@ def register_update_graph_callback( app.callback( dash.dependencies.Output(trace_updater_id, "invisibleUpdateData"), - # dash.dependencies.Input(trace_updater_id, "visibleUpdateData"), + # dash.dependencies.Input(trace_updater_id, "updateData"), dash.dependencies.Input(trace_updater_id, "visibleUpdate"), dash.dependencies.State(graph_id, "relayoutData"), dash.dependencies.State(store_id, "data"), diff --git a/plotly_resampler/figure_resampler/figure_resampler_interface.py b/plotly_resampler/figure_resampler/figure_resampler_interface.py index 78f6bd42..47bfd956 100644 --- a/plotly_resampler/figure_resampler/figure_resampler_interface.py +++ b/plotly_resampler/figure_resampler/figure_resampler_interface.py @@ -1168,7 +1168,7 @@ def construct_update_data( --------- This method is tightly coupled with Dash app callbacks. It takes the front-end figure its ``relayoutData`` as input and returns the data which needs to be - sent tot the ``TraceUpdater`` its ``visibleUpdateData`` property for that corresponding + sent tot the ``TraceUpdater`` its ``updateData`` property for that corresponding graph. Parameters @@ -1194,7 +1194,6 @@ def construct_update_data( in each dict. """ - if ( len(trace_visibility["visible"]) == 0 and len(trace_visibility["invisible"]) == 0 @@ -1232,9 +1231,6 @@ def construct_update_data( updated_trace_indices=updated_trace_indices, indices_to_use=visible_trace_idx, ) - print( - updated_trace_indices - ) # only contains ints (the indices of the updated traces) # 2. The user clicked on either autorange | reset axes autorange_matches = self._re_matches( @@ -1290,18 +1286,13 @@ def construct_update_data( # the client front-end can know which trace needs to be updated trace_reduced.update({"index": idx}) layout_traces_list.append(trace_reduced) - # print(layout_traces_list) return layout_traces_list def construct_invisible_update_data( self, visible_update: int, relayout_data, trace_visibility: dict ): invisible_trace_idx = trace_visibility["invisible"] - print(f"invisible_trace_idx: {invisible_trace_idx}") - # import json - # import datetime - # with open(f'figure_{datetime.datetime.now().strftime("%H_%M")}.json', 'w') as f: - # json.dump({"data": figure['data']}, f) + current_graph = self._get_current_graph() updated_trace_indices, cl_k = [], [] if relayout_data: @@ -1327,7 +1318,6 @@ def construct_invisible_update_data( updated_trace_indices=updated_trace_indices, indices_to_use=invisible_trace_idx, ) - # print(updated_trace_indices) #only contains ints (the indices of the updated traces) # 2. 
The user clicked on either autorange | reset axes
         autorange_matches = self._re_matches(
@@ -1383,7 +1373,6 @@
                 # the client front-end can know which trace needs to be updated
                 trace_reduced.update({"index": idx})
                 layout_traces_list.append(trace_reduced)
-        # print(layout_traces_list)
         return layout_traces_list

     @staticmethod
diff --git a/tests/__init__.py b/tests/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/conftest.py b/tests/conftest.py
index f106505f..7da50754 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -20,8 +20,8 @@
 # hyperparameters
 _nb_samples = 10_000
 data_dir = "examples/data/"
-headless = True
-TESTING_LOCAL = False  # SET THIS TO TRUE IF YOU ARE TESTING LOCALLY
+headless = False
+TESTING_LOCAL = True  # SET THIS TO TRUE IF YOU ARE TESTING LOCALLY


 @pytest.fixture
diff --git a/tests/fr_selenium.py b/tests/fr_selenium.py
index 4e431be8..ec9f0ade 100644
--- a/tests/fr_selenium.py
+++ b/tests/fr_selenium.py
@@ -12,19 +12,22 @@
 __author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt"

 import json
-import sys
 import time
 from typing import List, Union

+import sys
 from selenium.webdriver.common.action_chains import ActionChains
 from selenium.webdriver.common.by import By
-from selenium.webdriver.firefox.options import Options
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import WebDriverWait
 from seleniumwire import webdriver
 from seleniumwire.request import Request
-
+# Note: this is used to add extra waiting time to the Windows & macOS tests, as
+# - on these OSes, serialization of the figure is necessary (to start the dash app
+#   in a multiprocessing.Process)
+#   https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
+# - on Linux, the browser (i.e., sending & receiving requests) is a lot faster

 def not_on_linux():
     """Return True if the current platform is not Linux.
@@ -192,7 +195,7 @@ def go_to_page(self):
         try:
             self.driver.find_element_by_id("resample-figure")
             break
-        except:
+        except Exception:
             time.sleep(5)

     def clear_requests(self, sleep_time_s=1):
@@ -208,7 +211,7 @@ def get_requests(self, delete: bool = True):

         return requests

-    def drag_and_zoom(self, div_classname, x0=0.25, x1=0.5, y0=0.25, y1=0.5):
+    def drag_and_zoom(self, div_classname, x0=0.25, x1=0.5, y0=0.25, y1=0.5, testing=False):
         """
        Drags and zooms the div with the given classname.
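The hunks below start browser-side timers right before the zoom / reset-axes interaction fires. A minimal sketch of how these timings are produced and read back, assuming a Chrome driver created with {"goog:loggingPrefs": {"browser": "ALL"}} (as test_visual_gain_threads.py does) and a TraceUpdater mounted with verbose=True, which is expected to close the timers via console.timeEnd on the front end:

    # start the timers just before the interaction is performed
    driver.execute_script(
        "console.time('time (visible)');console.time('time (full)');"
    )
    # ... perform the interaction here (e.g. drag_and_zoom / reset_axes) ...
    # closed timers show up as DEBUG entries in the browser console log,
    # e.g. '... "time (visible): 2520.9 ms"'
    for entry in driver.get_log("browser"):  # dicts: level/message/timestamp
        if "time (" in entry["message"]:
            print(entry["level"], entry["message"])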
@@ -240,13 +243,18 @@ def drag_and_zoom(self, div_classname, x0=0.25, x1=0.5, y0=0.25, y1=0.5):
         actions = ActionChains(self.driver)
         actions.move_to_element_with_offset(subplot, xoffset=w * x0, yoffset=h * y0)
         actions.click_and_hold()
-        actions.pause(0.2)
+        actions.pause(0.1)
         actions.move_by_offset(xoffset=w * (x1 - x0), yoffset=h * (y1 - y0))
-        actions.pause(0.2)
-        actions.release()
-        actions.pause(0.2)
+        actions.pause(0.1)
         actions.perform()

+        action = ActionChains(self.driver)
+        action.release()
+        if testing:
+            # self.driver.execute_script("console.log('time update visible');")
+            self.driver.execute_script("console.time('time (visible)');console.time('time (full)');")
+        action.perform()
+
     def _get_modebar_btns(self):
         if not self.on_page:
             self.go_to_page()
@@ -263,11 +271,19 @@
     def autoscale(self):
         for btn in self._get_modebar_btns():
             data_title = btn.get_attribute("data-title")
             if data_title == "Autoscale":
                 ActionChains(self.driver).move_to_element(btn).click().perform()
         return

-    def reset_axes(self):
+    def reset_axes(self, testing=False):
         for btn in self._get_modebar_btns():
             data_title = btn.get_attribute("data-title")
             if data_title == "Reset axes":
-                ActionChains(self.driver).move_to_element(btn).click().perform()
+                ActionChains(self.driver).move_to_element(btn).perform()
+
+                # NOTE: execute the click right after the log
+                actions = ActionChains(self.driver)
+                actions.click()
+                if testing:
+                    # self.driver.execute_script("console.log('time update visible');")
+                    self.driver.execute_script("console.time('time (visible)');console.time('time (full)');")
+                actions.perform()
         return

     def click_legend_item(self, legend_name):
@@ -286,6 +302,23 @@
         )
         return

+    def hide_legend_restyle(self, item_numbers):
+
+        # NOTE: for the moment this only works when a single graph is present
+        graph = self.driver.find_elements(by=By.CLASS_NAME, value="js-plotly-plot")
+        # TODO: find a way to scroll down to an element (trace in legend) within an element (legend)
+        self.driver.execute_script(
+            "Plotly.restyle(arguments[0], {'visible': ['legendonly']},arguments[1])",
+            graph[0],
+            item_numbers,
+        )
+
+    def start_timer(self, type):
+        if type == "zoom":
+            self.driver.execute_script("console.log('zoom in')")
+        else:
+            self.driver.execute_script("console.log('reset')")
+
     # ------------------------------ DATA MODEL METHODS ------------------------------
     def __del__(self):
         self.driver.close()
diff --git a/tests/minimal_variable_threads.py b/tests/minimal_variable_threads.py
new file mode 100644
index 00000000..685cf569
--- /dev/null
+++ b/tests/minimal_variable_threads.py
@@ -0,0 +1,81 @@
+
import numpy as np
import plotly.graph_objs as go
# from dash import Input, Output, dcc, html
# from trace_updater import TraceUpdater

# import sys
# print(sys.path)
# sys.path.append('C:\\Users\\willi\\Documents\\ISIS\\Thesis\\plotly-resampler')


from plotly_resampler.figure_resampler import FigureResampler
from plotly_resampler.aggregation import EveryNthPoint

import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument("-n", "--npoints", type=int)
parser.add_argument("-s", "--nsamples", type=int)
parser.add_argument("-t", "--traces", type=int)

args = parser.parse_args()
n = args.npoints
s = args.nsamples
t = args.traces

# print(n)
# print(s)
# print(t)



# # Construct a high-frequency signal
# n=1_000_000
# s=10_000
# t=10

def make_fig(n, s, t):
    x = np.arange(n)
    noisy_sin = (3 + np.sin(x / 200) + np.random.randn(len(x)) / 10) * x / (n / 10)
    print(n/s)
    # Construct the to-be resampled figure
    fig = FigureResampler(
        go.Figure(),
        # 
show_mean_aggregation_size=False, + default_downsampler=EveryNthPoint(interleave_gaps=False), + default_n_shown_samples=s, + resampled_trace_prefix_suffix=("", ""), + ) + for i in range(t): + fig.add_trace( + go.Scattergl(name=f"sine-{i}", showlegend=True), hf_x=x, hf_y=noisy_sin + 10 * i + ) + return fig + + +# Construct app & its layout +# app = dash.Dash(__name__) + +# app.layout = html.Div( +# [ +# dcc.Store(id="visible-indices", data={"visible": [], "invisible": []}), +# dcc.Graph(id="graph-id", figure=fig), +# TraceUpdater(id="trace-updater", gdID="graph-id",verbose=True), +# ] +# ) + +# n=1_000_000 +# s=4000 +# t=100 + +fig = make_fig(n, s, t) +# Register the callback + +fig.show_dash(mode='external', testing=True) +# # fig.register_update_graph_callback(app, "graph-id", "trace-updater", "visible-indices") + + +# if __name__ == "__main__": +# app.run_server(debug=True, port=8050) diff --git a/tests/test_visual_gain_threads.py b/tests/test_visual_gain_threads.py new file mode 100644 index 00000000..fc763b05 --- /dev/null +++ b/tests/test_visual_gain_threads.py @@ -0,0 +1,137 @@ +import multiprocessing +import time +import subprocess as sp +import os +import signal +import psutil as ps + +import json +import numpy as np + +import plotly.graph_objects as go +from plotly_resampler import FigureResampler +from fr_selenium import FigureResamplerGUITests + + + +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.desired_capabilities import DesiredCapabilities +from seleniumwire import webdriver +from webdriver_manager.chrome import ChromeDriverManager, ChromeType +from selenium.webdriver.chrome.service import Service as ChromeService + + +# create a test for each value of n_traces, n_datapoints and shown_datapoints + # open new page + # loop over a range of percentages (% of shown traces) + # start timer (in front end via selenium? performance library js) + # apply 50% range zoom + # stop timer when visible update + # start another timer for invisible + # stop timer when invisible renders + # return to original range (may trigger timer in front end... prevent this!!) + # extract logs from this iteration into a file + # close page! + +# d = driver() +options = Options() +d = DesiredCapabilities.CHROME +d["goog:loggingPrefs"] = {"browser": "ALL"} +driver = webdriver.Chrome( + service=ChromeService(ChromeDriverManager(chrome_type=ChromeType.GOOGLE).install()), + # service_args=["--verbose", "--log-path=C:\\Users\\willi\\Documents\\ISIS\\Thesis\\plotly-resampler\\logs"], + options=options, + desired_capabilities=d, + ) +port = 8050 +fr = FigureResamplerGUITests(driver, port=port) + +percentages_hidden = np.array([0, 0.2, 0.5, 0.8, 0.9]) +n_traces = [10, 20, 50] +n_datapoints = [ + 100_000, + 1_000_000, + 10_000_000 + ] +n_shown_datapoints = [ + 100, + 1000, + 4000 + ] + +try: + for t in n_traces: + for n in n_datapoints: + for s in n_shown_datapoints: + time.sleep(2) + proc = sp.Popen(['poetry','run','python','./tests/minimal_variable_threads.py', '-n', str(n), '-s', str(s), '-t', str(t)], + # creationflags=sp.CREATE_NEW_CONSOLE + ) + print(f'n_traces: {t}') + print(f'n_datapoints: {n}') + print(f'n_shown_datapoints: {s}') + try: + time.sleep(20) + fr.go_to_page() + + time.sleep(1) + + # determine the number of traces that will be hidden corresponding to each percentage + n_traces_hidden = np.unique(np.ceil(t*percentages_hidden)).astype(int) + # TODO: get final list of percentages (visible!) 
and print to console + + # print(n_traces_hidden) + last = t + for idx, j in enumerate(n_traces_hidden): + if idx == 0: + previous_n_hidden = 0 + else: + previous_n_hidden = n_traces_hidden[idx-1] + # hide r traces from the last hidden trace + driver.execute_script(f'console.log("{100-((j/t)*100)}%")') + print(previous_n_hidden) + residual = n_traces_hidden[idx]-previous_n_hidden + print(residual) + residual_indices = [int(last-(i+1)) for i in range(residual)] + last -= residual + if residual_indices != []: + fr.hide_legend_restyle(residual_indices) + + # after hiding the traces, (start the timer,) zoom in, then reset the axes for the next iteration + fr.drag_and_zoom("xy", x0=0.25, x1=0.75, y0=0.5, y1=0.5, testing=True) + #start timer + # fr.start_timer('zoom') + + time.sleep(5) + fr.reset_axes(testing=True) + # fr.start_timer('reset') + time.sleep(5) + with open(f'./logs/n{n}_s{s}_t{t}_everynth.json', 'w') as logfile: + for log in driver.get_log('browser'): + logfile.write(json.dumps(log)) + print('done saving log') + # print(logs) + # print(type(logs)) + except Exception as e: + raise e + finally: + # print(proc.pid) + # p = ps.Process(proc.pid) + # print(f'pid {proc.pid}') + # print(f'process is running {p.is_running()}') + # proc.send_signal(signal.CTRL_C_EVENT) + + #this works with windows! add if clause for Linux version! (proc.kill works?) + os.system("TASKKILL /F /T /PID " + str(proc.pid)) + os.system('killport 8050 --view-only') + # p.kill() + + # os.kill(proc.pid, signal.SIGKILL) + +except Exception as ex: + raise ex +finally: + print('closing driver') + # driver.close() + print(driver is None) + # driver.quit() From 10eb87b85fbc787b9664083337e8cd8b90b77b37 Mon Sep 17 00:00:00 2001 From: "Isis G.V" Date: Wed, 22 Mar 2023 13:01:14 +0100 Subject: [PATCH 08/15] log processing script + small changes to test log writing --- .../figure_resampler/figure_resampler.py | 4 +- tests/conftest.py | 3 +- tests/log_processing.ipynb | 551 ++++++++++++++++++ tests/test_visual_gain_threads.py | 14 +- 4 files changed, 564 insertions(+), 8 deletions(-) create mode 100644 tests/log_processing.ipynb diff --git a/plotly_resampler/figure_resampler/figure_resampler.py b/plotly_resampler/figure_resampler/figure_resampler.py index 045a14ac..11240d66 100644 --- a/plotly_resampler/figure_resampler/figure_resampler.py +++ b/plotly_resampler/figure_resampler/figure_resampler.py @@ -20,7 +20,7 @@ import dash import plotly.graph_objects as go from flask_cors import cross_origin -from graph_reporter import GraphReporter +# from graph_reporter import GraphReporter from jupyter_dash import JupyterDash from plotly.basedatatypes import BaseFigure from trace_updater import TraceUpdater @@ -400,7 +400,7 @@ def show_dash( TraceUpdater( id="trace-updater", gdID="resample-figure", sequentialUpdate=False, verbose=testing ), - GraphReporter(id="graph-reporter", gId="resample-figure"), + # GraphReporter(id="graph-reporter", gId="resample-figure"), ] ) self.register_update_graph_callback(app, "resample-figure", "trace-updater", 'visible-indices') diff --git a/tests/conftest.py b/tests/conftest.py index 7da50754..e4620c15 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,7 +21,7 @@ _nb_samples = 10_000 data_dir = "examples/data/" headless = False -TESTING_LOCAL = True # SET THIS TO TRUE IF YOU ARE TESTING LOCALLY +TESTING_LOCAL = False # SET THIS TO TRUE IF YOU ARE TESTING LOCALLY @pytest.fixture @@ -59,6 +59,7 @@ def driver(): options = Options() d = DesiredCapabilities.CHROME d["goog:loggingPrefs"] = {"browser": 
"ALL"} + d['acceptSslCerts'] = True if not TESTING_LOCAL: if headless: options.add_argument("--headless") diff --git a/tests/log_processing.ipynb b/tests/log_processing.ipynb new file mode 100644 index 00000000..4ccb07b1 --- /dev/null +++ b/tests/log_processing.ipynb @@ -0,0 +1,551 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
levelmessagetimestampdatapointssamplestracessampling algorithm
0INFO100.0%2023-03-19 18:08:13.58710000000100010everynth
1DEBUGtime (visible): 2520.93701171875 ms2023-03-19 18:08:16.89310000000100010everynth
2DEBUGrender time (visible): 56.18798828125 ms2023-03-19 18:08:16.89310000000100010everynth
3DEBUGtime (invisible): 0.797119140625 ms2023-03-19 18:08:16.89410000000100010everynth
4DEBUGtime (full): 2522.634033203125 ms2023-03-19 18:08:16.89510000000100010everynth
........................
1480DEBUGtime (visible): 2136.897705078125 ms2023-03-19 18:33:19.775100000400050everynth
1481DEBUGrender time (visible): 47.073974609375 ms2023-03-19 18:33:19.776100000400050everynth
1482DEBUGtime (invisible): 2320.608154296875 ms2023-03-19 18:33:22.097100000400050everynth
1483DEBUGrender time (invisible): 54.5732421875 ms2023-03-19 18:33:22.097100000400050everynth
1484DEBUGtime (full): 4458.48388671875 ms2023-03-19 18:33:22.097100000400050everynth
\n", + "

1485 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " level message \\\n", + "0 INFO 100.0% \n", + "1 DEBUG time (visible): 2520.93701171875 ms \n", + "2 DEBUG render time (visible): 56.18798828125 ms \n", + "3 DEBUG time (invisible): 0.797119140625 ms \n", + "4 DEBUG time (full): 2522.634033203125 ms \n", + "... ... ... \n", + "1480 DEBUG time (visible): 2136.897705078125 ms \n", + "1481 DEBUG render time (visible): 47.073974609375 ms \n", + "1482 DEBUG time (invisible): 2320.608154296875 ms \n", + "1483 DEBUG render time (invisible): 54.5732421875 ms \n", + "1484 DEBUG time (full): 4458.48388671875 ms \n", + "\n", + " timestamp datapoints samples traces sampling algorithm \n", + "0 2023-03-19 18:08:13.587 10000000 1000 10 everynth \n", + "1 2023-03-19 18:08:16.893 10000000 1000 10 everynth \n", + "2 2023-03-19 18:08:16.893 10000000 1000 10 everynth \n", + "3 2023-03-19 18:08:16.894 10000000 1000 10 everynth \n", + "4 2023-03-19 18:08:16.895 10000000 1000 10 everynth \n", + "... ... ... ... ... ... \n", + "1480 2023-03-19 18:33:19.775 100000 4000 50 everynth \n", + "1481 2023-03-19 18:33:19.776 100000 4000 50 everynth \n", + "1482 2023-03-19 18:33:22.097 100000 4000 50 everynth \n", + "1483 2023-03-19 18:33:22.097 100000 4000 50 everynth \n", + "1484 2023-03-19 18:33:22.097 100000 4000 50 everynth \n", + "\n", + "[1485 rows x 7 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import json\n", + "import os\n", + "\n", + "# df = pd.read_json('logs/n100000_s100_t10_everynth.json', orient ='index')\n", + "df = pd.DataFrame()\n", + "\n", + "\n", + "for filename in os.listdir('../logs'):\n", + " f = os.path.join('../logs', filename)\n", + " if os.path.isfile(f) & f.endswith(\".json\"):\n", + " # print(filename)\n", + " dft = pd.read_json(f)\n", + " dft['datapoints'] = filename.split('_')[0][1:]\n", + " dft['samples'] = filename.split('_')[1][1:]\n", + " dft['traces'] = filename.split('_')[2][1:]\n", + " dft['sampling algorithm'] = filename.split('_')[3].split('.')[0]\n", + " df = df.append(dft, ignore_index=True)\n", + " \n", + "df['message']= df['message'].str.split('\\\"').str[-2]\n", + "df.drop(df[df['level'].eq('SEVERE')].index, inplace=True)\n", + "df.drop('source', axis=1, inplace=True)\n", + "df.reset_index()\n", + "df\n", + "# warnings = df[df['level'].eq('ERROR')]\n", + "# warnings\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
messagepercentagetimestamp
0time (visible): 2520.93701171875 ms100.0%2023-03-19 18:08:16.893
1render time (visible): 56.18798828125 ms100.0%2023-03-19 18:08:16.893
2time (invisible): 0.797119140625 ms100.0%2023-03-19 18:08:16.894
3time (full): 2522.634033203125 ms100.0%2023-03-19 18:08:16.895
4render time (invisible): None100.0%2023-03-19 18:08:16.896
............
1345time (visible): 2136.897705078125 ms10.0%2023-03-19 18:33:19.775
1346render time (visible): 47.073974609375 ms10.0%2023-03-19 18:33:19.776
1347time (invisible): 2320.608154296875 ms10.0%2023-03-19 18:33:22.097
1348render time (invisible): 54.5732421875 ms10.0%2023-03-19 18:33:22.097
1349time (full): 4458.48388671875 ms10.0%2023-03-19 18:33:22.097
\n", + "

1350 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " message percentage \\\n", + "0 time (visible): 2520.93701171875 ms 100.0% \n", + "1 render time (visible): 56.18798828125 ms 100.0% \n", + "2 time (invisible): 0.797119140625 ms 100.0% \n", + "3 time (full): 2522.634033203125 ms 100.0% \n", + "4 render time (invisible): None 100.0% \n", + "... ... ... \n", + "1345 time (visible): 2136.897705078125 ms 10.0% \n", + "1346 render time (visible): 47.073974609375 ms 10.0% \n", + "1347 time (invisible): 2320.608154296875 ms 10.0% \n", + "1348 render time (invisible): 54.5732421875 ms 10.0% \n", + "1349 time (full): 4458.48388671875 ms 10.0% \n", + "\n", + " timestamp \n", + "0 2023-03-19 18:08:16.893 \n", + "1 2023-03-19 18:08:16.893 \n", + "2 2023-03-19 18:08:16.894 \n", + "3 2023-03-19 18:08:16.895 \n", + "4 2023-03-19 18:08:16.896 \n", + "... ... \n", + "1345 2023-03-19 18:33:19.775 \n", + "1346 2023-03-19 18:33:19.776 \n", + "1347 2023-03-19 18:33:22.097 \n", + "1348 2023-03-19 18:33:22.097 \n", + "1349 2023-03-19 18:33:22.097 \n", + "\n", + "[1350 rows x 3 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mask = df['message'].str.contains('%')\n", + "percentage = df[mask]\n", + "percentage\n", + "df.loc[mask, 'percentage'] = percentage['message']\n", + "df['percentage'].fillna(method='ffill', inplace=True)\n", + "df.drop(percentage.index, inplace=True)\n", + "df = df.reset_index()\n", + "df[['message', 'percentage','timestamp']]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2520.93701171875\n", + "1 56.18798828125\n", + "2 0.797119140625\n", + "3 2522.634033203125\n", + "4 None\n", + " ... \n", + "1345 2136.897705078125\n", + "1346 47.073974609375\n", + "1347 2320.608154296875\n", + "1348 54.5732421875\n", + "1349 4458.48388671875\n", + "Name: time (ms), Length: 1350, dtype: object" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['type']= df['message'].str.split().str[0]\n", + "df['update'] = df['message'].str.split(\"(\").str[1].str.split(\")\").str[0]\n", + "df['time (ms)'] = df['message'].str.split(\":\").str[1].str.split().str[0]\n", + "df['time (ms)']" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 time\n", + "1 render time\n", + "2 time\n", + "3 time\n", + "4 render time\n", + " ... 
\n", + "1345 time\n", + "1346 render time\n", + "1347 time\n", + "1348 render time\n", + "1349 time\n", + "Name: type, Length: 1350, dtype: object" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['type'] = df['type'].apply(lambda x: x + ' time' if x == 'render' else x)\n", + "df['type']" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_30812\\2800390986.py:7: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df_visible['version'] = 'async trace update'\n", + "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_30812\\2800390986.py:8: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df_vanilla_visible['version'] = 'vanilla'\n", + "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_30812\\2800390986.py:11: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df_visible['time (ms)'] = pd.to_numeric(df_visible['time (ms)'])\n", + "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_30812\\2800390986.py:12: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df_vanilla_visible['time (ms)'] = pd.to_numeric(df_vanilla_visible['time (ms)'])\n" + ] + } + ], + "source": [ + "df_vanilla = pd.read_csv('../logs/processed_logs_vanilla.csv')\n", + "df_vanilla = df_vanilla.drop(df_vanilla.columns[0],axis=1)\n", + "\n", + "df_vanilla_visible = df_vanilla[df_vanilla['update'].eq('visible') & df_vanilla['type'].eq('time')]\n", + "df_visible = df[df['update'].eq('visible') & df['type'].eq('time')]\n", + "\n", + "df_visible['version'] = 'async trace update'\n", + "df_vanilla_visible['version'] = 'vanilla'\n", + "\n", + "# df_visible = df_visible.append(df_vanilla_visible)\n", + "df_visible['time (ms)'] = pd.to_numeric(df_visible['time (ms)'])\n", + "df_vanilla_visible['time (ms)'] = pd.to_numeric(df_vanilla_visible['time (ms)'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from natsort import natsort_keygen\n", + "\n", + "mean_df = df_visible.groupby(['datapoints', 'samples','traces','sampling algorithm', 'percentage', 'version'])['time (ms)'].mean().reset_index()\n", + "mean_vanilla_df = df_vanilla_visible.groupby(['datapoints', 'samples','traces','sampling algorithm', 'percentage', 'version'])['time (ms)'].mean().reset_index()\n", + "mean_vanilla_df = 
mean_vanilla_df.sort_values(by=['datapoints','samples','traces','sampling algorithm', 'percentage'],key=natsort_keygen())\n", + "mean_df = mean_df.sort_values(by=['datapoints','samples','traces','sampling algorithm', 'percentage'],key=natsort_keygen())\n", + "# mean_df = mean_df.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "mean_df['diff'] = mean_df['time (ms)'] - mean_vanilla_df['time (ms)']\n", + "mean_df['ratio'] = mean_df['time (ms)'] / mean_vanilla_df['time (ms)']\n", + "mean_df['time (ms) vanilla'] = mean_vanilla_df['time (ms)']\n", + "mean_df['percentage'] = pd.to_numeric(mean_df['percentage'].str.split('.').str[0])\n", + "mean_df.to_csv('../logs/processed_logs.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "plotly-resampler-rFn5pKAA-py3.9", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "2ee23f9e06e9276a9f67ea4ee15ee2ba149350665b01045ddc135410d3893be3" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/test_visual_gain_threads.py b/tests/test_visual_gain_threads.py index fc763b05..d7f74c51 100644 --- a/tests/test_visual_gain_threads.py +++ b/tests/test_visual_gain_threads.py @@ -47,7 +47,11 @@ fr = FigureResamplerGUITests(driver, port=port) percentages_hidden = np.array([0, 0.2, 0.5, 0.8, 0.9]) -n_traces = [10, 20, 50] +n_traces = [ + 10, + 20, + 50 + ] n_datapoints = [ 100_000, 1_000_000, @@ -102,13 +106,13 @@ #start timer # fr.start_timer('zoom') - time.sleep(5) + time.sleep(7) fr.reset_axes(testing=True) # fr.start_timer('reset') - time.sleep(5) + time.sleep(7) with open(f'./logs/n{n}_s{s}_t{t}_everynth.json', 'w') as logfile: - for log in driver.get_log('browser'): - logfile.write(json.dumps(log)) + # for log in driver.get_log('browser'): + logfile.write(json.dumps(driver.get_log('browser'))) print('done saving log') # print(logs) # print(type(logs)) From b95f3ba47bcb29136ec6b283e1e8064dea9fe686 Mon Sep 17 00:00:00 2001 From: "Isis G.V" Date: Fri, 21 Apr 2023 00:58:12 +0200 Subject: [PATCH 09/15] fix bad json dump in log processing --- tests/log_processing.ipynb | 251 ++++++++++++++++++++----------------- 1 file changed, 136 insertions(+), 115 deletions(-) diff --git a/tests/log_processing.ipynb b/tests/log_processing.ipynb index 4ccb07b1..9520c58c 100644 --- a/tests/log_processing.ipynb +++ b/tests/log_processing.ipynb @@ -2,7 +2,31 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import re\n", + "import os\n", + "\n", + "\n", + "# only needed if trailing object error\n", + "\n", + "# for filename in os.listdir('../logs/linux_jonas/visual_gain_logs/'):\n", + "# f = os.path.join('../logs/linux_jonas/visual_gain_logs/', filename)\n", + "# if os.path.isfile(f) & f.endswith(\".json\"):\n", + "# with open(f, 'r') as r:\n", + "# content = r.read()\n", + "# content = re.sub(r'}\\s*{', '},{', content)\n", + "# json_data = json.loads('[' + content + ']')\n", + "# with open(f, 'w') as w:\n", + 
"# json.dump(json_data, w)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -40,7 +64,7 @@ " 0\n", " INFO\n", " 100.0%\n", - " 2023-03-19 18:08:13.587\n", + " 2023-03-21 13:47:56.374\n", " 10000000\n", " 1000\n", " 10\n", @@ -49,8 +73,8 @@ " \n", " 1\n", " DEBUG\n", - " time (visible): 2520.93701171875 ms\n", - " 2023-03-19 18:08:16.893\n", + " time (visible): 195.864013671875 ms\n", + " 2023-03-21 13:47:57.477\n", " 10000000\n", " 1000\n", " 10\n", @@ -59,8 +83,8 @@ " \n", " 2\n", " DEBUG\n", - " render time (visible): 56.18798828125 ms\n", - " 2023-03-19 18:08:16.893\n", + " render time (visible): 91.519287109375 ms\n", + " 2023-03-21 13:47:57.477\n", " 10000000\n", " 1000\n", " 10\n", @@ -69,8 +93,8 @@ " \n", " 3\n", " DEBUG\n", - " time (invisible): 0.797119140625 ms\n", - " 2023-03-19 18:08:16.894\n", + " time (invisible): 1.1220703125 ms\n", + " 2023-03-21 13:47:57.479\n", " 10000000\n", " 1000\n", " 10\n", @@ -79,8 +103,8 @@ " \n", " 4\n", " DEBUG\n", - " time (full): 2522.634033203125 ms\n", - " 2023-03-19 18:08:16.895\n", + " time (full): 200.72802734375 ms\n", + " 2023-03-21 13:47:57.479\n", " 10000000\n", " 1000\n", " 10\n", @@ -99,8 +123,8 @@ " \n", " 1480\n", " DEBUG\n", - " time (visible): 2136.897705078125 ms\n", - " 2023-03-19 18:33:19.775\n", + " time (visible): 129.57080078125 ms\n", + " 2023-03-21 14:05:22.175\n", " 100000\n", " 4000\n", " 50\n", @@ -109,8 +133,8 @@ " \n", " 1481\n", " DEBUG\n", - " render time (visible): 47.073974609375 ms\n", - " 2023-03-19 18:33:19.776\n", + " render time (visible): 53.781005859375 ms\n", + " 2023-03-21 14:05:22.175\n", " 100000\n", " 4000\n", " 50\n", @@ -119,8 +143,8 @@ " \n", " 1482\n", " DEBUG\n", - " time (invisible): 2320.608154296875 ms\n", - " 2023-03-19 18:33:22.097\n", + " time (invisible): 269.0849609375 ms\n", + " 2023-03-21 14:05:22.445\n", " 100000\n", " 4000\n", " 50\n", @@ -129,8 +153,8 @@ " \n", " 1483\n", " DEBUG\n", - " render time (invisible): 54.5732421875 ms\n", - " 2023-03-19 18:33:22.097\n", + " render time (invisible): 67.77197265625 ms\n", + " 2023-03-21 14:05:22.445\n", " 100000\n", " 4000\n", " 50\n", @@ -139,8 +163,8 @@ " \n", " 1484\n", " DEBUG\n", - " time (full): 4458.48388671875 ms\n", - " 2023-03-19 18:33:22.097\n", + " time (full): 402.0009765625 ms\n", + " 2023-03-21 14:05:22.445\n", " 100000\n", " 4000\n", " 50\n", @@ -152,36 +176,36 @@ "" ], "text/plain": [ - " level message \\\n", - "0 INFO 100.0% \n", - "1 DEBUG time (visible): 2520.93701171875 ms \n", - "2 DEBUG render time (visible): 56.18798828125 ms \n", - "3 DEBUG time (invisible): 0.797119140625 ms \n", - "4 DEBUG time (full): 2522.634033203125 ms \n", - "... ... ... \n", - "1480 DEBUG time (visible): 2136.897705078125 ms \n", - "1481 DEBUG render time (visible): 47.073974609375 ms \n", - "1482 DEBUG time (invisible): 2320.608154296875 ms \n", - "1483 DEBUG render time (invisible): 54.5732421875 ms \n", - "1484 DEBUG time (full): 4458.48388671875 ms \n", + " level message \\\n", + "0 INFO 100.0% \n", + "1 DEBUG time (visible): 195.864013671875 ms \n", + "2 DEBUG render time (visible): 91.519287109375 ms \n", + "3 DEBUG time (invisible): 1.1220703125 ms \n", + "4 DEBUG time (full): 200.72802734375 ms \n", + "... ... ... 
\n", + "1480 DEBUG time (visible): 129.57080078125 ms \n", + "1481 DEBUG render time (visible): 53.781005859375 ms \n", + "1482 DEBUG time (invisible): 269.0849609375 ms \n", + "1483 DEBUG render time (invisible): 67.77197265625 ms \n", + "1484 DEBUG time (full): 402.0009765625 ms \n", "\n", " timestamp datapoints samples traces sampling algorithm \n", - "0 2023-03-19 18:08:13.587 10000000 1000 10 everynth \n", - "1 2023-03-19 18:08:16.893 10000000 1000 10 everynth \n", - "2 2023-03-19 18:08:16.893 10000000 1000 10 everynth \n", - "3 2023-03-19 18:08:16.894 10000000 1000 10 everynth \n", - "4 2023-03-19 18:08:16.895 10000000 1000 10 everynth \n", + "0 2023-03-21 13:47:56.374 10000000 1000 10 everynth \n", + "1 2023-03-21 13:47:57.477 10000000 1000 10 everynth \n", + "2 2023-03-21 13:47:57.477 10000000 1000 10 everynth \n", + "3 2023-03-21 13:47:57.479 10000000 1000 10 everynth \n", + "4 2023-03-21 13:47:57.479 10000000 1000 10 everynth \n", "... ... ... ... ... ... \n", - "1480 2023-03-19 18:33:19.775 100000 4000 50 everynth \n", - "1481 2023-03-19 18:33:19.776 100000 4000 50 everynth \n", - "1482 2023-03-19 18:33:22.097 100000 4000 50 everynth \n", - "1483 2023-03-19 18:33:22.097 100000 4000 50 everynth \n", - "1484 2023-03-19 18:33:22.097 100000 4000 50 everynth \n", + "1480 2023-03-21 14:05:22.175 100000 4000 50 everynth \n", + "1481 2023-03-21 14:05:22.175 100000 4000 50 everynth \n", + "1482 2023-03-21 14:05:22.445 100000 4000 50 everynth \n", + "1483 2023-03-21 14:05:22.445 100000 4000 50 everynth \n", + "1484 2023-03-21 14:05:22.445 100000 4000 50 everynth \n", "\n", "[1485 rows x 7 columns]" ] }, - "execution_count": 1, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -189,17 +213,14 @@ "source": [ "import pandas as pd\n", "import numpy as np\n", - "import json\n", - "import os\n", "\n", "# df = pd.read_json('logs/n100000_s100_t10_everynth.json', orient ='index')\n", "df = pd.DataFrame()\n", "\n", "\n", - "for filename in os.listdir('../logs'):\n", - " f = os.path.join('../logs', filename)\n", + "for filename in os.listdir('../logs/linux_jonas/visual_gain_logs/'):\n", + " f = os.path.join('../logs/linux_jonas/visual_gain_logs/', filename)\n", " if os.path.isfile(f) & f.endswith(\".json\"):\n", - " # print(filename)\n", " dft = pd.read_json(f)\n", " dft['datapoints'] = filename.split('_')[0][1:]\n", " dft['samples'] = filename.split('_')[1][1:]\n", @@ -218,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -250,33 +271,33 @@ " \n", " \n", " 0\n", - " time (visible): 2520.93701171875 ms\n", + " time (visible): 195.864013671875 ms\n", " 100.0%\n", - " 2023-03-19 18:08:16.893\n", + " 2023-03-21 13:47:57.477\n", " \n", " \n", " 1\n", - " render time (visible): 56.18798828125 ms\n", + " render time (visible): 91.519287109375 ms\n", " 100.0%\n", - " 2023-03-19 18:08:16.893\n", + " 2023-03-21 13:47:57.477\n", " \n", " \n", " 2\n", - " time (invisible): 0.797119140625 ms\n", + " time (invisible): 1.1220703125 ms\n", " 100.0%\n", - " 2023-03-19 18:08:16.894\n", + " 2023-03-21 13:47:57.479\n", " \n", " \n", " 3\n", - " time (full): 2522.634033203125 ms\n", + " time (full): 200.72802734375 ms\n", " 100.0%\n", - " 2023-03-19 18:08:16.895\n", + " 2023-03-21 13:47:57.479\n", " \n", " \n", " 4\n", " render time (invisible): None\n", " 100.0%\n", - " 2023-03-19 18:08:16.896\n", + " 2023-03-21 13:47:57.480\n", " \n", " \n", " ...\n", @@ -286,33 +307,33 @@ " \n", " \n", " 1345\n", - " time (visible): 
2136.897705078125 ms\n", + " time (visible): 129.57080078125 ms\n", " 10.0%\n", - " 2023-03-19 18:33:19.775\n", + " 2023-03-21 14:05:22.175\n", " \n", " \n", " 1346\n", - " render time (visible): 47.073974609375 ms\n", + " render time (visible): 53.781005859375 ms\n", " 10.0%\n", - " 2023-03-19 18:33:19.776\n", + " 2023-03-21 14:05:22.175\n", " \n", " \n", " 1347\n", - " time (invisible): 2320.608154296875 ms\n", + " time (invisible): 269.0849609375 ms\n", " 10.0%\n", - " 2023-03-19 18:33:22.097\n", + " 2023-03-21 14:05:22.445\n", " \n", " \n", " 1348\n", - " render time (invisible): 54.5732421875 ms\n", + " render time (invisible): 67.77197265625 ms\n", " 10.0%\n", - " 2023-03-19 18:33:22.097\n", + " 2023-03-21 14:05:22.445\n", " \n", " \n", " 1349\n", - " time (full): 4458.48388671875 ms\n", + " time (full): 402.0009765625 ms\n", " 10.0%\n", - " 2023-03-19 18:33:22.097\n", + " 2023-03-21 14:05:22.445\n", " \n", " \n", "\n", @@ -320,36 +341,36 @@ "" ], "text/plain": [ - " message percentage \\\n", - "0 time (visible): 2520.93701171875 ms 100.0% \n", - "1 render time (visible): 56.18798828125 ms 100.0% \n", - "2 time (invisible): 0.797119140625 ms 100.0% \n", - "3 time (full): 2522.634033203125 ms 100.0% \n", - "4 render time (invisible): None 100.0% \n", - "... ... ... \n", - "1345 time (visible): 2136.897705078125 ms 10.0% \n", - "1346 render time (visible): 47.073974609375 ms 10.0% \n", - "1347 time (invisible): 2320.608154296875 ms 10.0% \n", - "1348 render time (invisible): 54.5732421875 ms 10.0% \n", - "1349 time (full): 4458.48388671875 ms 10.0% \n", + " message percentage \\\n", + "0 time (visible): 195.864013671875 ms 100.0% \n", + "1 render time (visible): 91.519287109375 ms 100.0% \n", + "2 time (invisible): 1.1220703125 ms 100.0% \n", + "3 time (full): 200.72802734375 ms 100.0% \n", + "4 render time (invisible): None 100.0% \n", + "... ... ... \n", + "1345 time (visible): 129.57080078125 ms 10.0% \n", + "1346 render time (visible): 53.781005859375 ms 10.0% \n", + "1347 time (invisible): 269.0849609375 ms 10.0% \n", + "1348 render time (invisible): 67.77197265625 ms 10.0% \n", + "1349 time (full): 402.0009765625 ms 10.0% \n", "\n", " timestamp \n", - "0 2023-03-19 18:08:16.893 \n", - "1 2023-03-19 18:08:16.893 \n", - "2 2023-03-19 18:08:16.894 \n", - "3 2023-03-19 18:08:16.895 \n", - "4 2023-03-19 18:08:16.896 \n", + "0 2023-03-21 13:47:57.477 \n", + "1 2023-03-21 13:47:57.477 \n", + "2 2023-03-21 13:47:57.479 \n", + "3 2023-03-21 13:47:57.479 \n", + "4 2023-03-21 13:47:57.480 \n", "... ... \n", - "1345 2023-03-19 18:33:19.775 \n", - "1346 2023-03-19 18:33:19.776 \n", - "1347 2023-03-19 18:33:22.097 \n", - "1348 2023-03-19 18:33:22.097 \n", - "1349 2023-03-19 18:33:22.097 \n", + "1345 2023-03-21 14:05:22.175 \n", + "1346 2023-03-21 14:05:22.175 \n", + "1347 2023-03-21 14:05:22.445 \n", + "1348 2023-03-21 14:05:22.445 \n", + "1349 2023-03-21 14:05:22.445 \n", "\n", "[1350 rows x 3 columns]" ] }, - "execution_count": 2, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -367,27 +388,27 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0 2520.93701171875\n", - "1 56.18798828125\n", - "2 0.797119140625\n", - "3 2522.634033203125\n", - "4 None\n", - " ... 
\n", - "1345 2136.897705078125\n", - "1346 47.073974609375\n", - "1347 2320.608154296875\n", - "1348 54.5732421875\n", - "1349 4458.48388671875\n", + "0 195.864013671875\n", + "1 91.519287109375\n", + "2 1.1220703125\n", + "3 200.72802734375\n", + "4 None\n", + " ... \n", + "1345 129.57080078125\n", + "1346 53.781005859375\n", + "1347 269.0849609375\n", + "1348 67.77197265625\n", + "1349 402.0009765625\n", "Name: time (ms), Length: 1350, dtype: object" ] }, - "execution_count": 3, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -401,7 +422,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -421,7 +442,7 @@ "Name: type, Length: 1350, dtype: object" ] }, - "execution_count": 4, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -433,32 +454,32 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_30812\\2800390986.py:7: SettingWithCopyWarning: \n", + "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_31168\\2595053696.py:7: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_visible['version'] = 'async trace update'\n", - "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_30812\\2800390986.py:8: SettingWithCopyWarning: \n", + "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_31168\\2595053696.py:8: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_vanilla_visible['version'] = 'vanilla'\n", - "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_30812\\2800390986.py:11: SettingWithCopyWarning: \n", + "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_31168\\2595053696.py:11: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_visible['time (ms)'] = pd.to_numeric(df_visible['time (ms)'])\n", - "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_30812\\2800390986.py:12: SettingWithCopyWarning: \n", + "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_31168\\2595053696.py:12: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", @@ -468,7 +489,7 @@ } ], "source": [ - "df_vanilla = pd.read_csv('../logs/processed_logs_vanilla.csv')\n", + "df_vanilla = pd.read_csv('../logs/linux_jonas/vanilla_plotly_logs/processed_logs_vanilla.csv')\n", "df_vanilla = df_vanilla.drop(df_vanilla.columns[0],axis=1)\n", "\n", "df_vanilla_visible = df_vanilla[df_vanilla['update'].eq('visible') & df_vanilla['type'].eq('time')]\n", @@ -484,7 +505,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": 
[], "source": [ @@ -499,7 +520,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -512,7 +533,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ From 1da64d93bea419adfa2d29405bbd998674badfe5 Mon Sep 17 00:00:00 2001 From: "Isis G.V" Date: Tue, 25 Apr 2023 11:17:44 +0200 Subject: [PATCH 10/15] log processing fix --- tests/log_processing.ipynb | 40 +++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/tests/log_processing.ipynb b/tests/log_processing.ipynb index 9520c58c..ca635930 100644 --- a/tests/log_processing.ipynb +++ b/tests/log_processing.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 22, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -205,7 +205,7 @@ "[1485 rows x 7 columns]" ] }, - "execution_count": 23, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -239,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -370,7 +370,7 @@ "[1350 rows x 3 columns]" ] }, - "execution_count": 24, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -388,7 +388,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -408,7 +408,7 @@ "Name: time (ms), Length: 1350, dtype: object" ] }, - "execution_count": 25, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -422,7 +422,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -442,7 +442,7 @@ "Name: type, Length: 1350, dtype: object" ] }, - "execution_count": 26, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -454,32 +454,32 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_31168\\2595053696.py:7: SettingWithCopyWarning: \n", + "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_34624\\2595053696.py:7: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_visible['version'] = 'async trace update'\n", - "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_31168\\2595053696.py:8: SettingWithCopyWarning: \n", + "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_34624\\2595053696.py:8: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_vanilla_visible['version'] = 'vanilla'\n", - "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_31168\\2595053696.py:11: SettingWithCopyWarning: \n", + "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_34624\\2595053696.py:11: SettingWithCopyWarning: \n", "A value is trying to 
be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_visible['time (ms)'] = pd.to_numeric(df_visible['time (ms)'])\n", - "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_31168\\2595053696.py:12: SettingWithCopyWarning: \n", + "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_34624\\2595053696.py:12: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", @@ -505,7 +505,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -520,20 +520,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ - "mean_df['diff'] = mean_df['time (ms)'] - mean_vanilla_df['time (ms)']\n", - "mean_df['ratio'] = mean_df['time (ms)'] / mean_vanilla_df['time (ms)']\n", + "mean_df['diff'] = mean_vanilla_df['time (ms)'] - mean_df['time (ms)']\n", + "mean_df['ratio'] = mean_vanilla_df['time (ms)'] / mean_df['time (ms)']\n", "mean_df['time (ms) vanilla'] = mean_vanilla_df['time (ms)']\n", "mean_df['percentage'] = pd.to_numeric(mean_df['percentage'].str.split('.').str[0])\n", - "mean_df.to_csv('../logs/processed_logs.csv')" + "mean_df.to_csv('../logs/linux_jonas/processed_logs.csv')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ From 2badb0daa011940c0ff5f280859ca348677b69d7 Mon Sep 17 00:00:00 2001 From: "Isis G.V" Date: Tue, 25 Apr 2023 11:22:32 +0200 Subject: [PATCH 11/15] added gitignore --- .gitignore | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.gitignore b/.gitignore index 1ce3074d..a8b6bb5a 100644 --- a/.gitignore +++ b/.gitignore @@ -155,3 +155,10 @@ cython_debug/ # sphinx-docs sphinx/_build sphinx/_autosummary + +#testing stuff +logs/ +selectionbox_layout_data/ +examples/dash_apps/**/*coarse_fine* +examples/dash_apps/2* +examples/dash_apps/00* From 52403491a07ad7fe5231fae13110009cc8f7b8db Mon Sep 17 00:00:00 2001 From: "Isis G.V" Date: Tue, 15 Aug 2023 23:20:20 +0200 Subject: [PATCH 12/15] modified log processing --- tests/log_processing.ipynb | 691 +++++++++++++++++++------------------ 1 file changed, 355 insertions(+), 336 deletions(-) diff --git a/tests/log_processing.ipynb b/tests/log_processing.ipynb index ca635930..9a8ed8ec 100644 --- a/tests/log_processing.ipynb +++ b/tests/log_processing.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 20, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ @@ -13,8 +13,8 @@ "\n", "# only needed if trailing object error\n", "\n", - "# for filename in os.listdir('../logs/linux_jonas/visual_gain_logs/'):\n", - "# f = os.path.join('../logs/linux_jonas/visual_gain_logs/', filename)\n", + "# for filename in os.listdir('../logs/linux_jonas/vanilla_plotly_logs/'):\n", + "# f = os.path.join('../logs/linux_jonas/vanilla_plotly_logs/', filename)\n", "# if os.path.isfile(f) & f.endswith(\".json\"):\n", "# with open(f, 'r') as r:\n", "# content = r.read()\n", @@ -26,7 +26,71 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 100.0%\n", + "1 time (visible): 1805.199951171875 ms\n", + "2 
render time (visible): 827.38818359375 ms\n", + "3 time (invisible): 1.220947265625 ms\n", + "4 time (full): 1808.047119140625 ms\n", + " ... \n", + "52795 time (visible): 261.415771484375 ms\n", + "52796 render time (visible): 81.3662109375 ms\n", + "52797 time (invisible): 0.520751953125 ms\n", + "52798 time (full): 264.547607421875 ms\n", + "52799 render time (invisible): None\n", + "Name: message, Length: 52800, dtype: object" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import json\n", + "import os\n", + "\n", + "# df = pd.read_json('logs/n100000_s100_t10_everynth.json', orient ='index')\n", + "df = pd.DataFrame()\n", + "\n", + "\n", + "for filename in os.listdir('../logs/linux_jonas/vanilla_plotly_logs/'):\n", + " f = os.path.join('../logs/linux_jonas/vanilla_plotly_logs/', filename)\n", + " if os.path.isfile(f) & (filename.split('.')[1] == 'json'):\n", + " dft = pd.read_json(f)\n", + " dft['datapoints'] = filename.split('_')[0][1:]\n", + " dft['samples'] = filename.split('_')[1][1:]\n", + " dft['traces'] = filename.split('_')[2][1:]\n", + " dft['sampling algorithm'] = filename.split('_')[3]\n", + " dft['iteration'] = filename.split(\"_\")[4].split('.')[0][-1:]\n", + " df = df.append(dft, ignore_index=True)\n", + " \n", + "\n", + "\n", + "df['message']= df['message'].str.split('\\\"').str[-2]\n", + "df.drop(df[df['level'].eq('WARNING')].index, inplace=True)\n", + "df = df.drop('source', axis=1)\n", + "df.reset_index()\n", + "df['message']\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 55, "metadata": {}, "outputs": [ { @@ -50,196 +114,144 @@ " \n", " \n", " \n", - " level\n", " message\n", + " percentage\n", " timestamp\n", - " datapoints\n", - " samples\n", - " traces\n", - " sampling algorithm\n", " \n", " \n", " \n", " \n", " 0\n", - " INFO\n", + " time (visible): 1805.199951171875 ms\n", " 100.0%\n", - " 2023-03-21 13:47:56.374\n", - " 10000000\n", - " 1000\n", - " 10\n", - " everynth\n", + " 2023-08-07 17:06:47.462\n", " \n", " \n", " 1\n", - " DEBUG\n", - " time (visible): 195.864013671875 ms\n", - " 2023-03-21 13:47:57.477\n", - " 10000000\n", - " 1000\n", - " 10\n", - " everynth\n", + " render time (visible): 827.38818359375 ms\n", + " 100.0%\n", + " 2023-08-07 17:06:47.463\n", " \n", " \n", " 2\n", - " DEBUG\n", - " render time (visible): 91.519287109375 ms\n", - " 2023-03-21 13:47:57.477\n", - " 10000000\n", - " 1000\n", - " 10\n", - " everynth\n", + " time (invisible): 1.220947265625 ms\n", + " 100.0%\n", + " 2023-08-07 17:06:47.464\n", " \n", " \n", " 3\n", - " DEBUG\n", - " time (invisible): 1.1220703125 ms\n", - " 2023-03-21 13:47:57.479\n", - " 10000000\n", - " 1000\n", - " 10\n", - " everynth\n", + " time (full): 1808.047119140625 ms\n", + " 100.0%\n", + " 2023-08-07 17:06:47.464\n", " \n", " \n", " 4\n", - " DEBUG\n", - " time (full): 200.72802734375 ms\n", - " 2023-03-21 13:47:57.479\n", - " 10000000\n", - " 1000\n", - " 10\n", - " everynth\n", + " render time (invisible): None\n", + " 100.0%\n", + " 2023-08-07 17:06:47.464\n", " \n", " \n", " ...\n", " ...\n", " ...\n", " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", " \n", " \n", - " 1480\n", - " DEBUG\n", - " time (visible): 129.57080078125 ms\n", - " 2023-03-21 14:05:22.175\n", - " 100000\n", - " 4000\n", - " 50\n", - " everynth\n", + " 47995\n", + " time 
(visible): 261.415771484375 ms\n", + " 10.0%\n", + " 2023-08-08 00:53:52.332\n", " \n", " \n", - " 1481\n", - " DEBUG\n", - " render time (visible): 53.781005859375 ms\n", - " 2023-03-21 14:05:22.175\n", - " 100000\n", - " 4000\n", - " 50\n", - " everynth\n", + " 47996\n", + " render time (visible): 81.3662109375 ms\n", + " 10.0%\n", + " 2023-08-08 00:53:52.332\n", " \n", " \n", - " 1482\n", - " DEBUG\n", - " time (invisible): 269.0849609375 ms\n", - " 2023-03-21 14:05:22.445\n", - " 100000\n", - " 4000\n", - " 50\n", - " everynth\n", + " 47997\n", + " time (invisible): 0.520751953125 ms\n", + " 10.0%\n", + " 2023-08-08 00:53:52.332\n", " \n", " \n", - " 1483\n", - " DEBUG\n", - " render time (invisible): 67.77197265625 ms\n", - " 2023-03-21 14:05:22.445\n", - " 100000\n", - " 4000\n", - " 50\n", - " everynth\n", + " 47998\n", + " time (full): 264.547607421875 ms\n", + " 10.0%\n", + " 2023-08-08 00:53:52.333\n", " \n", " \n", - " 1484\n", - " DEBUG\n", - " time (full): 402.0009765625 ms\n", - " 2023-03-21 14:05:22.445\n", - " 100000\n", - " 4000\n", - " 50\n", - " everynth\n", + " 47999\n", + " render time (invisible): None\n", + " 10.0%\n", + " 2023-08-08 00:53:52.333\n", " \n", " \n", "\n", - "
1485 rows × 7 columns\n",
+ "48000 rows × 3 columns
\n", "" ], "text/plain": [ - " level message \\\n", - "0 INFO 100.0% \n", - "1 DEBUG time (visible): 195.864013671875 ms \n", - "2 DEBUG render time (visible): 91.519287109375 ms \n", - "3 DEBUG time (invisible): 1.1220703125 ms \n", - "4 DEBUG time (full): 200.72802734375 ms \n", - "... ... ... \n", - "1480 DEBUG time (visible): 129.57080078125 ms \n", - "1481 DEBUG render time (visible): 53.781005859375 ms \n", - "1482 DEBUG time (invisible): 269.0849609375 ms \n", - "1483 DEBUG render time (invisible): 67.77197265625 ms \n", - "1484 DEBUG time (full): 402.0009765625 ms \n", + " message percentage \\\n", + "0 time (visible): 1805.199951171875 ms 100.0% \n", + "1 render time (visible): 827.38818359375 ms 100.0% \n", + "2 time (invisible): 1.220947265625 ms 100.0% \n", + "3 time (full): 1808.047119140625 ms 100.0% \n", + "4 render time (invisible): None 100.0% \n", + "... ... ... \n", + "47995 time (visible): 261.415771484375 ms 10.0% \n", + "47996 render time (visible): 81.3662109375 ms 10.0% \n", + "47997 time (invisible): 0.520751953125 ms 10.0% \n", + "47998 time (full): 264.547607421875 ms 10.0% \n", + "47999 render time (invisible): None 10.0% \n", "\n", - " timestamp datapoints samples traces sampling algorithm \n", - "0 2023-03-21 13:47:56.374 10000000 1000 10 everynth \n", - "1 2023-03-21 13:47:57.477 10000000 1000 10 everynth \n", - "2 2023-03-21 13:47:57.477 10000000 1000 10 everynth \n", - "3 2023-03-21 13:47:57.479 10000000 1000 10 everynth \n", - "4 2023-03-21 13:47:57.479 10000000 1000 10 everynth \n", - "... ... ... ... ... ... \n", - "1480 2023-03-21 14:05:22.175 100000 4000 50 everynth \n", - "1481 2023-03-21 14:05:22.175 100000 4000 50 everynth \n", - "1482 2023-03-21 14:05:22.445 100000 4000 50 everynth \n", - "1483 2023-03-21 14:05:22.445 100000 4000 50 everynth \n", - "1484 2023-03-21 14:05:22.445 100000 4000 50 everynth \n", + " timestamp \n", + "0 2023-08-07 17:06:47.462 \n", + "1 2023-08-07 17:06:47.463 \n", + "2 2023-08-07 17:06:47.464 \n", + "3 2023-08-07 17:06:47.464 \n", + "4 2023-08-07 17:06:47.464 \n", + "... ... 
\n", + "47995 2023-08-08 00:53:52.332 \n", + "47996 2023-08-08 00:53:52.332 \n", + "47997 2023-08-08 00:53:52.332 \n", + "47998 2023-08-08 00:53:52.333 \n", + "47999 2023-08-08 00:53:52.333 \n", "\n", - "[1485 rows x 7 columns]" + "[48000 rows x 3 columns]" ] }, - "execution_count": 21, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "\n", - "# df = pd.read_json('logs/n100000_s100_t10_everynth.json', orient ='index')\n", - "df = pd.DataFrame()\n", - "\n", - "\n", - "for filename in os.listdir('../logs/linux_jonas/visual_gain_logs/'):\n", - " f = os.path.join('../logs/linux_jonas/visual_gain_logs/', filename)\n", - " if os.path.isfile(f) & f.endswith(\".json\"):\n", - " dft = pd.read_json(f)\n", - " dft['datapoints'] = filename.split('_')[0][1:]\n", - " dft['samples'] = filename.split('_')[1][1:]\n", - " dft['traces'] = filename.split('_')[2][1:]\n", - " dft['sampling algorithm'] = filename.split('_')[3].split('.')[0]\n", - " df = df.append(dft, ignore_index=True)\n", - " \n", - "df['message']= df['message'].str.split('\\\"').str[-2]\n", - "df.drop(df[df['level'].eq('SEVERE')].index, inplace=True)\n", - "df.drop('source', axis=1, inplace=True)\n", - "df.reset_index()\n", - "df\n", - "# warnings = df[df['level'].eq('ERROR')]\n", - "# warnings\n" + "mask = df['message'].str.contains('%')\n", + "percentage = df[mask]\n", + "percentage\n", + "df.loc[mask, 'percentage'] = percentage['message']\n", + "df['percentage'].fillna(method='ffill', inplace=True)\n", + "df.drop(percentage.index, inplace=True)\n", + "df = df.reset_index()\n", + "df[['message', 'percentage','timestamp']]" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "df['type']= df['message'].str.split().str[0]\n", + "df['update'] = df['message'].str.split(\"(\").str[1].str.split(\")\").str[0]\n", + "df['time (ms)'] = df['message'].str.split(\":\").str[1].str.split().str[0]\n", + "df['type'] = df['type'].apply(lambda x: x + ' time' if x == 'render' else x)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, "metadata": {}, "outputs": [ { @@ -263,282 +275,289 @@ " \n", " \n", " \n", + " index\n", + " level\n", " message\n", - " percentage\n", " timestamp\n", + " datapoints\n", + " samples\n", + " traces\n", + " sampling algorithm\n", + " iteration\n", + " percentage\n", + " type\n", + " update\n", + " time (ms)\n", " \n", " \n", " \n", " \n", " 0\n", - " time (visible): 195.864013671875 ms\n", + " 1\n", + " DEBUG\n", + " time (visible): 1805.199951171875 ms\n", + " 2023-08-07 17:06:47.462\n", + " 10000000\n", + " 10000\n", + " 100\n", + " everynth\n", + " 0\n", " 100.0%\n", - " 2023-03-21 13:47:57.477\n", + " time\n", + " visible\n", + " 1805.199951171875\n", " \n", " \n", " 1\n", - " render time (visible): 91.519287109375 ms\n", + " 2\n", + " DEBUG\n", + " render time (visible): 827.38818359375 ms\n", + " 2023-08-07 17:06:47.463\n", + " 10000000\n", + " 10000\n", + " 100\n", + " everynth\n", + " 0\n", " 100.0%\n", - " 2023-03-21 13:47:57.477\n", + " render time\n", + " visible\n", + " 827.38818359375\n", " \n", " \n", " 2\n", - " time (invisible): 1.1220703125 ms\n", + " 3\n", + " DEBUG\n", + " time (invisible): 1.220947265625 ms\n", + " 2023-08-07 17:06:47.464\n", + " 10000000\n", + " 10000\n", + " 100\n", + " everynth\n", + " 0\n", " 100.0%\n", - " 2023-03-21 13:47:57.479\n", + " time\n", + " invisible\n", + " 1.220947265625\n", " \n", " \n", " 3\n", 
- " time (full): 200.72802734375 ms\n", + " 4\n", + " DEBUG\n", + " time (full): 1808.047119140625 ms\n", + " 2023-08-07 17:06:47.464\n", + " 10000000\n", + " 10000\n", + " 100\n", + " everynth\n", + " 0\n", " 100.0%\n", - " 2023-03-21 13:47:57.479\n", + " time\n", + " full\n", + " 1808.047119140625\n", " \n", " \n", " 4\n", + " 5\n", + " INFO\n", " render time (invisible): None\n", + " 2023-08-07 17:06:47.464\n", + " 10000000\n", + " 10000\n", + " 100\n", + " everynth\n", + " 0\n", " 100.0%\n", - " 2023-03-21 13:47:57.480\n", + " render time\n", + " invisible\n", + " None\n", " \n", " \n", " ...\n", " ...\n", " ...\n", " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", " \n", " \n", - " 1345\n", - " time (visible): 129.57080078125 ms\n", + " 47995\n", + " 52795\n", + " DEBUG\n", + " time (visible): 261.415771484375 ms\n", + " 2023-08-08 00:53:52.332\n", + " 100000\n", + " 5000\n", + " 50\n", + " everynth\n", + " 9\n", " 10.0%\n", - " 2023-03-21 14:05:22.175\n", + " time\n", + " visible\n", + " 261.415771484375\n", " \n", " \n", - " 1346\n", - " render time (visible): 53.781005859375 ms\n", + " 47996\n", + " 52796\n", + " DEBUG\n", + " render time (visible): 81.3662109375 ms\n", + " 2023-08-08 00:53:52.332\n", + " 100000\n", + " 5000\n", + " 50\n", + " everynth\n", + " 9\n", " 10.0%\n", - " 2023-03-21 14:05:22.175\n", + " render time\n", + " visible\n", + " 81.3662109375\n", " \n", " \n", - " 1347\n", - " time (invisible): 269.0849609375 ms\n", + " 47997\n", + " 52797\n", + " DEBUG\n", + " time (invisible): 0.520751953125 ms\n", + " 2023-08-08 00:53:52.332\n", + " 100000\n", + " 5000\n", + " 50\n", + " everynth\n", + " 9\n", " 10.0%\n", - " 2023-03-21 14:05:22.445\n", + " time\n", + " invisible\n", + " 0.520751953125\n", " \n", " \n", - " 1348\n", - " render time (invisible): 67.77197265625 ms\n", + " 47998\n", + " 52798\n", + " DEBUG\n", + " time (full): 264.547607421875 ms\n", + " 2023-08-08 00:53:52.333\n", + " 100000\n", + " 5000\n", + " 50\n", + " everynth\n", + " 9\n", " 10.0%\n", - " 2023-03-21 14:05:22.445\n", + " time\n", + " full\n", + " 264.547607421875\n", " \n", " \n", - " 1349\n", - " time (full): 402.0009765625 ms\n", + " 47999\n", + " 52799\n", + " INFO\n", + " render time (invisible): None\n", + " 2023-08-08 00:53:52.333\n", + " 100000\n", + " 5000\n", + " 50\n", + " everynth\n", + " 9\n", " 10.0%\n", - " 2023-03-21 14:05:22.445\n", + " render time\n", + " invisible\n", + " None\n", " \n", " \n", "\n", - "
1350 rows × 3 columns\n",
+ "48000 rows × 13 columns
\n", "" ], "text/plain": [ - " message percentage \\\n", - "0 time (visible): 195.864013671875 ms 100.0% \n", - "1 render time (visible): 91.519287109375 ms 100.0% \n", - "2 time (invisible): 1.1220703125 ms 100.0% \n", - "3 time (full): 200.72802734375 ms 100.0% \n", - "4 render time (invisible): None 100.0% \n", - "... ... ... \n", - "1345 time (visible): 129.57080078125 ms 10.0% \n", - "1346 render time (visible): 53.781005859375 ms 10.0% \n", - "1347 time (invisible): 269.0849609375 ms 10.0% \n", - "1348 render time (invisible): 67.77197265625 ms 10.0% \n", - "1349 time (full): 402.0009765625 ms 10.0% \n", + " index level message \\\n", + "0 1 DEBUG time (visible): 1805.199951171875 ms \n", + "1 2 DEBUG render time (visible): 827.38818359375 ms \n", + "2 3 DEBUG time (invisible): 1.220947265625 ms \n", + "3 4 DEBUG time (full): 1808.047119140625 ms \n", + "4 5 INFO render time (invisible): None \n", + "... ... ... ... \n", + "47995 52795 DEBUG time (visible): 261.415771484375 ms \n", + "47996 52796 DEBUG render time (visible): 81.3662109375 ms \n", + "47997 52797 DEBUG time (invisible): 0.520751953125 ms \n", + "47998 52798 DEBUG time (full): 264.547607421875 ms \n", + "47999 52799 INFO render time (invisible): None \n", "\n", - " timestamp \n", - "0 2023-03-21 13:47:57.477 \n", - "1 2023-03-21 13:47:57.477 \n", - "2 2023-03-21 13:47:57.479 \n", - "3 2023-03-21 13:47:57.479 \n", - "4 2023-03-21 13:47:57.480 \n", - "... ... \n", - "1345 2023-03-21 14:05:22.175 \n", - "1346 2023-03-21 14:05:22.175 \n", - "1347 2023-03-21 14:05:22.445 \n", - "1348 2023-03-21 14:05:22.445 \n", - "1349 2023-03-21 14:05:22.445 \n", + " timestamp datapoints samples traces sampling algorithm \\\n", + "0 2023-08-07 17:06:47.462 10000000 10000 100 everynth \n", + "1 2023-08-07 17:06:47.463 10000000 10000 100 everynth \n", + "2 2023-08-07 17:06:47.464 10000000 10000 100 everynth \n", + "3 2023-08-07 17:06:47.464 10000000 10000 100 everynth \n", + "4 2023-08-07 17:06:47.464 10000000 10000 100 everynth \n", + "... ... ... ... ... ... \n", + "47995 2023-08-08 00:53:52.332 100000 5000 50 everynth \n", + "47996 2023-08-08 00:53:52.332 100000 5000 50 everynth \n", + "47997 2023-08-08 00:53:52.332 100000 5000 50 everynth \n", + "47998 2023-08-08 00:53:52.333 100000 5000 50 everynth \n", + "47999 2023-08-08 00:53:52.333 100000 5000 50 everynth \n", "\n", - "[1350 rows x 3 columns]" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mask = df['message'].str.contains('%')\n", - "percentage = df[mask]\n", - "percentage\n", - "df.loc[mask, 'percentage'] = percentage['message']\n", - "df['percentage'].fillna(method='ffill', inplace=True)\n", - "df.drop(percentage.index, inplace=True)\n", - "df = df.reset_index()\n", - "df[['message', 'percentage','timestamp']]" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 195.864013671875\n", - "1 91.519287109375\n", - "2 1.1220703125\n", - "3 200.72802734375\n", - "4 None\n", - " ... 
\n", - "1345 129.57080078125\n", - "1346 53.781005859375\n", - "1347 269.0849609375\n", - "1348 67.77197265625\n", - "1349 402.0009765625\n", - "Name: time (ms), Length: 1350, dtype: object" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df['type']= df['message'].str.split().str[0]\n", - "df['update'] = df['message'].str.split(\"(\").str[1].str.split(\")\").str[0]\n", - "df['time (ms)'] = df['message'].str.split(\":\").str[1].str.split().str[0]\n", - "df['time (ms)']" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 time\n", - "1 render time\n", - "2 time\n", - "3 time\n", - "4 render time\n", - " ... \n", - "1345 time\n", - "1346 render time\n", - "1347 time\n", - "1348 render time\n", - "1349 time\n", - "Name: type, Length: 1350, dtype: object" + " iteration percentage type update time (ms) \n", + "0 0 100.0% time visible 1805.199951171875 \n", + "1 0 100.0% render time visible 827.38818359375 \n", + "2 0 100.0% time invisible 1.220947265625 \n", + "3 0 100.0% time full 1808.047119140625 \n", + "4 0 100.0% render time invisible None \n", + "... ... ... ... ... ... \n", + "47995 9 10.0% time visible 261.415771484375 \n", + "47996 9 10.0% render time visible 81.3662109375 \n", + "47997 9 10.0% time invisible 0.520751953125 \n", + "47998 9 10.0% time full 264.547607421875 \n", + "47999 9 10.0% render time invisible None \n", + "\n", + "[48000 rows x 13 columns]" ] }, - "execution_count": 24, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['type'] = df['type'].apply(lambda x: x + ' time' if x == 'render' else x)\n", - "df['type']" + "df" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 60, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_34624\\2595053696.py:7: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " df_visible['version'] = 'async trace update'\n", - "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_34624\\2595053696.py:8: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " df_vanilla_visible['version'] = 'vanilla'\n", - "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_34624\\2595053696.py:11: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " df_visible['time (ms)'] = pd.to_numeric(df_visible['time (ms)'])\n", - "C:\\Users\\isisg\\AppData\\Local\\Temp\\ipykernel_34624\\2595053696.py:12: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - 
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " df_vanilla_visible['time (ms)'] = pd.to_numeric(df_vanilla_visible['time (ms)'])\n" - ] - } - ], + "outputs": [], "source": [ - "df_vanilla = pd.read_csv('../logs/linux_jonas/vanilla_plotly_logs/processed_logs_vanilla.csv')\n", - "df_vanilla = df_vanilla.drop(df_vanilla.columns[0],axis=1)\n", "\n", - "df_vanilla_visible = df_vanilla[df_vanilla['update'].eq('visible') & df_vanilla['type'].eq('time')]\n", - "df_visible = df[df['update'].eq('visible') & df['type'].eq('time')]\n", + "# Convert \"time (ms)\" column to numeric data type\n", + "df['time (ms)'] = pd.to_numeric(df['time (ms)'], errors='coerce')\n", + "df['traces'] = pd.to_numeric(df['traces'])\n", "\n", - "df_visible['version'] = 'async trace update'\n", - "df_vanilla_visible['version'] = 'vanilla'\n", + "# aggregation_functions = {\n", + "# 'time (ms)': ['mean', lambda x: np.std(x, ddof=0) if len(x) > 1 else 0, 'var']\n", + "# }\n", + "aggregation_functions = {\n", + " 'time (ms)': ['mean', 'var']\n", + "}\n", "\n", - "# df_visible = df_visible.append(df_vanilla_visible)\n", - "df_visible['time (ms)'] = pd.to_numeric(df_visible['time (ms)'])\n", - "df_vanilla_visible['time (ms)'] = pd.to_numeric(df_vanilla_visible['time (ms)'])\n" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "from natsort import natsort_keygen\n", + "filtered_df = df.query('`time (ms)` == `time (ms)`')\n", "\n", - "mean_df = df_visible.groupby(['datapoints', 'samples','traces','sampling algorithm', 'percentage', 'version'])['time (ms)'].mean().reset_index()\n", - "mean_vanilla_df = df_vanilla_visible.groupby(['datapoints', 'samples','traces','sampling algorithm', 'percentage', 'version'])['time (ms)'].mean().reset_index()\n", - "mean_vanilla_df = mean_vanilla_df.sort_values(by=['datapoints','samples','traces','sampling algorithm', 'percentage'],key=natsort_keygen())\n", - "mean_df = mean_df.sort_values(by=['datapoints','samples','traces','sampling algorithm', 'percentage'],key=natsort_keygen())\n", - "# mean_df = mean_df.reset_index()" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "mean_df['diff'] = mean_vanilla_df['time (ms)'] - mean_df['time (ms)']\n", - "mean_df['ratio'] = mean_vanilla_df['time (ms)'] / mean_df['time (ms)']\n", - "mean_df['time (ms) vanilla'] = mean_vanilla_df['time (ms)']\n", - "mean_df['percentage'] = pd.to_numeric(mean_df['percentage'].str.split('.').str[0])\n", - "mean_df.to_csv('../logs/linux_jonas/processed_logs.csv')" + "grouped_df = df.groupby(['update', 'type', 'percentage', 'datapoints', 'samples', 'traces']).agg(aggregation_functions)\n", + "agg_df = grouped_df.reset_index()\n", + "agg_df.columns = ['update', 'type', 'percentage', 'datapoints', 'samples', 'traces', 'mean_time', 'variance']\n", + "agg_df['std_dev'] = np.sqrt(agg_df[\"variance\"])" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 61, "metadata": {}, "outputs": [], "source": [ - "import seaborn as sns\n", - "\n" + "agg_df.to_csv('../logs/linux_jonas/vanilla_plotly_logs/processed_logs_vanilla.csv')" ] } ], From e0cf4c885d2ebbc66ae06a999203576cef650469 Mon Sep 17 00:00:00 2001 From: "Isis G.V" Date: Fri, 18 Aug 2023 17:25:56 +0200 Subject: [PATCH 13/15] final log processing --- tests/log_processing.ipynb | 743 +++++++++++++++++++++++++------------ 1 file changed, 508 
insertions(+), 235 deletions(-) diff --git a/tests/log_processing.ipynb b/tests/log_processing.ipynb index 9a8ed8ec..f71ca0f7 100644 --- a/tests/log_processing.ipynb +++ b/tests/log_processing.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 53, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -38,15 +38,15 @@ "3 time (invisible): 1.220947265625 ms\n", "4 time (full): 1808.047119140625 ms\n", " ... \n", - "52795 time (visible): 261.415771484375 ms\n", - "52796 render time (visible): 81.3662109375 ms\n", - "52797 time (invisible): 0.520751953125 ms\n", - "52798 time (full): 264.547607421875 ms\n", - "52799 render time (invisible): None\n", - "Name: message, Length: 52800, dtype: object" + "65995 time (visible): 261.415771484375 ms\n", + "65996 render time (visible): 81.3662109375 ms\n", + "65997 time (invisible): 0.520751953125 ms\n", + "65998 time (full): 264.547607421875 ms\n", + "65999 render time (invisible): None\n", + "Name: message, Length: 66000, dtype: object" ] }, - "execution_count": 54, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -61,16 +61,16 @@ "df = pd.DataFrame()\n", "\n", "\n", - "for filename in os.listdir('../logs/linux_jonas/vanilla_plotly_logs/'):\n", - " f = os.path.join('../logs/linux_jonas/vanilla_plotly_logs/', filename)\n", + "for filename in os.listdir('../logs/final/vanilla_pr_logs/'):\n", + " f = os.path.join('../logs/final/vanilla_pr_logs/', filename)\n", " if os.path.isfile(f) & (filename.split('.')[1] == 'json'):\n", " dft = pd.read_json(f)\n", " dft['datapoints'] = filename.split('_')[0][1:]\n", " dft['samples'] = filename.split('_')[1][1:]\n", " dft['traces'] = filename.split('_')[2][1:]\n", - " dft['sampling algorithm'] = filename.split('_')[3]\n", + " dft['aggregator'] = filename.split('_')[3]\n", " dft['iteration'] = filename.split(\"_\")[4].split('.')[0][-1:]\n", - " df = df.append(dft, ignore_index=True)\n", + " df = pd.concat([df, dft], ignore_index=True)\n", " \n", "\n", "\n", @@ -83,14 +83,227 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
levelmessagetimestampdatapointssamplestracesaggregatoriteration
0INFO100.0%2023-08-11 17:04:16.2051000000010000100everynth0
1DEBUGtime (visible): 1942.76708984375 ms2023-08-11 17:04:19.2341000000010000100everynth0
2DEBUGrender time (visible): 942.869140625 ms2023-08-11 17:04:19.2361000000010000100everynth0
3DEBUGtime (invisible): 1.306884765625 ms2023-08-11 17:04:19.2371000000010000100everynth0
4DEBUGtime (full): 1945.81103515625 ms2023-08-11 17:04:19.2371000000010000100everynth0
...........................
65997DEBUGtime (visible): 168.304931640625 ms2023-08-12 00:45:01.208100000500050everynth9
65998DEBUGrender time (visible): 67.005126953125 ms2023-08-12 00:45:01.208100000500050everynth9
65999DEBUGtime (invisible): 220.11083984375 ms2023-08-12 00:45:01.428100000500050everynth9
66000DEBUGrender time (invisible): 72.118896484375 ms2023-08-12 00:45:01.429100000500050everynth9
66001DEBUGtime (full): 390.1201171875 ms2023-08-12 00:45:01.429100000500050everynth9
\n", + "

66000 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " level message \\\n", + "0 INFO 100.0% \n", + "1 DEBUG time (visible): 1942.76708984375 ms \n", + "2 DEBUG render time (visible): 942.869140625 ms \n", + "3 DEBUG time (invisible): 1.306884765625 ms \n", + "4 DEBUG time (full): 1945.81103515625 ms \n", + "... ... ... \n", + "65997 DEBUG time (visible): 168.304931640625 ms \n", + "65998 DEBUG render time (visible): 67.005126953125 ms \n", + "65999 DEBUG time (invisible): 220.11083984375 ms \n", + "66000 DEBUG render time (invisible): 72.118896484375 ms \n", + "66001 DEBUG time (full): 390.1201171875 ms \n", + "\n", + " timestamp datapoints samples traces aggregator iteration \n", + "0 2023-08-11 17:04:16.205 10000000 10000 100 everynth 0 \n", + "1 2023-08-11 17:04:19.234 10000000 10000 100 everynth 0 \n", + "2 2023-08-11 17:04:19.236 10000000 10000 100 everynth 0 \n", + "3 2023-08-11 17:04:19.237 10000000 10000 100 everynth 0 \n", + "4 2023-08-11 17:04:19.237 10000000 10000 100 everynth 0 \n", + "... ... ... ... ... ... ... \n", + "65997 2023-08-12 00:45:01.208 100000 5000 50 everynth 9 \n", + "65998 2023-08-12 00:45:01.208 100000 5000 50 everynth 9 \n", + "65999 2023-08-12 00:45:01.428 100000 5000 50 everynth 9 \n", + "66000 2023-08-12 00:45:01.429 100000 5000 50 everynth 9 \n", + "66001 2023-08-12 00:45:01.429 100000 5000 50 everynth 9 \n", + "\n", + "[66000 rows x 8 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_prio = pd.DataFrame()\n", + "\n", + "\n", + "for filename in os.listdir('../logs/final/visual_gain_logs/'):\n", + " f = os.path.join('../logs/final/visual_gain_logs/', filename)\n", + " if os.path.isfile(f) & f.endswith(\".json\"):\n", + " dft = pd.read_json(f)\n", + " dft['datapoints'] = filename.split('_')[0][1:]\n", + " dft['samples'] = filename.split('_')[1][1:]\n", + " dft['traces'] = filename.split('_')[2][1:]\n", + " dft['aggregator'] = filename.split('_')[3]\n", + " dft['iteration'] = filename.split(\"_\")[4].split('.')[0][-1:]\n", + " df_prio = pd.concat([df_prio, dft], ignore_index=True)\n", + " \n", + "df_prio['message']= df_prio['message'].str.split('\\\"').str[-2]\n", + "df_prio.drop(df_prio[df_prio['level'].eq('SEVERE')].index, inplace=True)\n", + "df_prio.drop('source', axis=1, inplace=True)\n", + "df_prio.reset_index()\n", + "df_prio" + ] }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -157,38 +370,38 @@ " ...\n", " \n", " \n", - " 47995\n", + " 59995\n", " time (visible): 261.415771484375 ms\n", " 10.0%\n", " 2023-08-08 00:53:52.332\n", " \n", " \n", - " 47996\n", + " 59996\n", " render time (visible): 81.3662109375 ms\n", " 10.0%\n", " 2023-08-08 00:53:52.332\n", " \n", " \n", - " 47997\n", + " 59997\n", " time (invisible): 0.520751953125 ms\n", " 10.0%\n", " 2023-08-08 00:53:52.332\n", " \n", " \n", - " 47998\n", + " 59998\n", " time (full): 264.547607421875 ms\n", " 10.0%\n", " 2023-08-08 00:53:52.333\n", " \n", " \n", - " 47999\n", + " 59999\n", " render time (invisible): None\n", " 10.0%\n", " 2023-08-08 00:53:52.333\n", " \n", " \n", "\n", - "
48000 rows × 3 columns\n",
+ "60000 rows × 3 columns
\n", "" ], "text/plain": [ @@ -199,11 +412,11 @@ "3 time (full): 1808.047119140625 ms 100.0% \n", "4 render time (invisible): None 100.0% \n", "... ... ... \n", - "47995 time (visible): 261.415771484375 ms 10.0% \n", - "47996 render time (visible): 81.3662109375 ms 10.0% \n", - "47997 time (invisible): 0.520751953125 ms 10.0% \n", - "47998 time (full): 264.547607421875 ms 10.0% \n", - "47999 render time (invisible): None 10.0% \n", + "59995 time (visible): 261.415771484375 ms 10.0% \n", + "59996 render time (visible): 81.3662109375 ms 10.0% \n", + "59997 time (invisible): 0.520751953125 ms 10.0% \n", + "59998 time (full): 264.547607421875 ms 10.0% \n", + "59999 render time (invisible): None 10.0% \n", "\n", " timestamp \n", "0 2023-08-07 17:06:47.462 \n", @@ -212,16 +425,16 @@ "3 2023-08-07 17:06:47.464 \n", "4 2023-08-07 17:06:47.464 \n", "... ... \n", - "47995 2023-08-08 00:53:52.332 \n", - "47996 2023-08-08 00:53:52.332 \n", - "47997 2023-08-08 00:53:52.332 \n", - "47998 2023-08-08 00:53:52.333 \n", - "47999 2023-08-08 00:53:52.333 \n", + "59995 2023-08-08 00:53:52.332 \n", + "59996 2023-08-08 00:53:52.332 \n", + "59997 2023-08-08 00:53:52.332 \n", + "59998 2023-08-08 00:53:52.333 \n", + "59999 2023-08-08 00:53:52.333 \n", "\n", - "[48000 rows x 3 columns]" + "[60000 rows x 3 columns]" ] }, - "execution_count": 55, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -239,7 +452,156 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
messagepercentagetimestamp
0time (visible): 1942.76708984375 ms100.0%2023-08-11 17:04:19.234
1render time (visible): 942.869140625 ms100.0%2023-08-11 17:04:19.236
2time (invisible): 1.306884765625 ms100.0%2023-08-11 17:04:19.237
3time (full): 1945.81103515625 ms100.0%2023-08-11 17:04:19.237
4render time (invisible): None100.0%2023-08-11 17:04:19.240
............
59995time (visible): 168.304931640625 ms10.0%2023-08-12 00:45:01.208
59996render time (visible): 67.005126953125 ms10.0%2023-08-12 00:45:01.208
59997time (invisible): 220.11083984375 ms10.0%2023-08-12 00:45:01.428
59998render time (invisible): 72.118896484375 ms10.0%2023-08-12 00:45:01.429
59999time (full): 390.1201171875 ms10.0%2023-08-12 00:45:01.429
\n", + "

60000 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " message percentage \\\n", + "0 time (visible): 1942.76708984375 ms 100.0% \n", + "1 render time (visible): 942.869140625 ms 100.0% \n", + "2 time (invisible): 1.306884765625 ms 100.0% \n", + "3 time (full): 1945.81103515625 ms 100.0% \n", + "4 render time (invisible): None 100.0% \n", + "... ... ... \n", + "59995 time (visible): 168.304931640625 ms 10.0% \n", + "59996 render time (visible): 67.005126953125 ms 10.0% \n", + "59997 time (invisible): 220.11083984375 ms 10.0% \n", + "59998 render time (invisible): 72.118896484375 ms 10.0% \n", + "59999 time (full): 390.1201171875 ms 10.0% \n", + "\n", + " timestamp \n", + "0 2023-08-11 17:04:19.234 \n", + "1 2023-08-11 17:04:19.236 \n", + "2 2023-08-11 17:04:19.237 \n", + "3 2023-08-11 17:04:19.237 \n", + "4 2023-08-11 17:04:19.240 \n", + "... ... \n", + "59995 2023-08-12 00:45:01.208 \n", + "59996 2023-08-12 00:45:01.208 \n", + "59997 2023-08-12 00:45:01.428 \n", + "59998 2023-08-12 00:45:01.429 \n", + "59999 2023-08-12 00:45:01.429 \n", + "\n", + "[60000 rows x 3 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mask = df_prio['message'].str.contains('%')\n", + "percentage = df_prio[mask]\n", + "percentage\n", + "df_prio.loc[mask, 'percentage'] = percentage['message']\n", + "df_prio['percentage'].fillna(method='ffill', inplace=True)\n", + "df_prio.drop(percentage.index, inplace=True)\n", + "df_prio = df_prio.reset_index()\n", + "df_prio[['message', 'percentage','timestamp']]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -251,7 +613,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -275,262 +637,120 @@ " \n", " \n", " \n", - " index\n", - " level\n", - " message\n", - " timestamp\n", - " datapoints\n", - " samples\n", - " traces\n", - " sampling algorithm\n", - " iteration\n", - " percentage\n", - " type\n", - " update\n", " time (ms)\n", + " update\n", + " type\n", " \n", " \n", " \n", " \n", " 0\n", - " 1\n", - " DEBUG\n", - " time (visible): 1805.199951171875 ms\n", - " 2023-08-07 17:06:47.462\n", - " 10000000\n", - " 10000\n", - " 100\n", - " everynth\n", - " 0\n", - " 100.0%\n", - " time\n", + " 1942.76708984375\n", " visible\n", - " 1805.199951171875\n", + " time\n", " \n", " \n", " 1\n", - " 2\n", - " DEBUG\n", - " render time (visible): 827.38818359375 ms\n", - " 2023-08-07 17:06:47.463\n", - " 10000000\n", - " 10000\n", - " 100\n", - " everynth\n", - " 0\n", - " 100.0%\n", - " render time\n", + " 942.869140625\n", " visible\n", - " 827.38818359375\n", + " render time\n", " \n", " \n", " 2\n", - " 3\n", - " DEBUG\n", - " time (invisible): 1.220947265625 ms\n", - " 2023-08-07 17:06:47.464\n", - " 10000000\n", - " 10000\n", - " 100\n", - " everynth\n", - " 0\n", - " 100.0%\n", - " time\n", + " 1.306884765625\n", " invisible\n", - " 1.220947265625\n", + " time\n", " \n", " \n", " 3\n", - " 4\n", - " DEBUG\n", - " time (full): 1808.047119140625 ms\n", - " 2023-08-07 17:06:47.464\n", - " 10000000\n", - " 10000\n", - " 100\n", - " everynth\n", - " 0\n", - " 100.0%\n", - " time\n", + " 1945.81103515625\n", " full\n", - " 1808.047119140625\n", + " time\n", " \n", " \n", " 4\n", - " 5\n", - " INFO\n", - " render time (invisible): None\n", - " 2023-08-07 17:06:47.464\n", - " 10000000\n", - " 10000\n", - " 100\n", - " everynth\n", - " 0\n", - " 100.0%\n", - " render time\n", - " invisible\n", " None\n", + " invisible\n", + " 
render time\n", " \n", " \n", " ...\n", " ...\n", " ...\n", " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", " \n", " \n", - " 47995\n", - " 52795\n", - " DEBUG\n", - " time (visible): 261.415771484375 ms\n", - " 2023-08-08 00:53:52.332\n", - " 100000\n", - " 5000\n", - " 50\n", - " everynth\n", - " 9\n", - " 10.0%\n", - " time\n", + " 59995\n", + " 168.304931640625\n", " visible\n", - " 261.415771484375\n", + " time\n", " \n", " \n", - " 47996\n", - " 52796\n", - " DEBUG\n", - " render time (visible): 81.3662109375 ms\n", - " 2023-08-08 00:53:52.332\n", - " 100000\n", - " 5000\n", - " 50\n", - " everynth\n", - " 9\n", - " 10.0%\n", - " render time\n", + " 59996\n", + " 67.005126953125\n", " visible\n", - " 81.3662109375\n", + " render time\n", " \n", " \n", - " 47997\n", - " 52797\n", - " DEBUG\n", - " time (invisible): 0.520751953125 ms\n", - " 2023-08-08 00:53:52.332\n", - " 100000\n", - " 5000\n", - " 50\n", - " everynth\n", - " 9\n", - " 10.0%\n", - " time\n", + " 59997\n", + " 220.11083984375\n", " invisible\n", - " 0.520751953125\n", - " \n", - " \n", - " 47998\n", - " 52798\n", - " DEBUG\n", - " time (full): 264.547607421875 ms\n", - " 2023-08-08 00:53:52.333\n", - " 100000\n", - " 5000\n", - " 50\n", - " everynth\n", - " 9\n", - " 10.0%\n", " time\n", - " full\n", - " 264.547607421875\n", " \n", " \n", - " 47999\n", - " 52799\n", - " INFO\n", - " render time (invisible): None\n", - " 2023-08-08 00:53:52.333\n", - " 100000\n", - " 5000\n", - " 50\n", - " everynth\n", - " 9\n", - " 10.0%\n", - " render time\n", + " 59998\n", + " 72.118896484375\n", " invisible\n", - " None\n", + " render time\n", + " \n", + " \n", + " 59999\n", + " 390.1201171875\n", + " full\n", + " time\n", " \n", " \n", "\n", - "
48000 rows × 13 columns\n",
+ "60000 rows × 3 columns
\n", "" ], "text/plain": [ - " index level message \\\n", - "0 1 DEBUG time (visible): 1805.199951171875 ms \n", - "1 2 DEBUG render time (visible): 827.38818359375 ms \n", - "2 3 DEBUG time (invisible): 1.220947265625 ms \n", - "3 4 DEBUG time (full): 1808.047119140625 ms \n", - "4 5 INFO render time (invisible): None \n", - "... ... ... ... \n", - "47995 52795 DEBUG time (visible): 261.415771484375 ms \n", - "47996 52796 DEBUG render time (visible): 81.3662109375 ms \n", - "47997 52797 DEBUG time (invisible): 0.520751953125 ms \n", - "47998 52798 DEBUG time (full): 264.547607421875 ms \n", - "47999 52799 INFO render time (invisible): None \n", + " time (ms) update type\n", + "0 1942.76708984375 visible time\n", + "1 942.869140625 visible render time\n", + "2 1.306884765625 invisible time\n", + "3 1945.81103515625 full time\n", + "4 None invisible render time\n", + "... ... ... ...\n", + "59995 168.304931640625 visible time\n", + "59996 67.005126953125 visible render time\n", + "59997 220.11083984375 invisible time\n", + "59998 72.118896484375 invisible render time\n", + "59999 390.1201171875 full time\n", "\n", - " timestamp datapoints samples traces sampling algorithm \\\n", - "0 2023-08-07 17:06:47.462 10000000 10000 100 everynth \n", - "1 2023-08-07 17:06:47.463 10000000 10000 100 everynth \n", - "2 2023-08-07 17:06:47.464 10000000 10000 100 everynth \n", - "3 2023-08-07 17:06:47.464 10000000 10000 100 everynth \n", - "4 2023-08-07 17:06:47.464 10000000 10000 100 everynth \n", - "... ... ... ... ... ... \n", - "47995 2023-08-08 00:53:52.332 100000 5000 50 everynth \n", - "47996 2023-08-08 00:53:52.332 100000 5000 50 everynth \n", - "47997 2023-08-08 00:53:52.332 100000 5000 50 everynth \n", - "47998 2023-08-08 00:53:52.333 100000 5000 50 everynth \n", - "47999 2023-08-08 00:53:52.333 100000 5000 50 everynth \n", - "\n", - " iteration percentage type update time (ms) \n", - "0 0 100.0% time visible 1805.199951171875 \n", - "1 0 100.0% render time visible 827.38818359375 \n", - "2 0 100.0% time invisible 1.220947265625 \n", - "3 0 100.0% time full 1808.047119140625 \n", - "4 0 100.0% render time invisible None \n", - "... ... ... ... ... ... 
\n", - "47995 9 10.0% time visible 261.415771484375 \n", - "47996 9 10.0% render time visible 81.3662109375 \n", - "47997 9 10.0% time invisible 0.520751953125 \n", - "47998 9 10.0% time full 264.547607421875 \n", - "47999 9 10.0% render time invisible None \n", - "\n", - "[48000 rows x 13 columns]" + "[60000 rows x 3 columns]" ] }, - "execution_count": 57, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df['type'] = df['type'].apply(lambda x: x + ' time' if x == 'render' else x)\n", - "df" + "df_prio['type']= df_prio['message'].str.split().str[0]\n", + "df_prio['update'] = df_prio['message'].str.split(\"(\").str[1].str.split(\")\").str[0]\n", + "df_prio['time (ms)'] = df_prio['message'].str.split(\":\").str[1].str.split().str[0]\n", + "df_prio['type'] = df_prio['type'].apply(lambda x: x + ' time' if x == 'render' else x)\n", + "df_prio[['time (ms)', 'update','type']]" ] }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ + "from natsort import natsort_keygen\n", "\n", "# Convert \"time (ms)\" column to numeric data type\n", "df['time (ms)'] = pd.to_numeric(df['time (ms)'], errors='coerce')\n", @@ -542,22 +762,75 @@ "aggregation_functions = {\n", " 'time (ms)': ['mean', 'var']\n", "}\n", + "sample_size = 25\n", "\n", "filtered_df = df.query('`time (ms)` == `time (ms)`')\n", + "filtered_df_visible = filtered_df[filtered_df['update']=='visible']\n", + "filtered_df_visible = filtered_df_visible[filtered_df_visible['type']=='time']\n", + "filtered_df_visible = filtered_df_visible[filtered_df_visible['percentage'] == \"100.0%\"]\n", "\n", - "grouped_df = df.groupby(['update', 'type', 'percentage', 'datapoints', 'samples', 'traces']).agg(aggregation_functions)\n", + "grouped_df = filtered_df_visible.groupby(['percentage', 'datapoints', 'samples', 'traces']).agg(aggregation_functions)\n", "agg_df = grouped_df.reset_index()\n", - "agg_df.columns = ['update', 'type', 'percentage', 'datapoints', 'samples', 'traces', 'mean_time', 'variance']\n", - "agg_df['std_dev'] = np.sqrt(agg_df[\"variance\"])" + "\n", + "agg_df.columns = ['percentage', 'datapoints', 'samples', 'traces', 'mean_time', 'variance']\n", + "\n", + "agg_df['std_dev'] = np.sqrt(agg_df[\"variance\"])\n", + "agg_df['std_err'] = 1.96 * agg_df[\"std_dev\"]/np.sqrt(sample_size)\n", + "\n", + "agg_df = agg_df.sort_values(by=['datapoints','samples','traces'],key=natsort_keygen())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Convert \"time (ms)\" column to numeric data type\n", + "df_prio['time (ms)'] = pd.to_numeric(df_prio['time (ms)'], errors='coerce')\n", + "df_prio['traces'] = pd.to_numeric(df_prio['traces'])\n", + "\n", + "# aggregation_functions = {\n", + "# 'time (ms)': ['mean', lambda x: np.std(x, ddof=0) if len(x) > 1 else 0, 'var']\n", + "# }\n", + "aggregation_functions = {\n", + " 'time (ms)': ['mean', 'var']\n", + "}\n", + "\n", + "filtered_df_prio = df_prio.query('`time (ms)` == `time (ms)`')\n", + "filtered_df_prio_visible = filtered_df_prio[filtered_df_prio['update']=='visible']\n", + "filtered_df_prio_visible = filtered_df_prio_visible[filtered_df_prio_visible['type']=='time']\n", + "\n", + "grouped_df_prio = filtered_df_prio_visible.groupby(['percentage','datapoints', 'samples', 'traces']).agg(aggregation_functions)\n", + "agg_df_prio = grouped_df_prio.reset_index()\n", + "agg_df_prio.columns = ['percentage', 'datapoints', 'samples', 'traces', 
'mean_time', 'variance']\n", + "agg_df_prio['std_dev'] = np.sqrt(agg_df_prio[\"variance\"])\n", + "agg_df_prio['std_err'] = 1.96 * agg_df_prio[\"std_dev\"]/np.sqrt(sample_size)\n", + "\n", + "\n", + "\n", + "agg_df_prio = agg_df_prio.sort_values(by=['datapoints','samples','traces','percentage'],key=natsort_keygen())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "agg_df['percentage'] = '100.0% (baseline)'\n", + "df_final = pd.concat([agg_df , agg_df_prio], ignore_index=True)\n", + "df_final = df_final.sort_values(by=['datapoints','samples','traces','percentage'],key=natsort_keygen())\n", + "df_final = df_final.reset_index(drop=True)" ] }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "agg_df.to_csv('../logs/linux_jonas/vanilla_plotly_logs/processed_logs_vanilla.csv')" + "df_final.to_csv('../logs/final/processed_logs_final.csv')" ] } ], From 648c88189f52674c6e1ca4b0974a15ea51edf16e Mon Sep 17 00:00:00 2001 From: "Isis G.V" Date: Fri, 18 Aug 2023 17:26:51 +0200 Subject: [PATCH 14/15] modified gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index a8b6bb5a..278eeaa1 100644 --- a/.gitignore +++ b/.gitignore @@ -162,3 +162,5 @@ selectionbox_layout_data/ examples/dash_apps/**/*coarse_fine* examples/dash_apps/2* examples/dash_apps/00* +tests/log_processing*.ipynb +figure data/ From f8e29cd59eeffd13afe9de351a3b009e637e6753 Mon Sep 17 00:00:00 2001 From: ISX2 Date: Mon, 21 Aug 2023 12:57:21 +0200 Subject: [PATCH 15/15] reformatted code :) --- examples/dash_apps/01_minimal_global.py | 1 + .../figure_resampler/figure_resampler.py | 22 ++- .../figure_resampler_interface.py | 12 +- tests/conftest.py | 2 +- tests/fr_selenium.py | 18 +- tests/minimal_variable_threads.py | 23 ++- tests/test_visual_gain_threads.py | 167 +++++++++--------- 7 files changed, 132 insertions(+), 113 deletions(-) diff --git a/examples/dash_apps/01_minimal_global.py b/examples/dash_apps/01_minimal_global.py index 2bea4059..a39fc9cb 100644 --- a/examples/dash_apps/01_minimal_global.py +++ b/examples/dash_apps/01_minimal_global.py @@ -16,6 +16,7 @@ import numpy as np import plotly.graph_objects as go from dash import Dash, Input, Output, callback_context, dcc, html, no_update + # from graph_reporter import GraphReporter from trace_updater import TraceUpdater diff --git a/plotly_resampler/figure_resampler/figure_resampler.py b/plotly_resampler/figure_resampler/figure_resampler.py index 096115ec..acb3f868 100644 --- a/plotly_resampler/figure_resampler/figure_resampler.py +++ b/plotly_resampler/figure_resampler/figure_resampler.py @@ -11,13 +11,12 @@ __author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost" -import base64 -import uuid import warnings from typing import List, Tuple import dash import plotly.graph_objects as go + # from graph_reporter import GraphReporter from plotly.basedatatypes import BaseFigure from trace_updater import TraceUpdater @@ -303,17 +302,24 @@ def show_dash( app = dash.Dash("local_app") app.layout = dash.html.Div( [ - dash.dcc.Store(id="visible-indices", data={"visible": [], "invisible": []}), + dash.dcc.Store( + id="visible-indices", data={"visible": [], "invisible": []} + ), dash.dcc.Graph( id="resample-figure", figure=self, config=config, **graph_properties ), TraceUpdater( - id="trace-updater", gdID="resample-figure", sequentialUpdate=False, verbose=testing + id="trace-updater", + 
gdID="resample-figure", + sequentialUpdate=False, + verbose=testing, ), # GraphReporter(id="graph-reporter", gId="resample-figure"), ] ) - self.register_update_graph_callback(app, "resample-figure", "trace-updater", 'visible-indices') + self.register_update_graph_callback( + app, "resample-figure", "trace-updater", "visible-indices" + ) height_param = "height" if self._is_persistent_inline else "jupyter_height" @@ -375,7 +381,11 @@ def stop_server(self, warn: bool = True): # TODO: check if i should put the clientside callback to fill the store here or in a different function # for now, here def register_update_graph_callback( - self, app: dash.Dash, graph_id: str, trace_updater_id: str, visibility_store_id: str + self, + app: dash.Dash, + graph_id: str, + trace_updater_id: str, + visibility_store_id: str, ): """Register the [`construct_update_data`][figure_resampler.figure_resampler_interface.AbstractFigureAggregator.construct_update_data] method as callback function to the passed dash-app. diff --git a/plotly_resampler/figure_resampler/figure_resampler_interface.py b/plotly_resampler/figure_resampler/figure_resampler_interface.py index a185839c..65626d9c 100644 --- a/plotly_resampler/figure_resampler/figure_resampler_interface.py +++ b/plotly_resampler/figure_resampler/figure_resampler_interface.py @@ -478,9 +478,13 @@ def _check_update_figure_dict( # We skip when (i) the trace-idx already has been updated, (ii) when # there is a layout_xaxis_filter and the trace xaxis is not in the filter # or (iii) when its not part of the subset of traces (visible / invisible) to update now - if idx in updated_trace_indices or idx not in indices_to_use or ( - layout_xaxis_filter is not None - and trace.get("xaxis", "x") not in trace_xaxis_filter + if ( + idx in updated_trace_indices + or idx not in indices_to_use + or ( + layout_xaxis_filter is not None + and trace.get("xaxis", "x") not in trace_xaxis_filter + ) ): continue @@ -1424,7 +1428,7 @@ def construct_update_data( def construct_invisible_update_data( self, visible_update: int, relayout_data, trace_visibility: dict - ) -> Union[List[dict], dash.no_update] : + ) -> Union[List[dict], dash.no_update]: invisible_trace_idx = trace_visibility["invisible"] current_graph = self._get_current_graph() diff --git a/tests/conftest.py b/tests/conftest.py index 213168a8..30a770a7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -58,7 +58,7 @@ def driver(): options = Options() d = DesiredCapabilities.CHROME d["goog:loggingPrefs"] = {"browser": "ALL"} - d['acceptSslCerts'] = True + d["acceptSslCerts"] = True if not TESTING_LOCAL: if headless: options.add_argument("--headless") diff --git a/tests/fr_selenium.py b/tests/fr_selenium.py index 255e77f9..bb9076cb 100644 --- a/tests/fr_selenium.py +++ b/tests/fr_selenium.py @@ -14,7 +14,6 @@ import json import time from typing import List, Union -import sys from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.by import By @@ -30,7 +29,6 @@ # - on linux, the browser (i.e., sending & getting requests) goes a lot faster from .utils import not_on_linux - # https://www.blazemeter.com/blog/improve-your-selenium-webdriver-tests-with-pytest # and create a parameterized driver.get method @@ -202,7 +200,9 @@ def get_requests(self, delete: bool = True): return requests - def drag_and_zoom(self, div_classname, x0=0.25, x1=0.5, y0=0.25, y1=0.5, testing = False): + def drag_and_zoom( + self, div_classname, x0=0.25, x1=0.5, y0=0.25, y1=0.5, testing=False + ): """ Drags and zooms the 
div with the given classname. @@ -243,7 +243,9 @@ def drag_and_zoom(self, div_classname, x0=0.25, x1=0.5, y0=0.25, y1=0.5, testing action.release() if testing: # self.driver.execute_script("console.log('time update visible');") - self.driver.execute_script("console.time('time (visible)');console.time('time (full)');") + self.driver.execute_script( + "console.time('time (visible)');console.time('time (full)');" + ) action.perform() def _get_modebar_btns(self): @@ -262,7 +264,7 @@ def autoscale(self): ActionChains(self.driver).move_to_element(btn).click().perform() return - def reset_axes(self, testing = False): + def reset_axes(self, testing=False): for btn in self._get_modebar_btns(): data_title = btn.get_attribute("data-title") if data_title == "Reset axes": @@ -273,7 +275,9 @@ def reset_axes(self, testing = False): actions.click() if testing: # self.driver.execute_script("console.log('time update visible');") - self.driver.execute_script("console.time('time (visible)');console.time('time (full)');") + self.driver.execute_script( + "console.time('time (visible)');console.time('time (full)');" + ) actions.perform() return @@ -309,7 +313,7 @@ def start_timer(self, type): self.driver.execute_script("console.log('zoom in')") else: self.driver.execute_script("console.log('reset')") - + # ------------------------------ DATA MODEL METHODS ------------------------------ def __del__(self): self.driver.close() diff --git a/tests/minimal_variable_threads.py b/tests/minimal_variable_threads.py index 685cf569..c7fb4f4d 100644 --- a/tests/minimal_variable_threads.py +++ b/tests/minimal_variable_threads.py @@ -1,19 +1,16 @@ +import argparse import numpy as np import plotly.graph_objs as go + +from plotly_resampler.aggregation import EveryNthPoint + # from dash import Input, Output, dcc, html # from trace_updater import TraceUpdater - # import sys # print(sys.path) # sys.path.append('C:\\Users\\willi\\Documents\\ISIS\\Thesis\\plotly-resampler') - - from plotly_resampler.figure_resampler import FigureResampler -from plotly_resampler.aggregation import EveryNthPoint - -import argparse -import os parser = argparse.ArgumentParser() parser.add_argument("-n", "--npoints", type=int) @@ -30,16 +27,16 @@ # print(t) - # # Construct a high-frequency signal # n=1_000_000 # s=10_000 # t=10 -def make_fig(n, s, t): + +def make_fig(n, s, t): x = np.arange(n) noisy_sin = (3 + np.sin(x / 200) + np.random.randn(len(x)) / 10) * x / (n / 10) - print(n/s) + print(n / s) # Construct the to-be resampled figure fig = FigureResampler( go.Figure(), @@ -50,7 +47,9 @@ def make_fig(n, s, t): ) for i in range(t): fig.add_trace( - go.Scattergl(name=f"sine-{i}", showlegend=True), hf_x=x, hf_y=noisy_sin + 10 * i + go.Scattergl(name=f"sine-{i}", showlegend=True), + hf_x=x, + hf_y=noisy_sin + 10 * i, ) return fig @@ -73,7 +72,7 @@ def make_fig(n, s, t): fig = make_fig(n, s, t) # Register the callback -fig.show_dash(mode='external', testing=True) +fig.show_dash(mode="external", testing=True) # # fig.register_update_graph_callback(app, "graph-id", "trace-updater", "visible-indices") diff --git a/tests/test_visual_gain_threads.py b/tests/test_visual_gain_threads.py index 895cbca8..1470f9b4 100644 --- a/tests/test_visual_gain_threads.py +++ b/tests/test_visual_gain_threads.py @@ -1,100 +1,99 @@ -import multiprocessing -import time -import subprocess as sp +import json import os -import signal -import psutil as ps +import subprocess as sp +import time -import json import numpy as np - -import plotly.graph_objects as go -from plotly_resampler 
import FigureResampler from fr_selenium import FigureResamplerGUITests - - - from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.desired_capabilities import DesiredCapabilities from seleniumwire import webdriver from webdriver_manager.chrome import ChromeDriverManager, ChromeType -from selenium.webdriver.chrome.service import Service as ChromeService - # create a test for each value of n_traces, n_datapoints and shown_datapoints - # open new page - # loop over a range of percentages (% of shown traces) - # start timer (in front end via selenium? performance library js) - # apply 50% range zoom - # stop timer when visible update - # start another timer for invisible - # stop timer when invisible renders - # return to original range (may trigger timer in front end... prevent this!!) - # extract logs from this iteration into a file - # close page! +# open new page +# loop over a range of percentages (% of shown traces) +# start timer (in front end via selenium? performance library js) +# apply 50% range zoom +# stop timer when visible update +# start another timer for invisible +# stop timer when invisible renders +# return to original range (may trigger timer in front end... prevent this!!) +# extract logs from this iteration into a file +# close page! -# d = driver() - -iterations = 1 #TODO: use to run this benchmarking process multiple times -> collect more data -> more accurate results +iterations = 1 # use to run this benchmarking process multiple times -> collect more data -> more accurate results percentages_hidden = np.array([0, 0.2, 0.5, 0.8, 0.9]) n_traces = [ - 10, - # 20, - # 50, - # 100 - ] + 10, + 20, + 50, + 100 +] n_datapoints = [ - 100_000, - # 1_000_000, - # 10_000_000 - ] # hypothesis: this shouldn't affect the results too much? (if the biggest bottleneck is data transfer time) + 100_000, + 1_000_000, + 10_000_000 +] # hypothesis: this shouldn't affect the results too much? 
(if the biggest bottleneck is data transfer time) n_shown_datapoints = [ - 100, - # 1000, - # 5000, - # 10000 - ] + 100, + 1000, + 5000, + 10000 +] for it in range(iterations): print(f"iteration {it}") options = Options() # options.add_argument("--kiosk") #maximize window d = DesiredCapabilities.CHROME d["goog:loggingPrefs"] = {"browser": "ALL"} - driver = webdriver.Chrome(ChromeDriverManager(chrome_type=ChromeType.GOOGLE).install(), options=options,desired_capabilities=d,) - # driver = webdriver.Chrome( - # service=ChromeService(ChromeDriverManager(chrome_type=ChromeType.GOOGLE).install()), - # # service_args=["--verbose", "--log-path=C:\\Users\\willi\\Documents\\ISIS\\Thesis\\plotly-resampler\\logs"], - # options=options, - # desired_capabilities=d, - # ) - # driver.fullscreen_window() + driver = webdriver.Chrome( + ChromeDriverManager(chrome_type=ChromeType.GOOGLE).install(), + options=options, + desired_capabilities=d, + ) + driver.maximize_window() port = 8050 fr = FigureResamplerGUITests(driver, port=port) - try: + try: for t in n_traces: for n in n_datapoints: for s in n_shown_datapoints: time.sleep(2) - proc = sp.Popen(['poetry','run','python','./tests/minimal_variable_threads.py', '-n', str(n), '-s', str(s), '-t', str(t)], - # creationflags=sp.CREATE_NEW_CONSOLE - ) - print(f'n_traces: {t}') - print(f'n_datapoints: {n}') - print(f'n_shown_datapoints: {s}') + proc = sp.Popen( + [ + "poetry", + "run", + "python", + "./tests/minimal_variable_threads.py", + "-n", + str(n), + "-s", + str(s), + "-t", + str(t), + ], + # creationflags=sp.CREATE_NEW_CONSOLE + ) + print(f"n_traces: {t}") + print(f"n_datapoints: {n}") + print(f"n_shown_datapoints: {s}") print(f"iteration {it}") try: time.sleep(20) fr.go_to_page() - + time.sleep(1) # determine the number of traces that will be hidden corresponding to each percentage - n_traces_hidden = np.unique(np.ceil(t*percentages_hidden)).astype(int) + n_traces_hidden = np.unique( + np.ceil(t * percentages_hidden) + ).astype(int) # TODO: get final list of percentages (visible!) 
and print to console # print(n_traces_hidden) @@ -103,55 +102,57 @@ if idx == 0: previous_n_hidden = 0 else: - previous_n_hidden = n_traces_hidden[idx-1] + previous_n_hidden = n_traces_hidden[idx - 1] # hide r traces from the last hidden trace driver.execute_script(f'console.log("{100-((j/t)*100)}%")') - print(previous_n_hidden) - residual = n_traces_hidden[idx]-previous_n_hidden + print(previous_n_hidden) + residual = n_traces_hidden[idx] - previous_n_hidden print(residual) - residual_indices = [int(last-(i+1)) for i in range(residual)] + residual_indices = [ + int(last - (i + 1)) for i in range(residual) + ] last -= residual if residual_indices != []: fr.hide_legend_restyle(residual_indices) - # after hiding the traces, (start the timer,) zoom in, then reset the axes for the next iteration - fr.drag_and_zoom("xy", x0=0.25, x1=0.75, y0=0.5, y1=0.5, testing=True) - #start timer - # fr.start_timer('zoom') + # after hiding the traces, (start the timer,) zoom in, then reset the axes for the next iteration + fr.drag_and_zoom( + "xy", x0=0.25, x1=0.75, y0=0.5, y1=0.5, testing=True + ) time.sleep(3) fr.reset_axes(testing=True) - # fr.start_timer('reset') time.sleep(3) - with open(f'./logs/n{n}_s{s}_t{t}_everynth_iter{it}.json', 'w') as logfile: - # for log in driver.get_log('browser'): - logfile.write(json.dumps(driver.get_log('browser'))) - print('done saving log') - # print(logs) - # print(type(logs)) + with open( + f"./logs/n{n}_s{s}_t{t}_everynth_iter{it}.json", "w" + ) as logfile: + logfile.write(json.dumps(driver.get_log("browser"))) + print("done saving log") except Exception as e: - raise e + raise e finally: print(proc.pid) # p = ps.Process(proc.pid) # print(f'pid {proc.pid}') # print(f'process is running {p.is_running()}') - print(f'process is running {proc.poll is not None}') + print(f"process is running {proc.poll is not None}") # proc.send_signal(signal.CTRL_C_EVENT) - - #this works with windows! add if clause for Linux version! (proc.kill works?) + + # this works with windows! add if clause for Linux version! (proc.kill works?) os.system("TASKKILL /F /T /PID " + str(proc.pid)) - os.system('killport 8050 --view-only') # requires pip install killport + os.system( + "killport 8050 --view-only" + ) # requires pip install killport # proc.kill() - print(f'process is running {proc.poll() is not None}') - + print(f"process is running {proc.poll() is not None}") + # os.kill(proc.pid, signal.SIGKILL) - + except Exception as ex: - raise ex + raise ex finally: - print('closing driver') + print("closing driver") # driver.close() print(driver is None) driver.quit()
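
For reference, a minimal sketch of how the processed_logs_final.csv written by the notebook in these patches could be inspected. The file path and column names (percentage, datapoints, samples, traces, mean_time, variance, std_dev, std_err) come from the aggregation cells above; the plotting code itself is an assumption and is not part of the patch series.

# Sketch, not part of the patches: visualize the aggregated benchmark timings.
# Assumes the CSV produced by tests/log_processing.ipynb (path and columns as above).
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv("../logs/final/processed_logs_final.csv", index_col=0)

# One curve per (datapoints, samples, traces) benchmark configuration;
# x-axis is the percentage of traces left visible, y-axis the mean
# visible-update time, with the 1.96 * std_dev / sqrt(n) half-widths
# computed in the notebook drawn as error bars.
fig, ax = plt.subplots()
for (n, s, t), grp in df.groupby(["datapoints", "samples", "traces"]):
    ax.errorbar(
        grp["percentage"].astype(str),  # categorical x; keeps the "(baseline)" label
        grp["mean_time"],
        yerr=grp["std_err"],
        capsize=3,
        label=f"n={n}, s={s}, t={t}",
    )
ax.set_xlabel("visible traces (%)")
ax.set_ylabel("mean update time (ms)")
ax.legend(fontsize="small")
fig.tight_layout()
plt.show()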