From aa509ee0eed47df55bad4e84d289f745f8ddde80 Mon Sep 17 00:00:00 2001
From: Ian Thomas <ianthomas23@gmail.com>
Date: Tue, 18 Jul 2023 11:52:09 +0100
Subject: [PATCH 1/5] Benchmarking framework using ASV and Playwright

---
 .gitignore                          |   5 +-
 benchmarks/README.md                |  53 ++++++++
 benchmarks/asv.conf.json            | 196 ++++++++++++++++++++++++++++
 benchmarks/benchmarks/__init__.py   |   1 +
 benchmarks/benchmarks/base.py       |  49 +++++++
 benchmarks/benchmarks/timeseries.py |  55 ++++++++
 pyproject.toml                      |  23 ++++
 7 files changed, 381 insertions(+), 1 deletion(-)
 create mode 100644 benchmarks/README.md
 create mode 100644 benchmarks/asv.conf.json
 create mode 100644 benchmarks/benchmarks/__init__.py
 create mode 100644 benchmarks/benchmarks/base.py
 create mode 100644 benchmarks/benchmarks/timeseries.py
 create mode 100644 pyproject.toml
diff --git a/.gitignore b/.gitignore
index b176403..eebf6b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -138,4 +138,7 @@ dmypy.json
 
 .vscode
 
-**/*.pt
\ No newline at end of file
+**/*.pt
+
+# Benchmarks
+.asv/
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 0000000..b518634
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,53 @@
+# Benchmarking
+
+`hvneuro` uses [Playwright for Python](https://playwright.dev/python/docs/intro) and [ASV](https://asv.readthedocs.io) for benchmarking. Playwright automates interaction with the web browser, while ASV controls the benchmarking process so that it is statistically valid and repeatable.
+
+## Installing ASV
+
+Benchmarks must be run from a clone of the `hvneuro` GitHub repo. ASV creates and uses isolated virtual environments for benchmarking, so the running of benchmarks needs to be performed from within a Python environment that has access to both `asv` and `virtualenv`. This could be a `conda`, `pyenv` or `venv` environment, for example.
+
+Example setup using `conda`:
+```
+conda create -n hvneuro_asv
+conda activate hvneuro_asv
+pip install asv virtualenv
+```
+
+# Running benchmarks
+
+To run all benchmarks:
+```
+cd benchmarks
+asv run
+```
+
+The first time this is run it creates a machine file to store information about your machine.  Then a virtual environment is created and each benchmark is run multiple times to obtain a statistically valid benchmark time.
+
+The virtual environment contains `hvneuro` and its dependencies as defined in the top-level `pyproject.toml` file. It also contains `playwright`, the latest version of `chromium` as installed by `playwright`, and a particular branch of `bokeh` that contains extra code to record when the canvas is rendered. The latter is compiled by source and extra dependencies may be required for this to work on all test machines (to be determined).
+
+# Viewing benchmark results
+
+To list benchmark runs use
+```
+asv show
+```
+
+Initially this will just list the `hvneuro` commit that the benchmarks are run against. To display the benchmark timings for this commit use:
+```
+asv show <commit hash>
+```
+using enough of the commit hash to uniquely identify it.
+
+ASV ships with its own simple web server to interactively display the results in a web browser.  To use this:
+```
+asv publish
+asv preview
+```
+and then open a web browser at the URL specified.
+
+## Configuration
+
+ASV configuration information is stored in `benchmarks/asv.conf.json`.  This includes a list of branches to benchmark.  If you are using a feature branch and wish to benchmark the code in that branch rather than `main`, edit `asv.conf.json` and put the name of your branch in the `branches` entry, which looks like this:
+```
+"branches": ["main"],
+```
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
new file mode 100644
index 0000000..12f11d9
--- /dev/null
+++ b/benchmarks/asv.conf.json
@@ -0,0 +1,196 @@
+{
+    // The version of the config file format.  Do not change, unless
+    // you know what you are doing.
+    "version": 1,
+
+    // The name of the project being benchmarked
+    "project": "hvneuro",
+
+    // The project's homepage
+    "project_url": "https://github.com/holoviz-topics/neuro",
+
+    // The URL or local path of the source code repository for the
+    // project being benchmarked
+    "repo": "..",
+
+    // The Python project's subdirectory in your repo.  If missing or
+    // the empty string, the project is assumed to be located at the root
+    // of the repository.
+    // "repo_subdir": "",
+
+    // Customizable commands for building, installing, and
+    // uninstalling the project. See asv.conf.json documentation.
+    //
+    // "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"],
+    // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
+    // "build_command": [
+    //     "python setup.py build",
+    //     "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"
+    // ],
+    "build_command": [
+        "python -m pip install --upgrade build pip",
+        "python -m build --wheel -o {build_cache_dir} {build_dir}"
+    ],
+    "install_command": [
+        "in-dir={env_dir} python -mpip install {wheel_file}",
+        // Install bokeh from specific repo branch containing the console.log of render count
+        "python -m pip install bokeh git+https://github.com/bokeh/bokeh.git@ianthomas23/log_render_count#egg=bokeh",
+        // Install browsers for playwright
+        "playwright install chromium"
+    ],
+
+    // List of branches to benchmark. If not provided, defaults to "master"
+    // (for git) or "default" (for mercurial).
+    "branches": ["benchmark_framework"],
+
+    // The DVCS being used.  If not set, it will be automatically
+    // determined from "repo" by looking at the protocol in the URL
+    // (if remote), or by looking for special directories such as ".git" or ".hg".
+    "dvcs": "git",
+
+    // The tool to use to create environments.  May be "conda",
+    // "virtualenv" or other value depending on the plugins in use.
+    // If missing or the empty string, the tool will be automatically
+    // determined by looking for tools on the PATH environment
+    // variable.
+    "environment_type": "virtualenv",
+
+    // timeout in seconds for installing any dependencies in environment
+    // defaults to 10 min
+    //"install_timeout": 600,
+
+    // the base URL to show a commit for the project.
+    "show_commit_url": "https://github.com/holoviz-topics/neuro/commit/",
+
+    // The Pythons you'd like to test against.  If not provided, defaults
+    // to the current version of Python used to run `asv`.
+    // "pythons": ["2.7", "3.6"],
+
+    // The list of conda channel names to be searched for benchmark
+    // dependency packages in the specified order
+    //"conda_channels": ["conda-forge", "defaults"],
+
+    // A conda environment file that is used for environment creation.
+    // "conda_environment_file": "environment.yml",
+
+    // The matrix of dependencies to test.  Each key of the "req"
+    // requirements dictionary is the name of a package (in PyPI) and
+    // the values are version numbers.  An empty list or empty string
+    // indicates to just test against the default (latest)
+    // version. null indicates that the package is to not be
+    // installed. If the package to be tested is only available from
+    // PyPi, and the 'environment_type' is conda, then you can preface
+    // the package name by 'pip+', and the package will be installed
+    // via pip (with all the conda available packages installed first,
+    // followed by the pip installed packages).
+    //
+    // The ``@env`` and ``@env_nobuild`` keys contain the matrix of
+    // environment variables to pass to build and benchmark commands.
+    // An environment will be created for every combination of the
+    // cartesian product of the "@env" variables in this matrix.
+    // Variables in "@env_nobuild" will be passed to every environment
+    // during the benchmark phase, but will not trigger creation of
+    // new environments.  A value of ``null`` means that the variable
+    // will not be set for the current combination.
+    //
+    // "matrix": {
+    //     "req": {
+    //         "numpy": ["1.6", "1.7"],
+    //         "six": ["", null],  // test with and without six installed
+    //         "pip+emcee": [""]   // emcee is only available for install with pip.
+    //     },
+    //     "env": {"ENV_VAR_1": ["val1", "val2"]},
+    //     "env_nobuild": {"ENV_VAR_2": ["val3", null]},
+    // },
+    "matrix": {
+        "playwright": []
+    },
+
+    // Combinations of libraries/python versions can be excluded/included
+    // from the set to test. Each entry is a dictionary containing additional
+    // key-value pairs to include/exclude.
+    //
+    // An exclude entry excludes entries where all values match. The
+    // values are regexps that should match the whole string.
+    //
+    // An include entry adds an environment. Only the packages listed
+    // are installed. The 'python' key is required. The exclude rules
+    // do not apply to includes.
+    //
+    // In addition to package names, the following keys are available:
+    //
+    // - python
+    //     Python version, as in the *pythons* variable above.
+    // - environment_type
+    //     Environment type, as above.
+    // - sys_platform
+    //     Platform, as in sys.platform. Possible values for the common
+    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
+    // - req
+    //     Required packages
+    // - env
+    //     Environment variables
+    // - env_nobuild
+    //     Non-build environment variables
+    //
+    // "exclude": [
+    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
+    //     {"environment_type": "conda", "req": {"six": null}}, // don't run without six on conda
+    //     {"env": {"ENV_VAR_1": "val2"}}, // skip val2 for ENV_VAR_1
+    // ],
+    //
+    // "include": [
+    //     // additional env for python2.7
+    //     {"python": "2.7", "req": {"numpy": "1.8"}, "env_nobuild": {"FOO": "123"}},
+    //     // additional env if run on windows+conda
+    //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "req": {"libpython": ""}},
+    // ],
+
+    // The directory (relative to the current directory) that benchmarks are
+    // stored in.  If not provided, defaults to "benchmarks"
+    "benchmark_dir": "benchmarks",
+
+    // The directory (relative to the current directory) to cache the Python
+    // environments in.  If not provided, defaults to "env"
+    "env_dir": ".asv/env",
+
+    // The directory (relative to the current directory) that raw benchmark
+    // results are stored in.  If not provided, defaults to "results".
+    "results_dir": ".asv/results",
+
+    // The directory (relative to the current directory) that the html tree
+    // should be written to.  If not provided, defaults to "html".
+    "html_dir": ".asv/html",
+
+    // The number of characters to retain in the commit hashes.
+    // "hash_length": 8,
+
+    // `asv` will cache results of the recent builds in each
+    // environment, making them faster to install next time.  This is
+    // the number of builds to keep, per environment.
+    // "build_cache_size": 2,
+
+    // The commits after which the regression search in `asv publish`
+    // should start looking for regressions. Dictionary whose keys are
+    // regexps matching to benchmark names, and values corresponding to
+    // the commit (exclusive) after which to start looking for
+    // regressions.  The default is to start from the first commit
+    // with results. If the commit is `null`, regression detection is
+    // skipped for the matching benchmark.
+    //
+    // "regressions_first_commits": {
+    //    "some_benchmark": "352cdf",  // Consider regressions only after this commit
+    //    "another_benchmark": null,   // Skip regression detection altogether
+    // },
+
+    // The thresholds for relative change in results, after which `asv
+    // publish` starts reporting regressions. Dictionary of the same
+    // form as in ``regressions_first_commits``, with values
+    // indicating the thresholds.  If multiple entries match, the
+    // maximum is taken. If no entry matches, the default is 5%.
+    //
+    // "regressions_thresholds": {
+    //    "some_benchmark": 0.01,     // Threshold of 1%
+    //    "another_benchmark": 0.5,   // Threshold of 50%
+    // },
+}
diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/benchmarks/benchmarks/__init__.py
@@ -0,0 +1 @@
+
diff --git a/benchmarks/benchmarks/base.py b/benchmarks/benchmarks/base.py
new file mode 100644
index 0000000..18cef73
--- /dev/null
+++ b/benchmarks/benchmarks/base.py
@@ -0,0 +1,49 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from bokeh.server.server import Server
+from playwright.sync_api import sync_playwright
+
+if TYPE_CHECKING:
+    from typing import Callable
+
+    from bokeh.document import Document
+    from playwright.sync_api import ConsoleMessage
+
+
+class Base:
+    def __init__(self):
+        self._port = 5006
+        self.render_count = -1
+
+    def _console_callback(self, msg: ConsoleMessage) -> None:
+        # Only supports a single Bokeh canvas so far.
+        args = msg.args
+        if len(args) == 3 and args[0].json_value() == "PlotView._actual_paint" and args[2].json_value() == "end":
+            self.render_count += 1
+            count = int(args[1].json_value())
+            if count != self.render_count:
+                raise RuntimeError(f"Mismatch in render count: {count} != {self.render_count}")
+
+    def _playwright_setup(self, bokeh_doc: Callable[[Document], None], catch_console: bool) -> None:
+        # Playwright context manager needs to span multiple functions,
+        # so manually call __enter__ and __exit__ methods.
+        self._playwright_context_manager = sync_playwright()
+        playwright = self._playwright_context_manager.__enter__()  # __exit__ is called at teardown.
+
+        self._server = Server({'/': bokeh_doc}, port=self._port)  # Bokeh app is served at the root URL path.
+        self._server.start()
+
+        self._browser = playwright.chromium.launch(headless=True)  # Headless: no visible browser window.
+
+        self.page = self._browser.new_page()
+        self.page.goto(f"http://localhost:{self._port}/")
+
+        if catch_console:
+            self.page.on("console", self._console_callback)
+
+    def _playwright_teardown(self):
+        self._browser.close()
+        self._server.stop()
+        self._playwright_context_manager.__exit__(None, None, None)
diff --git a/benchmarks/benchmarks/timeseries.py b/benchmarks/benchmarks/timeseries.py
new file mode 100644
index 0000000..3677e7c
--- /dev/null
+++ b/benchmarks/benchmarks/timeseries.py
@@ -0,0 +1,55 @@
+from __future__ import annotations
+
+from functools import partial
+from typing import TYPE_CHECKING
+
+from bokeh.models import Button, ColumnDataSource
+from bokeh.plotting import column, figure
+import numpy as np
+
+from .base import Base
+
+if TYPE_CHECKING:
+    from bokeh.document import Document
+
+
+def bkapp(doc: Document, n: int, output_backend: str):
+    cds = ColumnDataSource(data=dict(x=[], y=[]))  # Data source starts empty.
+
+    p = figure(width=600, height=400, output_backend=output_backend)
+    p.line(source=cds, x="x", y="y")
+
+    # Prepare n points of data up front, but do not send them to the browser yet.
+    x = np.arange(n)
+    y = np.random.default_rng(8343).uniform(size=n)  # Fixed seed, so y is the same on every run.
+
+    def python_callback(event):
+        # Runs on button click; the benchmark times the sending and rendering of this data.
+        cds.data = dict(x=x, y=y)
+
+    button = Button(label="run")  # The benchmark locates this button by its "run" label.
+    button.on_click(python_callback)
+
+    doc.add_root(column(p, button))
+
+
+class Timeseries(Base):
+    params: tuple[list[int], list[str]] = (
+        [1_000, 10_000, 100_000, 1_000_000, 10_000_000],
+        ["canvas", "webgl"],
+    )
+    param_names: tuple[str] = ("n", "output_backend")
+
+    def setup(self, n: int, output_backend: str) -> None:
+        bkapp_n = partial(bkapp, n=n, output_backend=output_backend)
+        self._playwright_setup(bkapp_n, catch_console=True)
+
+    def teardown(self, n: int, output_backend: str) -> None:
+        self._playwright_teardown()
+
+    def time_values(self, n: int, output_backend: str) -> None:
+        button = self.page.get_by_role("button", name="run")
+        start_render_count = self.render_count
+        button.click()
+        while self.render_count == start_render_count:
+            self.page.wait_for_timeout(1)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..5749c8a
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,23 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "hvneuro"
+version = "0.1.0a1"
+description = "Neuroscience visualization with HoloViz"
+authors = [
+  {name = "Demetris Roumis", email = "8qdo8efl3@mozmail.com"},
+]
+readme = "README.md"
+license = {file = "LICENSE"}
+requires-python = ">=3.8"
+dependencies = [
+    "bokeh",
+    "datashader",
+    "holoviews",
+    "hvplot",
+    "numpy",
+    "panel",
+    "xarray",
+]

From f3d5462375ede085a08cc6c5601f6c0ea24a2bcd Mon Sep 17 00:00:00 2001
From: Ian Thomas <ianthomas23@gmail.com>
Date: Thu, 20 Jul 2023 08:37:00 +0100
Subject: [PATCH 2/5] Improve install instructions

---
 benchmarks/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/benchmarks/README.md b/benchmarks/README.md
index b518634..8112be2 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -8,9 +8,9 @@ Benchmarks must be run from a clone of the `hvneuro` github repo. ASV creates an
 
 Example setup using `conda`:
 ```
-conda create -n hvneuro_asv
+conda create -n hvneuro_asv python=3.11
 conda activate hvneuro_asv
-pip install asv virtualenv
+conda install -c conda-forge asv virtualenv "nodejs>=18"
 ```
 
 # Running benchmarks

From ee11db719e12e0dfed64666b898367e2d124af4a Mon Sep 17 00:00:00 2001
From: Ian Thomas <ianthomas23@gmail.com>
Date: Thu, 20 Jul 2023 10:40:32 +0100
Subject: [PATCH 3/5] Only grab console messages for a single figure of a Bokeh
 document

---
 benchmarks/benchmarks/base.py | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/benchmarks/benchmarks/base.py b/benchmarks/benchmarks/base.py
index 18cef73..a82e737 100644
--- a/benchmarks/benchmarks/base.py
+++ b/benchmarks/benchmarks/base.py
@@ -2,6 +2,7 @@
 
 from typing import TYPE_CHECKING
 
+from bokeh.models import Plot
 from bokeh.server.server import Server
 from playwright.sync_api import sync_playwright
 
@@ -16,15 +17,23 @@ class Base:
     def __init__(self):
         self._port = 5006
         self.render_count = -1
+        self._figure_id = None  # Unique ID of the figure to grab the console messages of.
 
     def _console_callback(self, msg: ConsoleMessage) -> None:
-        # Only supports a single Bokeh canvas so far.
-        args = msg.args
-        if len(args) == 3 and args[0].json_value() == "PlotView._actual_paint" and args[2].json_value() == "end":
-            self.render_count += 1
-            count = int(args[1].json_value())
-            if count != self.render_count:
-                raise RuntimeError(f"Mismatch in render count: {count} != {self.render_count}")
+        if self._figure_id is None or len(msg.args) != 4:
+            return
+
+        args = [arg.json_value() for arg in msg.args]
+
+        if args[0] == "PlotView._actual_paint" and args[1] == self._figure_id:
+            if args[3] == "start":
+                # TODO: need to handle start of render if want to time a single render.
+                pass
+            elif args[3] == "end":
+                self.render_count += 1
+                count = int(args[1].json_value())
+                if count != self.render_count:
+                    raise RuntimeError(f"Mismatch in render count: {count} != {self.render_count}")
 
     def _playwright_setup(self, bokeh_doc: Callable[[Document], None], catch_console: bool) -> None:
         # Playwright context manager needs to span multiple functions,
@@ -40,10 +49,19 @@ def _playwright_setup(self, bokeh_doc: Callable[[Document], None], catch_console
         self.page = self._browser.new_page()
         self.page.goto(f"http://localhost:{self._port}/")
 
+        # Assume Bokeh document contains a single figure, and obtain its ID.
+        sessions = self._server.get_sessions()
+        if len(sessions) != 1:
+            raise RuntimeError(f"Expected a single session but have {len(sessions)}")
+        doc = sessions[0].document
+        # This raises an error if there is more than one figure in the Bokeh document.
+        self._figure_id = doc.select_one(dict(type=Plot)).id
+
         if catch_console:
             self.page.on("console", self._console_callback)
 
     def _playwright_teardown(self):
+        self._figure_id = None
         self._browser.close()
         self._server.stop()
         self._playwright_context_manager.__exit__(None, None, None)

From 9060a942c9f361e13b69f7474798d1f16ff44ebe Mon Sep 17 00:00:00 2001
From: Ian Thomas <ianthomas23@gmail.com>
Date: Tue, 25 Jul 2023 17:01:27 +0100
Subject: [PATCH 4/5] Correct console callback arguments

---
 benchmarks/benchmarks/base.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/benchmarks/benchmarks/base.py b/benchmarks/benchmarks/base.py
index a82e737..f59fc62 100644
--- a/benchmarks/benchmarks/base.py
+++ b/benchmarks/benchmarks/base.py
@@ -23,15 +23,15 @@ def _console_callback(self, msg: ConsoleMessage) -> None:
         if self._figure_id is None or len(msg.args) != 4:
             return
 
-        args = [arg.json_value() for arg in msg.args]
+        msg, figure_id, count, start_or_end = [arg.json_value() for arg in msg.args]
 
-        if args[0] == "PlotView._actual_paint" and args[1] == self._figure_id:
-            if args[3] == "start":
+        if msg == "PlotView._actual_paint" and figure_id == self._figure_id:
+            if start_or_end == "start":
                 # TODO: need to handle start of render if want to time a single render.
                 pass
-            elif args[3] == "end":
+            elif start_or_end == "end":
                 self.render_count += 1
-                count = int(args[1].json_value())
+                count = int(count)
                 if count != self.render_count:
                     raise RuntimeError(f"Mismatch in render count: {count} != {self.render_count}")
 

From 21b55b9db579b440448ed83d8a4a0db939497982 Mon Sep 17 00:00:00 2001
From: Ian Thomas <ianthomas23@gmail.com>
Date: Wed, 26 Jul 2023 11:55:23 +0100
Subject: [PATCH 5/5] Remove console callback before closing browser

---
 benchmarks/README.md                |  4 +++-
 benchmarks/benchmarks/base.py       | 16 ++++++++++++----
 benchmarks/benchmarks/timeseries.py |  4 ++--
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/benchmarks/README.md b/benchmarks/README.md
index 8112be2..7ea8d5a 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -18,13 +18,15 @@ conda install -c conda-forge asv virtualenv "nodejs>=18"
 To run all benchmarks:
 ```
 cd benchmarks
-asv run
+asv run -e
 ```
 
 The first time this is run it creates a machine file to store information about your machine.  Then a virtual environment is created and each benchmark is run multiple times to obtain a statistically valid benchmark time.
 
 The virtual environment contains `hvneuro` and its dependencies as defined in the top-level `pyproject.toml` file. It also contains `playwright`, the latest version of `chromium` as installed by `playwright`, and a particular branch of `bokeh` that contains extra code to record when the canvas is rendered. The latter is compiled by source and extra dependencies may be required for this to work on all test machines (to be determined).
 
+The `-e` flag catches and displays stderr after the benchmark results. This should be free of errors but may contain some warnings.
+
 # Viewing benchmark results
 
 To list benchmark runs use
diff --git a/benchmarks/benchmarks/base.py b/benchmarks/benchmarks/base.py
index f59fc62..ce1a099 100644
--- a/benchmarks/benchmarks/base.py
+++ b/benchmarks/benchmarks/base.py
@@ -14,7 +14,8 @@
 
 
 class Base:
-    def __init__(self):
+    def __init__(self, catch_console: bool = True):
+        self._catch_console = catch_console
         self._port = 5006
         self.render_count = -1
         self._figure_id = None  # Unique ID of the figure to grab the console messages of.
@@ -35,7 +36,7 @@ def _console_callback(self, msg: ConsoleMessage) -> None:
                 if count != self.render_count:
                     raise RuntimeError(f"Mismatch in render count: {count} != {self.render_count}")
 
-    def _playwright_setup(self, bokeh_doc: Callable[[Document], None], catch_console: bool) -> None:
+    def playwright_setup(self, bokeh_doc: Callable[[Document], None]) -> None:
         # Playwright context manager needs to span multiple functions,
         # so manually call __enter__ and __exit__ methods.
         self._playwright_context_manager = sync_playwright()
@@ -57,11 +58,18 @@ def _playwright_setup(self, bokeh_doc: Callable[[Document], None], catch_console
         # This raises an error if there is more than one figure in the Bokeh document.
         self._figure_id = doc.select_one(dict(type=Plot)).id
 
-        if catch_console:
+        if self._catch_console:
             self.page.on("console", self._console_callback)
 
-    def _playwright_teardown(self):
+    def playwright_teardown(self):
         self._figure_id = None
+        if self._catch_console:
+            self.page.remove_listener("console", self._console_callback)
+            self.render_count = -1
+            # Wait a few milliseconds for emitted console messages to be handled before closing
+            # browser. May need to increase this if Playwright complains that browser is closed.
+            self.page.wait_for_timeout(10)
+
         self._browser.close()
         self._server.stop()
         self._playwright_context_manager.__exit__(None, None, None)
diff --git a/benchmarks/benchmarks/timeseries.py b/benchmarks/benchmarks/timeseries.py
index 3677e7c..2012c64 100644
--- a/benchmarks/benchmarks/timeseries.py
+++ b/benchmarks/benchmarks/timeseries.py
@@ -42,10 +42,10 @@ class Timeseries(Base):
 
     def setup(self, n: int, output_backend: str) -> None:
         bkapp_n = partial(bkapp, n=n, output_backend=output_backend)
-        self._playwright_setup(bkapp_n, catch_console=True)
+        self.playwright_setup(bkapp_n)
 
     def teardown(self, n: int, output_backend: str) -> None:
-        self._playwright_teardown()
+        self.playwright_teardown()
 
     def time_values(self, n: int, output_backend: str) -> None:
         button = self.page.get_by_role("button", name="run")