From 70c8c7cbf8959af7f7990a4df45be9cc35da4201 Mon Sep 17 00:00:00 2001 From: Differential Privacy Team Date: Wed, 24 Apr 2024 08:00:56 -0700 Subject: [PATCH] DP Auditorium divergence testers update Privacy on Beam: * Bump golang.org/x/net from 0.22.0 to 0.23.0 DP Auditorium: * Unify RenyiPropertyTester under new divergence tester class * Add interface for divergence based testers * Add example of testing PipelineDP mean mechanism in IPython * Upgrade Histogram tester * Update dependencies DP Accounting: * Increment patch version of DP accounting library for PyPi release Change-Id: I3a513cf7d7c7e144b11c778f792f723dff53132f GitOrigin-RevId: 1833e65df37c76d756d3a62ed6ba99112c0a5dd0 --- privacy-on-beam/go.mod | 2 +- privacy-on-beam/go.sum | 4 +- privacy-on-beam/privacy_on_beam_deps.bzl | 4 +- python/dp_accounting/VERSION | 2 +- .../configs/property_tester_config.py | 9 + .../pipelinedp_mean_mechanism_example.ipynb | 235 +++++++++++++++ .../examples/run_mean_mechanism_example.ipynb | 8 +- .../mechanisms/pipeline_dp/aggregation.py | 86 ++++++ .../pipeline_dp/aggregation_test.py | 89 ++++++ .../dp_auditorium/testers/BUILD.bazel | 28 +- .../dp_auditorium/testers/__init__.py | 1 - .../testers/divergence_tester.py | 123 ++++++++ .../testers/divergence_tester_test.py | 122 ++++++++ .../dp_auditorium/testers/histogram_tester.py | 36 ++- .../testers/histogram_tester_test.py | 33 ++- .../testers/hockey_stick_tester.py | 268 +++++------------- .../testers/hockey_stick_tester_test.py | 101 +++---- .../testers/property_tester_utils.py | 2 +- .../testers/property_tester_utils_test.py | 16 +- .../dp_auditorium/testers/renyi_tester.py | 251 ++++------------ .../testers/renyi_tester_test.py | 73 +++-- python/dp_auditorium/requirements.in | 2 + python/dp_auditorium/requirements.txt | 6 +- 23 files changed, 997 insertions(+), 504 deletions(-) create mode 100644 python/dp_auditorium/dp_auditorium/examples/pipelinedp_mean_mechanism_example.ipynb create mode 100644 python/dp_auditorium/dp_auditorium/mechanisms/pipeline_dp/aggregation.py create mode 100644 python/dp_auditorium/dp_auditorium/mechanisms/pipeline_dp/aggregation_test.py create mode 100644 python/dp_auditorium/dp_auditorium/testers/divergence_tester.py create mode 100644 python/dp_auditorium/dp_auditorium/testers/divergence_tester_test.py diff --git a/privacy-on-beam/go.mod b/privacy-on-beam/go.mod index f011fd9e..68342a4d 100644 --- a/privacy-on-beam/go.mod +++ b/privacy-on-beam/go.mod @@ -58,7 +58,7 @@ require ( golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 // indirect golang.org/x/image v0.15.0 // indirect golang.org/x/mod v0.16.0 // indirect - golang.org/x/net v0.22.0 // indirect + golang.org/x/net v0.23.0 // indirect golang.org/x/oauth2 v0.18.0 // indirect golang.org/x/sync v0.6.0 // indirect golang.org/x/sys v0.18.0 // indirect diff --git a/privacy-on-beam/go.sum b/privacy-on-beam/go.sum index 65ec79ed..cb896870 100644 --- a/privacy-on-beam/go.sum +++ b/privacy-on-beam/go.sum @@ -223,8 +223,8 @@ golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwY golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= -golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= 
+golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=
+golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.18.0 h1:09qnuIAgzdx1XplqJvW6CQqMCtGZykZWcXzPMPUusvI=
 golang.org/x/oauth2 v0.18.0/go.mod h1:Wf7knwG0MPoWIMMBgFlEaSUDaKskp0dCfrlJRJXbBi8=
diff --git a/privacy-on-beam/privacy_on_beam_deps.bzl b/privacy-on-beam/privacy_on_beam_deps.bzl
index 8a125cca..64592fdb 100644
--- a/privacy-on-beam/privacy_on_beam_deps.bzl
+++ b/privacy-on-beam/privacy_on_beam_deps.bzl
@@ -2001,8 +2001,8 @@ def privacy_on_beam_deps():
     go_repository(
         name = "org_golang_x_net",
         importpath = "golang.org/x/net",
-        sum = "h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=",
-        version = "v0.22.0",
+        sum = "h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=",
+        version = "v0.23.0",
     )
     go_repository(
         name = "org_golang_x_oauth2",
diff --git a/python/dp_accounting/VERSION b/python/dp_accounting/VERSION
index 9673a438..de476221 100644
--- a/python/dp_accounting/VERSION
+++ b/python/dp_accounting/VERSION
@@ -1,2 +1,2 @@
 """ Version of the current release of DP Accounting """
-0.4.3
+0.4.4
diff --git a/python/dp_auditorium/dp_auditorium/configs/property_tester_config.py b/python/dp_auditorium/dp_auditorium/configs/property_tester_config.py
index d2d9bbb5..143e4d7c 100644
--- a/python/dp_auditorium/dp_auditorium/configs/property_tester_config.py
+++ b/python/dp_auditorium/dp_auditorium/configs/property_tester_config.py
@@ -66,10 +66,14 @@ class HockeyStickPropertyTesterConfig:
   Attributes:
     training_config: Required training parameters.
     approximate_dp: Approximate DP privacy parameters to be tested.
+    evaluation_batch_size: Batch size for computing accuracy of classifier
+      distinguishing two distributions for Hockey Stick divergence. See
+      `HockeyStickPropertyTester` class for details.
   """

   training_config: TrainingConfig
   approximate_dp: privacy_property.ApproximateDp
+  evaluation_batch_size: int = 1000


 @dataclasses.dataclass
@@ -83,6 +87,10 @@ class HistogramPropertyTesterConfig:
     min_value: Lower end value for the histogram.
     max_value: Upper end value for the histogram.
     approximate_dp: Approximate DP privacy parameters to be tested.
+    use_original_tester: Whether to use the original version of the tester due
+      to Gilbert and McMillan (2018), or a new version developed for
+      DP-Auditorium. The new version generally improves over the original
+      version, but the original version is retained for comparison purposes.
   """

   test_discrete_mechanism: bool
@@ -90,6 +98,7 @@
   min_value: float
   max_value: float
   approximate_dp: privacy_property.ApproximateDp
+  use_original_tester: bool = False


 class Kernel(enum.Enum):
diff --git a/python/dp_auditorium/dp_auditorium/examples/pipelinedp_mean_mechanism_example.ipynb b/python/dp_auditorium/dp_auditorium/examples/pipelinedp_mean_mechanism_example.ipynb
new file mode 100644
index 00000000..df140e51
--- /dev/null
+++ b/python/dp_auditorium/dp_auditorium/examples/pipelinedp_mean_mechanism_example.ipynb
@@ -0,0 +1,235 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "I0Z7vNS_ybbU"
+      },
+      "source": [
+        "This Colab notebook uses DP-Auditorium to test differentially private mechanisms computing aggregate statistics using PipelineDP."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "wUtLsXpF9q4D"
+      },
+      "source": [
+        "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n",
+        "  \u003ctd\u003e\n",
+        "    \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/google/differential-privacy/blob/main/python/dp_auditorium/dp_auditorium/examples/pipelinedp_mean_mechanism_example.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n",
+        "  \u003c/td\u003e\n",
+        "  \u003ctd\u003e\n",
+        "    \u003ca target=\"_blank\" href=\"https://github.com/google/differential-privacy/blob/main/python/dp_auditorium/dp_auditorium/examples/pipelinedp_mean_mechanism_example.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n",
+        "  \u003c/td\u003e\n",
+        "\u003c/table\u003e\n",
+        "\n",
+        "\u003cbr\u003e\n",
+        "\u003cbr\u003e"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "WPLSKwjEHfXI"
+      },
+      "outputs": [],
+      "source": [
+        "#@title Install and import dp_auditorium and all necessary libraries.\n",
+        "!pip install google-vizier equinox pipeline_dp\n",
+        "!git clone https://github.com/google/differential-privacy.git\n",
+        "import sys\n",
+        "sys.path.append('differential-privacy/python/dp_auditorium')\n",
+        "\n",
+        "from dp_auditorium import privacy_test_runner\n",
+        "from dp_auditorium.generators import pipeline_dp_vizier_dataset_generator\n",
+        "from dp_auditorium.configs import dataset_generator_config\n",
+        "from dp_auditorium.configs import privacy_property\n",
+        "from dp_auditorium.configs import privacy_test_runner_config\n",
+        "from dp_auditorium.configs import property_tester_config\n",
+        "from dp_auditorium.mechanisms.pipeline_dp import aggregation as pipeline_dp_mechanism\n",
+        "from dp_auditorium.testers import hockey_stick_tester\n",
+        "\n",
+        "import pipeline_dp\n",
+        "import tensorflow as tf\n",
+        "tf.compat.v1.enable_eager_execution()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "dr5A5W7Aq2SO"
+      },
+      "outputs": [],
+      "source": [
+        "#@title Example of testing PipelineDP mean mechanism\n",
+        "import time\n",
+        "\n",
+        "def pipeline_dp_mean_mechanism_report(\n",
+        "    epsilon: float,\n",
+        "    delta: float,\n",
+        "    seed: int,\n",
+        "    max_number_partitions: int = 10,\n",
+        ") -\u003e privacy_test_runner_config.PrivacyTestRunnerResults:\n",
+        "  \"\"\"Runs the example code for a mean mechanism.\n",
+        "\n",
+        "  Args:\n",
+        "    epsilon: standard approximate DP parameter.\n",
+        "    delta: standard approximate DP parameter.\n",
+        "    seed: seed to initialize the random number generator.\n",
+        "    max_number_partitions: maximum number of partitions that can be used by\n",
+        "      the dataset generator.\n",
+        "\n",
+        "  Returns:\n",
+        "    The result of the example code as PrivacyTestRunnerResults.\n",
+        "  \"\"\"\n",
+        "  tf.random.set_seed(seed)\n",
+        "\n",
+        "  # Specify a config for computing with PipelineDP Mean aggregation, namely\n",
+        "  # computing the mean per partition, i.e.
 in SQL terms, the DP version of\n",
+        "  #   SELECT partition_key, mean(value)\n",
+        "  #   GROUP BY partition_key\n",
+        "  # is computed.\n",
+        "  # See https://pipelinedp.io/key-definitions/ for more details on PipelineDP terminology.\n",
+        "  mech_config = pipeline_dp.AggregateParams(\n",
+        "      metrics=[pipeline_dp.Metrics.MEAN],\n",
+        "      # Laplace noise is used to ensure DP.\n",
+        "      noise_kind=pipeline_dp.NoiseKind.LAPLACE,\n",
+        "      # Set contribution bounds:\n",
+        "\n",
+        "      # 1. If a privacy unit contributes to more than 1 partition, then\n",
+        "      # PipelineDP will randomly choose 1 partition and drop the\n",
+        "      # contributions to the others.\n",
+        "      max_partitions_contributed=1,\n",
+        "      # 2. If a privacy unit contributes more than 1 time to some\n",
+        "      # partition, then PipelineDP will randomly choose 1 contribution and\n",
+        "      # drop the other contributions.\n",
+        "      max_contributions_per_partition=1,\n",
+        "\n",
+        "      # 3. Each contribution will be clipped to [-1, 1].\n",
+        "      min_value=-1.0,\n",
+        "      max_value=1.0)\n",
+        "\n",
+        "  # Initialize the mechanism.\n",
+        "  public_partitions = list(range(max_number_partitions))\n",
+        "  mechanism = pipeline_dp_mechanism.AggregationMechanism(mech_config,\n",
+        "      privacy_property.ApproximateDp(\n",
+        "          epsilon=epsilon,\n",
+        "          delta=delta,\n",
+        "      ), public_partitions)\n",
+        "\n",
+        "  # Configuration for a Hockey-Stick property tester. Given arrays s1 and s2\n",
+        "  # with samples from two distributions, it estimates the hockey-stick\n",
+        "  # divergence between the underlying distributions. It checks if the estimated\n",
+        "  # divergence is bounded by delta.\n",
+        "  tester_config = property_tester_config.HockeyStickPropertyTesterConfig(\n",
+        "      training_config=hockey_stick_tester.make_default_hs_training_config(),\n",
+        "      approximate_dp=privacy_property.ApproximateDp(\n",
+        "          epsilon=epsilon,\n",
+        "          delta=delta,\n",
+        "      ),\n",
+        "  )\n",
+        "\n",
+        "  # Initialize a classifier model for the Hockey-Stick property tester.\n",
+        "  # This classifier will learn to distinguish between samples of the mechanism\n",
+        "  # on adjacent datasets. Its accuracy level should be controlled by the privacy\n",
+        "  # guarantee.\n",
+        "  base_model = hockey_stick_tester.make_default_hs_base_model()\n",
+        "  # Initialize a property tester.\n",
+        "  property_tester = hockey_stick_tester.HockeyStickPropertyTester(\n",
+        "      config=tester_config,\n",
+        "      base_model=base_model,\n",
+        "  )\n",
+        "\n",
+        "  # Configuration for the dataset generator. It generates neighboring datasets\n",
+        "  # under the add/remove definition. A unique study name prevents using cached\n",
+        "  # results from previous runs.\n",
+        "  generator_config = dataset_generator_config.VizierDatasetGeneratorConfig(\n",
+        "      study_name=str(time.time()),\n",
+        "      study_owner=\"owner\",\n",
+        "      num_vizier_parameters=2,\n",
+        "      data_type=dataset_generator_config.DataType.DATA_TYPE_FLOAT,\n",
+        "      min_value=-1.0,\n",
+        "      max_value=1.0,\n",
+        "      search_algorithm=\"RANDOM_SEARCH\",\n",
+        "      metric_name=\"hockey_stick_divergence\",\n",
+        "  )\n",
+        "\n",
+        "  # The dataset generator will generate datasets with at most\n",
+        "  # max_number_partitions partitions and at most 10 privacy units.\n",
+        "  # The same partitions are used as public_partitions and as partitions in the\n",
+        "  # dataset, so the mechanism will not drop any partitions. We do not check\n",
+        "  # partition selection.
We focus only on checking noise.\n", + " pipeline_dp_generator_config = pipeline_dp_vizier_dataset_generator.PipelineDpDatasetGeneratorConfig(\n", + " max_num_privacy_ids=10, max_num_partitions=max_number_partitions)\n", + "\n", + " # Initialize the dataset generator.\n", + " dataset_generator = pipeline_dp_vizier_dataset_generator.PipelineDpDatasetGenerator(\n", + " generator_config, pipeline_dp_generator_config)\n", + "\n", + " # Configuration for the test runner.\n", + " # The test runner coordinates how the test is evaluated. It receives a\n", + " # dataset generator, a property tester and a configuration (see base class for\n", + " # details on these parameters), and runs privacy tests using the property\n", + " # tester on datasets generated by the dataset generator.\n", + " test_runner_config = privacy_test_runner_config.PrivacyTestRunnerConfig(\n", + " property_tester=privacy_test_runner_config.PropertyTester.HOCKEY_STICK_TESTER,\n", + " max_num_trials=10,\n", + " failure_probability=0.05,\n", + " num_samples=10_000,\n", + " # Apply a hyperbolic tangent function to the output of the mechanism\n", + " post_processing=privacy_test_runner_config.PostProcessing.TANH,\n", + " )\n", + " # Initialize the test runner.\n", + " test_runner = privacy_test_runner.PrivacyTestRunner(\n", + " config=test_runner_config,\n", + " dataset_generator=dataset_generator,\n", + " property_tester=property_tester,\n", + " )\n", + "\n", + " return test_runner.test_privacy(mechanism, \"pipeline_dp-mean-mechanism\")\n", + "\n", + "\n", + "EPSILON = 1.0\n", + "DELTA = 1e-5\n", + "SEED = 1\n", + "\n", + "# The results indicate whether a privacy violation was identified within the\n", + "# designated number of trials defined in the configuration. In the absence of a\n", + "# violation, a message is returned indicating that the limit of the number of\n", + "# trials has been reached. 
For reference, all computed divergences across all\n",
+        "# trials are also reported.\n",
+        "results = pipeline_dp_mean_mechanism_report(EPSILON, DELTA, SEED)\n",
+        "print(f\" \\nResults: \\n{results}\")\n",
+        "if results.found_privacy_violation is not None:\n",
+        "  print(\"Privacy violations found!\")\n"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "private_outputs": true,
+      "provenance": [
+        {
+          "file_id": "1QyFD_doucyHewiRMtxGvFxNrFlgbCqQa",
+          "timestamp": 1708693099970
+        },
+        {
+          "file_id": "1pBgTlH19OwJ3diUYf3m3QaZcVNQGeB8B",
+          "timestamp": 1708692052606
+        }
+      ]
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
diff --git a/python/dp_auditorium/dp_auditorium/examples/run_mean_mechanism_example.ipynb b/python/dp_auditorium/dp_auditorium/examples/run_mean_mechanism_example.ipynb
index 7e6ad5ce..27d74260 100644
--- a/python/dp_auditorium/dp_auditorium/examples/run_mean_mechanism_example.ipynb
+++ b/python/dp_auditorium/dp_auditorium/examples/run_mean_mechanism_example.ipynb
@@ -81,8 +81,8 @@
 "  \"\"\"Runs the example code for a mean mechanism.\n",
 "\n",
 "  Args:\n",
-"    epsilon: standard DP parmaeter.\n",
-"    delta: standard DP parameter.\n",
+"    epsilon: standard approximate DP parameter.\n",
+"    delta: standard approximate DP parameter.\n",
 "    seed: seed to initialize the random number generator.\n",
 "    generator_factory: factory to create a generator; to be replaced in tests\n",
 "\n",
@@ -182,10 +182,6 @@
 ],
 "metadata": {
 "colab": {
-    "last_runtime": {
-      "build_target": "//learning/vizier/service/colab:notebook",
-      "kind": "private"
-    },
 "private_outputs": true,
 "provenance": [
 {
diff --git a/python/dp_auditorium/dp_auditorium/mechanisms/pipeline_dp/aggregation.py b/python/dp_auditorium/dp_auditorium/mechanisms/pipeline_dp/aggregation.py
new file mode 100644
index 00000000..834d5164
--- /dev/null
+++ b/python/dp_auditorium/dp_auditorium/mechanisms/pipeline_dp/aggregation.py
@@ -0,0 +1,86 @@
+# Copyright 2024 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Pipeline-DP library mechanisms."""
+
+
+import itertools
+
+import numpy as np
+import pipeline_dp
+
+from dp_auditorium import interfaces
+from dp_auditorium.configs import privacy_property
+
+
+class AggregationMechanism(interfaces.Mechanism):
+  """Pipeline DP mechanism wrapper for privacy auditing."""
+
+  def __init__(
+      self,
+      config: pipeline_dp.AggregateParams,
+      tested_privacy_property: privacy_property.ApproximateDp,
+      public_partitions: list[int] | None,
+  ):
+    self._epsilon = tested_privacy_property.epsilon
+    self._delta = tested_privacy_property.delta
+    self._config = config
+    self._public_partitions = public_partitions
+
+  def _compute_aggregations(self, data: list[float]) -> list[float]:
+    """Returns one sample of a DP aggregation using the pipeline_dp library.
+
+    Args:
+      data: One-dimensional array with scalars corresponding to different
+        records.
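+
+    Returns:
+      A list with one value per metric per partition; metrics across distinct
+      partitions are flattened into a single one-dimensional list.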
+ """ + budget_accountant = pipeline_dp.NaiveBudgetAccountant( + self._epsilon, self._delta + ) + dp_engine = pipeline_dp.DPEngine( + budget_accountant, pipeline_dp.LocalBackend() + ) + + data_extractors = pipeline_dp.DataExtractors( + partition_extractor=lambda x: x[0], + privacy_id_extractor=lambda x: x[1], + value_extractor=lambda x: x[2], + ) + + result = dp_engine.aggregate( + data, + self._config, + data_extractors, + public_partitions=self._public_partitions, + ) + budget_accountant.compute_budgets() + + # result is an iterator where each item is a tuple + # `(`partition_id`, MetricsTuple)`. We drop partition_id and concatenate all + # metrics' values. + values = [row[1] for row in result] + + # The output of this wrapper is designed for `interfaces.PropertyTester` + # which receives arrays of samples where each sample is a one-dimensional + # array. The specific metric defining each entry does not affect the privacy + # test result, so for each sample we flatten all metrics across distinct + # partitions. + return [x for x in itertools.chain(*values)] + + def __call__(self, data: np.ndarray, num_samples: int) -> np.ndarray: + """Returns an array of samples of a DP aggregation using pipeline_dp.""" + result = [] + data = list(data) # PipelineDP works now for list only. + for _ in range(num_samples): + result.append(self._compute_aggregations(data)) + return np.array(result) diff --git a/python/dp_auditorium/dp_auditorium/mechanisms/pipeline_dp/aggregation_test.py b/python/dp_auditorium/dp_auditorium/mechanisms/pipeline_dp/aggregation_test.py new file mode 100644 index 00000000..3303cf91 --- /dev/null +++ b/python/dp_auditorium/dp_auditorium/mechanisms/pipeline_dp/aggregation_test.py @@ -0,0 +1,89 @@ +# +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the mean mechanism with Pipeline DP.""" +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np +import pipeline_dp + +from dp_auditorium.configs import privacy_property +from dp_auditorium.mechanisms.pipeline_dp import aggregation + + +class AggregationMechanismTest(parameterized.TestCase): + + @parameterized.product( + metrics=[ + [pipeline_dp.Metrics.MEAN], + [pipeline_dp.Metrics.PERCENTILE(0.1)], + [pipeline_dp.Metrics.MEAN, pipeline_dp.Metrics.SUM], + ], + num_samples=[1, 2, 3], + public_partitions=[[1], [1, 2, 3]], + delta=[0.0, 0.5], + ) + def test_pipeline_dp_mechanism( + self, metrics, num_samples, public_partitions, delta + ): + """Tests that the output of the mechanism has the expected shape. + + Correctness of the implementation and returned values will be verified using + DP-Auditorium testers. Here we only verify the mechanism wrapper works as + expected. + + Args: + metrics: aggregations to be tested. + num_samples: Number of samples to draw from the mechanism. + public_partitions: List with ids of public partitions. + delta: Privacy parameter. + """ + # Stub data to test the mechanism. 
The first column represents a partition + # id, the second column represents the user id, and the third the + # corresponding value. + data = np.array([ + [1, 1, 1.0], + [1, 1, 1.5], + [2, 1, 3.1], + [1, 2, 1.0], + [2, 2, 1.0], + [1, 3, 1.7], + [3, 3, 2.0], + ]) + epsilon = 10000 + tested_privacy_property = privacy_property.ApproximateDp( + epsilon=epsilon, delta=delta + ) + config = pipeline_dp.AggregateParams( + metrics=metrics, + min_value=0.01, + max_value=1.0, + max_partitions_contributed=2, + max_contributions_per_partition=1, + contribution_bounds_already_enforced=False, + ) + aggregation_mechanism = aggregation.AggregationMechanism( + config=config, + tested_privacy_property=tested_privacy_property, + public_partitions=public_partitions, + ) + + result = aggregation_mechanism(data, num_samples=num_samples) + self.assertEqual( + result.shape, (num_samples, len(public_partitions)* len(metrics)) + ) + + +if __name__ == "__main__": + absltest.main() diff --git a/python/dp_auditorium/dp_auditorium/testers/BUILD.bazel b/python/dp_auditorium/dp_auditorium/testers/BUILD.bazel index 78b6ebca..4e4da9a9 100644 --- a/python/dp_auditorium/dp_auditorium/testers/BUILD.bazel +++ b/python/dp_auditorium/dp_auditorium/testers/BUILD.bazel @@ -35,8 +35,8 @@ py_library( name = "hockey_stick_tester", srcs = ["hockey_stick_tester.py"], deps = [ + ":divergence_tester", ":property_tester_utils", - "//dp_auditorium:interfaces", "//dp_auditorium/configs", requirement("absl-py"), requirement("numpy"), @@ -83,8 +83,8 @@ py_library( name = "renyi_tester", srcs = ["renyi_tester.py"], deps = [ + ":divergence_tester", ":property_tester_utils", - "//dp_auditorium:interfaces", "//dp_auditorium/configs", requirement("absl-py"), requirement("numpy"), @@ -128,6 +128,30 @@ py_test( ], ) +py_library( + name = "divergence_tester", + srcs = ["divergence_tester.py"], + deps = [ + ":property_tester_utils", + "//dp_auditorium:interfaces", + requirement("numpy"), + requirement("tensorflow"), + requirement("typing_extensions"), + ], +) + +py_test( + name = "divergence_tester_test", + srcs = ["divergence_tester_test.py"], + deps = [ + ":divergence_tester", + "//dp_auditorium/configs", + requirement("absl-py"), + requirement("numpy"), + requirement("tensorflow"), + ], +) + py_library( name = "property_tester_utils", srcs = ["property_tester_utils.py"], diff --git a/python/dp_auditorium/dp_auditorium/testers/__init__.py b/python/dp_auditorium/dp_auditorium/testers/__init__.py index e94f69e6..88b2d1b6 100644 --- a/python/dp_auditorium/dp_auditorium/testers/__init__.py +++ b/python/dp_auditorium/dp_auditorium/testers/__init__.py @@ -15,7 +15,6 @@ """Differential Privacy Property Testers.""" from dp_auditorium.testers.histogram_tester import HistogramTester -from dp_auditorium.testers.hockey_stick_tester import HockeyStickDivergenceTrainingOptions from dp_auditorium.testers.hockey_stick_tester import HockeyStickPropertyTester from dp_auditorium.testers.mmd_tester import MMDPropertyTester from dp_auditorium.testers.renyi_tester import RenyiModel diff --git a/python/dp_auditorium/dp_auditorium/testers/divergence_tester.py b/python/dp_auditorium/dp_auditorium/testers/divergence_tester.py new file mode 100644 index 00000000..c82a3b6f --- /dev/null +++ b/python/dp_auditorium/dp_auditorium/testers/divergence_tester.py @@ -0,0 +1,123 @@ +# Copyright 2024 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Divergence-based property tester base class for the dp-auditorium library."""
+
+import abc
+from typing import Any, final
+
+import numpy as np
+import tensorflow as tf
+from typing_extensions import override
+
+from dp_auditorium import interfaces
+from dp_auditorium.testers import property_tester_utils
+
+
+class DivergencePropertyTester(interfaces.PropertyTester, abc.ABC):
+  """PropertyTester that estimates divergences by optimizing a parametrized model."""
+
+  def __init__(self, config: Any, base_model: tf.keras.Model):
+    """Initializes the instance.
+
+    Args:
+      config: Configuration for initializing property tester.
+      base_model: A Keras model that discriminates between samples generated by
+        a mechanism run on two different datasets.
+    """
+
+  @property
+  @abc.abstractmethod
+  def _test_threshold(self) -> float:
+    """Threshold above which a divergence estimate fails the privacy test."""
+
+  @abc.abstractmethod
+  def _get_optimized_divergence_estimation_model(
+      self,
+      samples_first_distribution: np.ndarray,
+      samples_second_distribution: np.ndarray,
+  ) -> tf.keras.Model:
+    """Fits model weights that maximize a given divergence on provided samples.
+
+    This method optimizes parameters of a base model using samples from two
+    distributions, maximizing a lower bound on the divergence between those
+    distributions.
+
+    Args:
+      samples_first_distribution: Array with training samples from first
+        distribution.
+      samples_second_distribution: Array with training samples from second
+        distribution.
+
+    Returns:
+      A tf.keras.Model fitted on the input samples to maximize a given
+      divergence estimator.
+    """
+
+  @abc.abstractmethod
+  def _compute_divergence_on_samples(
+      self,
+      model: tf.keras.Model,
+      samples_first_distribution: np.ndarray,
+      samples_second_distribution: np.ndarray,
+      failure_probability: float,
+  ) -> float:
+    """Estimates a lower bound on the divergence from the given samples.
+
+    Args:
+      model: Model used to estimate the divergence on test samples.
+      samples_first_distribution: Array with samples from first distribution.
+      samples_second_distribution: Array with samples from second distribution.
+      failure_probability: Probability of test failure.
+
+    Returns:
+      Estimated divergence.
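+      With probability at least 1 - failure_probability, the returned value
+      lower bounds the true divergence between the two underlying
+      distributions.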
+ """ + + @override + @final + def estimate_lower_bound( + self, + samples_first_distribution: np.ndarray, + samples_second_distribution: np.ndarray, + failure_probability: float, + ) -> float: + samples1_train, samples1_test = ( + property_tester_utils.split_train_test_samples( + samples_first_distribution + ) + ) + samples2_train, samples2_test = ( + property_tester_utils.split_train_test_samples( + samples_second_distribution + ) + ) + + model = self._get_optimized_divergence_estimation_model( + samples1_train, + samples2_train, + ) + + divergence_test = self._compute_divergence_on_samples( + model, + samples1_test, + samples2_test, + failure_probability, + ) + + return divergence_test + + @override + @final + def reject_property(self, lower_bound: float) -> bool: + return lower_bound > self._test_threshold diff --git a/python/dp_auditorium/dp_auditorium/testers/divergence_tester_test.py b/python/dp_auditorium/dp_auditorium/testers/divergence_tester_test.py new file mode 100644 index 00000000..3dd8e4e4 --- /dev/null +++ b/python/dp_auditorium/dp_auditorium/testers/divergence_tester_test.py @@ -0,0 +1,122 @@ +# Copyright 2024 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for DP-Auditorium interfaces.""" + +import dataclasses + +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from dp_auditorium.configs import privacy_property +from dp_auditorium.testers import divergence_tester + + +@dataclasses.dataclass +class StubDivergencePropertyTesterConfig: + """Configuration for stub divergence property tester.""" + + estimated_divergence: float + test_threshold: float + + +class StubDivergencePropertyTester(divergence_tester.DivergencePropertyTester): + + def __init__( + self, + config: StubDivergencePropertyTesterConfig, + base_model: tf.keras.Model, + ): + self._config_test_threshold = config.test_threshold + self._estimated_divergence = config.estimated_divergence + self._base_model = base_model + + @property + def _test_threshold(self) -> float: + return self._config_test_threshold + + @property + def privacy_property(self) -> privacy_property.PrivacyProperty: + return privacy_property.PureDp(epsilon=0.1) + + def _get_optimized_divergence_estimation_model( + self, + samples_first_distribution: np.ndarray, + samples_second_distribution: np.ndarray, + ) -> tf.keras.Model: + return self._base_model + + def _compute_divergence_on_samples( + self, + model: tf.keras.Model, + samples_first_distribution: np.ndarray, + samples_second_distribution: np.ndarray, + failure_probability: float, + ) -> float: + del ( + model, + samples_first_distribution, + samples_second_distribution, + failure_probability, + ) + return self._estimated_divergence + + +class DivergencePropertyTesterTest(parameterized.TestCase): + + def setUp(self): + super().setUp() + self.base_model = tf.keras.Sequential([ + tf.keras.Input(shape=(3,)), + tf.keras.layers.Dense(1, activation="relu"), + ]) + + @parameterized.parameters(0.3, 1.1, 2.5) 
+ def test_divergence_property_estimates_lower_bound_returns_expected_divergence( + self, divergence + ): + samples1 = np.ones((100, 1)) + samples2 = np.ones((100, 1)) + + config = StubDivergencePropertyTesterConfig( + estimated_divergence=divergence, test_threshold=0.1 + ) + divergence_property_tester = StubDivergencePropertyTester( + config, self.base_model + ) + + estimated_divergence = divergence_property_tester.estimate_lower_bound( + samples1, samples2, failure_probability=0.1 + ) + self.assertAlmostEqual(estimated_divergence, divergence) + + @parameterized.product(lower_bound=[0.1, 0.5], threshold=[0.0, 0.5, 1.0]) + def test_divergence_property_tester_rejects_property( + self, lower_bound, threshold + ): + # In this test the estimated divergence parameter will be unused and we can + # set a dummy value. + config = StubDivergencePropertyTesterConfig( + estimated_divergence=0.314, test_threshold=threshold + ) + divergence_property_tester = StubDivergencePropertyTester( + config, self.base_model + ) + result = divergence_property_tester.reject_property(lower_bound) + self.assertEqual(lower_bound > threshold, result) + + +if __name__ == "__main__": + absltest.main() diff --git a/python/dp_auditorium/dp_auditorium/testers/histogram_tester.py b/python/dp_auditorium/dp_auditorium/testers/histogram_tester.py index c1691d61..83b4fc00 100644 --- a/python/dp_auditorium/dp_auditorium/testers/histogram_tester.py +++ b/python/dp_auditorium/dp_auditorium/testers/histogram_tester.py @@ -139,6 +139,7 @@ def __init__( ) self._epsilon = config.approximate_dp.epsilon self._delta = config.approximate_dp.delta + self._use_original_tester = config.use_original_tester self._histogram_size = config.histogram_size self._approximate_dp = config.approximate_dp @@ -148,18 +149,33 @@ def privacy_property(self) -> privacy_property.PrivacyProperty: return privacy_property.PrivacyProperty(approximate_dp=self._approximate_dp) def _get_error_tolerance( - self, num_samples: float, failure_probability: float + self, + num_samples: float, + probabilities1: np.ndarray, + probabilities2: np.ndarray, + failure_probability: float ) -> float: """Gets error tolerance for Histogram property tester.""" - term_1 = ( - 2 - * (1 + np.exp(self._epsilon)) - * np.sqrt(self._histogram_size / num_samples) - ) + if self._use_original_tester: + term_1 = ( + 2.0 + * (1.0 + np.exp(self._epsilon)) + * np.sqrt(self._histogram_size / num_samples) + ) + else: + term_1a = ( + 2.0 / np.sqrt(num_samples) + * sum(np.sqrt(probabilities1)) + ) + term_1b = ( + 2.0 * np.exp(self._epsilon) / np.sqrt(num_samples) + * sum(np.sqrt(probabilities2)) + ) + term_1 = term_1a + term_1b term_2 = ( - 6 - * (1 + np.exp(self._epsilon)) - * np.sqrt(np.log(4 / failure_probability) / (2 * num_samples)) + 6.0 + * (1.0 + np.exp(self._epsilon)) + * np.sqrt(np.log(4.0 / failure_probability) / (2.0 * num_samples)) ) return term_1 + term_2 @@ -177,7 +193,7 @@ def estimate_lower_bound( per_outcome_delta = probabilities1 - np.exp(self._epsilon) * probabilities2 estimated_delta = np.sum(per_outcome_delta[per_outcome_delta > 0]) error_tolerance = self._get_error_tolerance( - num_samples, failure_probability + num_samples, probabilities1, probabilities2, failure_probability ) return estimated_delta - error_tolerance diff --git a/python/dp_auditorium/dp_auditorium/testers/histogram_tester_test.py b/python/dp_auditorium/dp_auditorium/testers/histogram_tester_test.py index 342823ae..4a223c1e 100644 --- a/python/dp_auditorium/dp_auditorium/testers/histogram_tester_test.py +++ 
b/python/dp_auditorium/dp_auditorium/testers/histogram_tester_test.py @@ -42,11 +42,28 @@ def test_get_error_tolerance( test_discrete_mechanism=False, histogram_size=histogram_size, ) - tester = histogram_tester.HistogramTester(config) - result = tester._get_error_tolerance(num_samples, failure_probability) - self.assertAllClose(result, expected_error_tolerance, rtol=1e-2) - - def test_estimate_lower_bound(self): + probabilities1 = np.random.dirichlet(alpha=np.ones(histogram_size)) + probabilities2 = np.random.dirichlet(alpha=np.ones(histogram_size)) + with self.subTest(use_original_tester=True): + config.use_original_tester = True + tester = histogram_tester.HistogramTester(config) + result = tester._get_error_tolerance(num_samples, + probabilities1, + probabilities2, + failure_probability) + self.assertAllClose(result, expected_error_tolerance, rtol=1e-2) + with self.subTest(use_original_tester=False): + config.use_original_tester = False + tester = histogram_tester.HistogramTester(config) + result = tester._get_error_tolerance(num_samples, + probabilities1, + probabilities2, + failure_probability) + # New tester always has smaller error tolerance than original tester. + self.assertLess(result, expected_error_tolerance) + + @parameterized.parameters(True, False) + def test_estimate_lower_bound(self, use_original_tester): """Verifies estimate of delta lower bound. Verifies that correct delta is calculated for a mechanism that @@ -56,6 +73,8 @@ def test_estimate_lower_bound(self): num_samples = 100 samples1 = np.zeros(num_samples) samples2 = np.ones(num_samples) + probabilities1 = np.array([1, 0]) + probabilities2 = np.array([0, 1]) # Initialize tester. config = property_tester_config.HistogramPropertyTesterConfig( @@ -65,12 +84,13 @@ def test_estimate_lower_bound(self): test_discrete_mechanism=False, histogram_size=2, ) + config.use_original_tester = use_original_tester tester = histogram_tester.HistogramTester(config) # Estimate delta. failure_probability = 0.1 expected_delta = 1.0 - tester._get_error_tolerance( - num_samples, failure_probability + num_samples, probabilities1, probabilities2, failure_probability ) estimated_delta = tester.estimate_lower_bound( samples1, samples2, failure_probability @@ -85,6 +105,7 @@ def test_reject_property(self): max_value=1, min_value=0, test_discrete_mechanism=False, + use_original_tester=True, histogram_size=2, ) tester = histogram_tester.HistogramTester(config) diff --git a/python/dp_auditorium/dp_auditorium/testers/hockey_stick_tester.py b/python/dp_auditorium/dp_auditorium/testers/hockey_stick_tester.py index dfdbbf75..a8b6bbc7 100644 --- a/python/dp_auditorium/dp_auditorium/testers/hockey_stick_tester.py +++ b/python/dp_auditorium/dp_auditorium/testers/hockey_stick_tester.py @@ -18,39 +18,19 @@ using its dual formulation as the weighted accuracy of a classifier. """ -import dataclasses from typing import Tuple from absl import logging import numpy as np import tensorflow as tf +from typing_extensions import override -from dp_auditorium import interfaces from dp_auditorium.configs import privacy_property from dp_auditorium.configs import property_tester_config +from dp_auditorium.testers import divergence_tester from dp_auditorium.testers import property_tester_utils -@dataclasses.dataclass(frozen=False) -class HockeyStickDivergenceTrainingOptions: - """Training options for the HockeyStickDivergenceTester. - - Attributes: - num_epochs: Number of epochs to run the training pipeline. - learning_rate: Learning rate for Adam optimizer. 
-    batch_size: Batch size to use during training.
-    failure_probability: Probability of returning a false positive. That is, the
-      divergence suggests the mechanism is not private even though it is.
-    verbose: Integer to pass to keras to decide the verbosity of the training
-      process.
-  """
-
-  num_epochs: int
-  learning_rate: float
-  batch_size: int
-  verbose: int
-
-
 def make_default_hs_training_config() -> property_tester_config.TrainingConfig:
   return property_tester_config.TrainingConfig(
       training_epochs=2,
@@ -69,20 +49,9 @@ def make_default_hs_base_model() -> tf.keras.Model:
   ])
 
 
-def make_training_options_from_config(
-    training_config: property_tester_config.TrainingConfig,
-):
-  return HockeyStickDivergenceTrainingOptions(
-      num_epochs=training_config.training_epochs,
-      learning_rate=training_config.optimizer_learning_rate,
-      batch_size=training_config.batch_size,
-      verbose=training_config.verbose,
-  )
-
-
 # Helper functions and classes for the HockeyStickDivergenceTester
 def _get_accuracy_confidence_bound(
-    n_samples: int, confidence: float = 0.95
+    range_bound: float, n_samples: int, confidence: float = 0.95,
 ) -> float:
   r"""Returns a confidence bound on the estimate of P(h(X) = y).
 
@@ -90,6 +59,7 @@
   \frac{1}{n} \sum_{i=1}^n {h(X_i) = Y_i}.
 
   Args:
+    range_bound: A bound on the length of the range of estimated values.
     n_samples: Number of samples used in the estimate.
     confidence: The level of confidence we want the estimate to have.
 
@@ -97,10 +67,10 @@
     The one-sided confidence error around the estimate.
   """
   delta = 1.0 - confidence
-  return np.sqrt(np.log(1.0 / delta) / 2.0 / n_samples)
+  return range_bound * np.sqrt(np.log(1.0 / delta) / 2.0 / n_samples)
 
 
-class HockeyStickPropertyTester(interfaces.PropertyTester):
+class HockeyStickPropertyTester(divergence_tester.DivergencePropertyTester):
   r"""Uses a model to estimate divergence between the outputs of a mechanism.
 
   Specifically, given two neighboring datasets D_0, D_1 and epsilon. Generates
@@ -109,7 +79,11 @@ class HockeyStickPropertyTester(interfaces.PropertyTester):
   tries to distinguish between "positive" and "negative" examples. A mechanism
   is (epsilon,delta) DP if and only if the accuracy of a classifier in this
   dataset is less than (e^epsilon + delta) / (1 + e^epsilon). The hockey stick
-  divergence corresponds to \delta.
+  divergence corresponds to delta.
+
+  NOTE: This property tester overrides any user-specified value of
+  config.training_config.model_output_coordinate_bound with 1.0
+  for the sake of validity and efficiency.
 
   Attributes:
     _base_model: A keras model that discriminates between samples generated by a
@@ -117,9 +91,6 @@ class HockeyStickPropertyTester(interfaces.PropertyTester):
       class must return logits.
     _epsilon: The epsilon in the (epsilon, delta) guarantee the mechanism is
       supposed to satisfy.
-    _delta: The delta in the (epsilon,delta) guarantee the mechanism is supposed
-      to satisfy.
-    _has_called_fit: Boolean that verifies if model has been trained.
   """
 
   def __init__(
@@ -138,40 +109,35 @@ def __init__(
     property_tester_utils.validate_approximate_dp_property(
         config.approximate_dp
     )
+    # This constant defines the maximum output value of a `base_model` and is
+    # used to get confidence intervals for the lower bound of the divergence.
+    # We set it here to 1.0 given that the tester optimizes for a binary
+    # classification task.
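+    # With this bound set to 1.0, the confidence term computed by
+    # `_get_accuracy_confidence_bound` reduces to
+    # sqrt(log(1 / failure_probability) / (2 * n)), the one-sided Hoeffding
+    # bound for an empirical accuracy taking values in [0, 1].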
+    logging.info(
+        "Overwriting `model_output_coordinate_bound`; the validity and"
+        " efficiency of the test are optimized for"
+        " `model_output_coordinate_bound=1.0`"
+    )
+    config.training_config.model_output_coordinate_bound = 1.0
+    property_tester_utils.validate_training_config(config.training_config)
+    self._model_coordinate_bound = (
+        config.training_config.model_output_coordinate_bound
+    )
     self._base_model = base_model
     self._epsilon = config.approximate_dp.epsilon
     self._delta = config.approximate_dp.delta
     self._approximate_dp = config.approximate_dp
-    self._has_called_fit = False
-    self._training_options = make_training_options_from_config(
-        config.training_config
-    )
-    self.initialize(self._training_options)
+    self._training_options = config.training_config
+    self._evaluation_batch_size = config.evaluation_batch_size
+
+  @property
+  def _test_threshold(self) -> float:
+    return self._delta
 
   @property
   def privacy_property(self) -> privacy_property.PrivacyProperty:
     """The privacy guarantee that the tester is being used to test for."""
     return privacy_property.PrivacyProperty(approximate_dp=self._approximate_dp)
 
-  def initialize(self, training_options: HockeyStickDivergenceTrainingOptions):
-    """Compiles internal model.
-
-    Auxiliary function to use as a standalone tester while changing training
-    options of the tester across different runs.
-
-    Args:
-      training_options: Training options for keras optimization.
-    """
-
-    if self._has_called_fit:
-      self._base_model = tf.keras.models.clone_model(self._base_model)
-    self._base_model.compile(
-        optimizer=tf.keras.optimizers.Adam(training_options.learning_rate),
-        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
-        metrics=tf.keras.metrics.BinaryAccuracy(threshold=0.0),
-    )
-    self._training_options = training_options
-
   def _generate_inputs_to_model(
       self,
       samples1: np.ndarray,
@@ -195,7 +161,7 @@
     Raises:
       ValueError if the ranks of sample1 and sample 2 are not equal.
     """
-    sample_cutoff_fraction = 1.0 / (np.exp(self._epsilon) + 1)
+    sample_cutoff_fraction = 1.0 / (np.exp(self._epsilon) + 1.0)
 
     if len(samples1.shape) != len(samples2.shape):
       raise ValueError(f"""Mechanism outputs on dataset 1 and dataset 2 should
@@ -215,146 +181,60 @@
     labels = np.concatenate([labels_1, labels_2], axis=0)
     return features, labels
 
-  def _fit(
+  @override
+  def _get_optimized_divergence_estimation_model(
       self,
-      samples1: np.ndarray,
-      samples2: np.ndarray,
-      batch_size: int,
-      epochs: int,
-      verbose: int,
+      samples_first_distribution: np.ndarray,
+      samples_second_distribution: np.ndarray,
   ):
-    """Fits the underlying model on the labeled output of a mechansim.
-
-    Args:
-      samples1: Samples from one distribution
-      samples2: Samples from the other distribution
-      batch_size: Batch size to use in the training process.
-      epochs: Number of epochs to train for.
-      verbose: Option passed to keras trainer.
- """ - self.initialize(self._training_options) - features, labels = self._generate_inputs_to_model(samples1, samples2) - self._base_model.fit( + model = tf.keras.models.clone_model(self._base_model) + model.compile( + optimizer=tf.keras.optimizers.Adam( + self._training_options.optimizer_learning_rate + ), + loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), + metrics=tf.keras.metrics.BinaryAccuracy(threshold=0.0), + ) + features, labels = self._generate_inputs_to_model( + samples_first_distribution, + samples_second_distribution, + ) + model.fit( features, labels, shuffle=True, - epochs=epochs, - batch_size=batch_size, - verbose=verbose, - ) - self._has_called_fit = True - - def _get_accuracy_and_divergence_estimate( - self, - samples1, - samples2, - failure_probability: float, - ) -> Tuple[float, float]: - """Returns the accuracy of a trained classifier. - - Args: - samples1: Samples from one distribution - samples2: Samples from the other distribution - failure_probability: Probability of having a false positive. I.e. the test - suggests that this is a privacy violation when in reality it is not. - - Returns: - The accuracy of the classifier (adjusted with confidence) and the - associated hockey stick divergence (this corresponds to the delta of the - mechanism). - - Raises: - AttributeError if called before calling fit(). - """ - if not self._has_called_fit: - raise AttributeError( - "Estimator should be trained with fit() before getting accuracy" - ) - features, labels = self._generate_inputs_to_model(samples1, samples2) - - accuracy = self._base_model.evaluate(features, labels, batch_size=1000)[1] - test_sample_size = samples1.shape[0] - accuracy -= _get_accuracy_confidence_bound( - test_sample_size, 1 - failure_probability - ) - hs_divergence = accuracy * (1 + np.exp(self._epsilon)) - np.exp( - self._epsilon - ) - return accuracy, hs_divergence - - def _estimate_discriminative_accuracy_and_hs_divergence_of_mechanism( - self, samples_1, samples_2, failure_probability: float - ) -> Tuple[float, float]: - """End to end estimation of accuracy and divergence. - - Args: - samples_1: Samples from one distribution - samples_2: Samples from the other distribution - failure_probability: The probability of the test asserting that the the - accuracy estimated by the method is lower than the returned value. - - Returns: - Accuracy and hockey stick divergence of the mechanism on datasets. - """ - train_samples1, test_samples_1 = ( - property_tester_utils.split_train_test_samples( - samples_1 - ) - ) - train_samples2, test_samples_2 = ( - property_tester_utils.split_train_test_samples( - samples_2 - ) - ) - self._fit( - train_samples1, - train_samples2, - epochs=self._training_options.num_epochs, + epochs=self._training_options.training_epochs, batch_size=self._training_options.batch_size, verbose=self._training_options.verbose, ) - logging.info("Evaluating model") - return self._get_accuracy_and_divergence_estimate( - test_samples_1, - test_samples_2, - failure_probability=failure_probability, - ) - def estimate_lower_bound( + return model + + @override + def _compute_divergence_on_samples( self, - samples_1: np.ndarray, - samples_2: np.ndarray, + model: tf.keras.Model, + samples_first_distribution: np.ndarray, + samples_second_distribution: np.ndarray, failure_probability: float, ) -> float: - """Returns a lower bound on the hockey stick divergence between the samples. - - Args: - samples_1: First set of samples. 
- samples_2: Second set of samples - failure_probability: The probability that the returned value is not in - fact a lower bound on the divergence between the distributions that - generated these samples. - - Returns: - Estimated lower bound on the divergence between two distributions - represented by samples_1 and samples_2. - """ - accuracy, divergence = ( - self._estimate_discriminative_accuracy_and_hs_divergence_of_mechanism( - samples_1, samples_2, failure_probability - ) + features, labels = self._generate_inputs_to_model( + samples_first_distribution, samples_second_distribution ) - logging.info("Accuracy: %f, Divergence: %f", accuracy, divergence) - return divergence - - def reject_property(self, lower_bound: float) -> bool: - """Tests whether a mechanism is epsilon-delta private. - Args: - lower_bound: Divergence obtained from estimate_divergence - - Returns: - True if the esitmated lower bound on the divergence is above the - expected delta parameter of a mechanism. - """ - return lower_bound > self._delta + accuracy = model.evaluate( + features, labels, batch_size=self._evaluation_batch_size + )[1] + test_sample_size = min( + samples_first_distribution.shape[0], + samples_second_distribution.shape[0], + ) + accuracy -= _get_accuracy_confidence_bound( + self._model_coordinate_bound, + test_sample_size, + 1.0 - failure_probability, + ) + hs_divergence = accuracy * (1.0 + np.exp(self._epsilon)) - np.exp( + self._epsilon + ) + return hs_divergence diff --git a/python/dp_auditorium/dp_auditorium/testers/hockey_stick_tester_test.py b/python/dp_auditorium/dp_auditorium/testers/hockey_stick_tester_test.py index 00b2a3b3..3403e03f 100644 --- a/python/dp_auditorium/dp_auditorium/testers/hockey_stick_tester_test.py +++ b/python/dp_auditorium/dp_auditorium/testers/hockey_stick_tester_test.py @@ -16,13 +16,14 @@ from absl.testing import absltest import numpy as np from tensorflow import keras -from dp_auditorium.configs import privacy_property as privacy_property +from dp_auditorium.configs import privacy_property from dp_auditorium.configs import property_tester_config as config from dp_auditorium.testers import hockey_stick_tester as hst _SEED = 123456 _RNG = np.random.default_rng(seed=_SEED) +_ESTIMATION_RANGE_BOUND = 1.0 class HockeyStickDivergenceTest(absltest.TestCase): @@ -30,7 +31,9 @@ class HockeyStickDivergenceTest(absltest.TestCase): def test_confidence_bound(self): n_samples = 100 n_experiments = 1000 - cb = hst._get_accuracy_confidence_bound(n_samples, confidence=0.95) + cb = hst._get_accuracy_confidence_bound( + _ESTIMATION_RANGE_BOUND, n_samples, confidence=0.95 + ) # Generate 1000 sums of bernoulli random variables. 
sample = _RNG.binomial(n_samples, 0.3, n_experiments) errors = np.abs(sample / n_samples - 0.3) @@ -52,7 +55,7 @@ def test_get_model_inputs(self): training_config = hst.make_default_hs_training_config() hs_config = config.HockeyStickPropertyTesterConfig( approximate_dp=self.make_privacy_property(epsilon=1.0, delta=0.001), - training_config=training_config, + training_config=training_config, evaluation_batch_size=1000, ) div_estimator = hst.HockeyStickPropertyTester( config=hs_config, base_model=model @@ -81,7 +84,7 @@ def test_get_model_inputs_higher_dim(self): training_config = hst.make_default_hs_training_config() hs_config = config.HockeyStickPropertyTesterConfig( approximate_dp=self.make_privacy_property(epsilon=1.0, delta=0.001), - training_config=training_config, + training_config=training_config, evaluation_batch_size=1000, ) div_estimator = hst.HockeyStickPropertyTester( config=hs_config, base_model=model @@ -124,35 +127,37 @@ def test_get_model_inputs_higher_dim(self): labels[features_sort_ix, ...], expected_labels ) - def test_get_accuracy_and_divergence(self): + def test_compute_divergence_on_samples(self): data1 = np.array([1]) data2 = np.array([-1]) - samples1 = self.dummy_mechanism(data1, 2000) - samples2 = self.dummy_mechanism(data2, 2000) + samples1_train = self.dummy_mechanism(data1, 2000) + samples2_train = self.dummy_mechanism(data2, 2000) + + samples1_test = self.dummy_mechanism(data1, 2000) + samples2_test = self.dummy_mechanism(data2, 2000) model = keras.Sequential([keras.layers.Dense(1, use_bias=False)]) training_options = hst.make_default_hs_training_config() training_options.training_epochs = 1000 hs_config = config.HockeyStickPropertyTesterConfig( approximate_dp=self.make_privacy_property(epsilon=0.5, delta=0.1), - training_config=training_options, + training_config=training_options, evaluation_batch_size=1000, ) div_estimator = hst.HockeyStickPropertyTester( config=hs_config, base_model=model ) - accuracy, div = ( - div_estimator._estimate_discriminative_accuracy_and_hs_divergence_of_mechanism( # pylint: disable=line-too-long - samples1, samples2, 0.05 - ) + model = div_estimator._get_optimized_divergence_estimation_model( + samples1_train, samples2_train + ) + div = div_estimator._compute_divergence_on_samples( + model, samples1_test, samples2_test, 0.05 ) - expected_accuracy = 0.99 - hst._get_accuracy_confidence_bound(1000) - self.assertGreater(accuracy, expected_accuracy) self.assertGreater(div, 0.5) def laplace_mechanism(self, x, n_samples): return _RNG.laplace(0, 1.0, n_samples) + x - def test_get_accuracy_and_divergence_private_mechanism(self): + def test_compute_divergence_private_mechanism(self): data1 = np.array([1]) data2 = np.array([0]) model = keras.Sequential([keras.layers.Dense(1)]) @@ -162,56 +167,38 @@ def test_get_accuracy_and_divergence_private_mechanism(self): training_config.training_epochs = 1000 hs_config = config.HockeyStickPropertyTesterConfig( approximate_dp=self.make_privacy_property(epsilon=1.0, delta=0.0), - training_config=training_config, + training_config=training_config, evaluation_batch_size=1000, ) div_estimator = hst.HockeyStickPropertyTester( config=hs_config, base_model=model ) - - accuracy, div = ( - div_estimator._estimate_discriminative_accuracy_and_hs_divergence_of_mechanism( # pylint: disable=line-too-long - samples1, samples2, 0.05 - ) - ) - expected_accuracy = np.exp(1) / (1 + np.exp(1)) - self.assertLess(accuracy, expected_accuracy) - self.assertLess(div, 0.0) - - def test_fails_to_evaluate_when_not_fitted(self): - 
model = keras.Sequential([keras.layers.Dense(1)]) - training_options = hst.make_default_hs_training_config() - hs_config = config.HockeyStickPropertyTesterConfig( - approximate_dp=self.make_privacy_property(epsilon=1.0, delta=0.0), - training_config=training_options, + model = div_estimator._get_optimized_divergence_estimation_model( + samples1[:1000], samples2[:1000] ) - div_estimator = hst.HockeyStickPropertyTester( - config=hs_config, base_model=model + div = div_estimator._compute_divergence_on_samples( + model, samples1, samples2, 0.05 ) - with self.assertRaises(AttributeError) as context: - div_estimator._get_accuracy_and_divergence_estimate( - np.array([0]), np.array([1]), 0.05 - ) - self.assertIn("should be trained", str(context.exception)) + self.assertLess(div, 0.0) - def bad_mechanism(self, data, n_samples): + def mechanism_with_different_output_shapes(self, data, n_samples): if data[0] == 0: return np.ones((n_samples, 2)) if data[0] == 1: return np.ones((n_samples, 3, 4)) - def test_fails_on_bad_mechanism(self): + def test_fails_on_mechanism_with_different_output_shapes(self): model = keras.Sequential([keras.layers.Dense(1)]) training_options = hst.make_default_hs_training_config() hs_config = config.HockeyStickPropertyTesterConfig( approximate_dp=self.make_privacy_property(epsilon=1.0, delta=0.1), - training_config=training_options, + training_config=training_options, evaluation_batch_size=1000, ) div_estimator = hst.HockeyStickPropertyTester( config=hs_config, base_model=model ) - samples1 = self.bad_mechanism(np.array([0]), 100) - samples2 = self.bad_mechanism(np.array([1]), 100) + samples1 = self.mechanism_with_different_output_shapes(np.array([0]), 100) + samples2 = self.mechanism_with_different_output_shapes(np.array([1]), 100) with self.assertRaises(ValueError) as context: div_estimator._generate_inputs_to_model(samples1, samples2) self.assertIn("rank", str(context.exception)) @@ -233,7 +220,7 @@ def test_hockey_stick_privacy_tester(self): training_options.training_epochs = 1000 hs_config = config.HockeyStickPropertyTesterConfig( approximate_dp=self.make_privacy_property(epsilon=epsilon, delta=delta), - training_config=training_options, + training_config=training_options, evaluation_batch_size=1000, ) hsdt = hst.HockeyStickPropertyTester(config=hs_config, base_model=model) @@ -244,10 +231,11 @@ def test_hockey_stick_privacy_tester(self): divergence = hsdt.estimate_lower_bound( samples1, samples2, failure_probability ) - - self.assertLess(divergence, 0.0) + with self.subTest("divergence_less_than_threshold"): + self.assertLess(divergence, 0.0) found_privacy_violation = hsdt.reject_property(divergence) - self.assertFalse(found_privacy_violation) + with self.subTest("found_privacy_violation"): + self.assertFalse(found_privacy_violation) def test_hockey_stick_non_private_mechanism(self): model = keras.Sequential([keras.layers.Dense(1, use_bias=True)]) @@ -257,22 +245,24 @@ def test_hockey_stick_non_private_mechanism(self): training_options.training_epochs = 1000 hs_config = config.HockeyStickPropertyTesterConfig( approximate_dp=self.make_privacy_property(epsilon=epsilon, delta=delta), - training_config=training_options, + training_config=training_options, evaluation_batch_size=1000, ) hsdt = hst.HockeyStickPropertyTester(config=hs_config, base_model=model) samples1 = self.non_private_mechanism_for_testing(np.array([0, 1]), 200) samples2 = self.non_private_mechanism_for_testing(np.array([0]), 200) divergence = hsdt.estimate_lower_bound(samples1, samples2, 0.05) + with 
+      self.assertLess(0.5, divergence)
-    self.assertLess(0.5, divergence)
     found_privacy_violation = hsdt.reject_property(divergence)
-    self.assertTrue(found_privacy_violation)
+    with self.subTest("found_privacy_violation"):
+      self.assertTrue(found_privacy_violation)

   def test_assert_privacy_violation(self):
     training_options = hst.make_default_hs_training_config()
     hs_config = config.HockeyStickPropertyTesterConfig(
         approximate_dp=self.make_privacy_property(epsilon=1.0, delta=0.1),
-        training_config=training_options,
+        training_config=training_options, evaluation_batch_size=1000,
     )
     hsdt = hst.HockeyStickPropertyTester(
         config=hs_config,
@@ -286,14 +276,15 @@ def test_privacy_property(self):
     training_options = hst.make_default_hs_training_config()
     hs_config = config.HockeyStickPropertyTesterConfig(
         approximate_dp=self.make_privacy_property(epsilon=1.0, delta=0.1),
-        training_config=training_options,
+        training_config=training_options, evaluation_batch_size=1000,
     )
     hs_tester = hst.HockeyStickPropertyTester(
-        config=hs_config,
-        base_model=hst.make_default_hs_base_model())
+        config=hs_config, base_model=hst.make_default_hs_base_model()
+    )
     self.assertEqual(
         hs_config.approximate_dp, hs_tester.privacy_property.approximate_dp
     )

+
 if __name__ == "__main__":
   absltest.main()
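Note: the refactor above splits the old single accuracy-plus-divergence helper into two hooks, one that fits the discriminator on training samples and one that certifies a divergence lower bound on held-out samples. A minimal end-to-end sketch of the public API, mirroring the Laplace test above. The `ApproximateDp(epsilon=..., delta=...)` constructor is an assumption (the tests build this value through a local helper not shown here); everything else appears in the patch.

import numpy as np

from dp_auditorium.configs import privacy_property
from dp_auditorium.configs import property_tester_config as config
from dp_auditorium.testers import hockey_stick_tester as hst

rng = np.random.default_rng(0)

def laplace_mechanism(x, n_samples):
  # Scale-1 Laplace noise on a sensitivity-1 statistic, i.e. epsilon = 1.
  return rng.laplace(0.0, 1.0, n_samples) + x

training_options = hst.make_default_hs_training_config()
hs_config = config.HockeyStickPropertyTesterConfig(
    # Assumed constructor; adjust to however the configs module builds it.
    approximate_dp=privacy_property.ApproximateDp(epsilon=1.0, delta=0.01),
    training_config=training_options,
    evaluation_batch_size=1000,
)
tester = hst.HockeyStickPropertyTester(
    config=hs_config, base_model=hst.make_default_hs_base_model()
)
samples1 = laplace_mechanism(np.array([1]), 2000)
samples2 = laplace_mechanism(np.array([0]), 2000)
lower_bound = tester.estimate_lower_bound(samples1, samples2, 0.05)
print('privacy violation found:', tester.reject_property(lower_bound))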
diff --git a/python/dp_auditorium/dp_auditorium/testers/property_tester_utils.py b/python/dp_auditorium/dp_auditorium/testers/property_tester_utils.py
index b5b230af..dbe283fb 100644
--- a/python/dp_auditorium/dp_auditorium/testers/property_tester_utils.py
+++ b/python/dp_auditorium/dp_auditorium/testers/property_tester_utils.py
@@ -48,7 +48,7 @@ def split_train_test_samples(
   return samples[0 : n // 2, ...], samples[n // 2 :, ...]


-def validate_training_params(
+def validate_training_config(
     training_config: property_tester_config.TrainingConfig,
 ):
-  """Returns true if the training_params parameters are valid."""
+  """Validates `training_config`, raising a ValueError on invalid fields."""
diff --git a/python/dp_auditorium/dp_auditorium/testers/property_tester_utils_test.py b/python/dp_auditorium/dp_auditorium/testers/property_tester_utils_test.py
index b50467e6..60ed9500 100644
--- a/python/dp_auditorium/dp_auditorium/testers/property_tester_utils_test.py
+++ b/python/dp_auditorium/dp_auditorium/testers/property_tester_utils_test.py
@@ -34,30 +34,30 @@ def setUp(self):
     )

   @parameterized.parameters(-0.1, 0.0)
-  def test_validate_training_params_wrong_lr(self, learning_rate):
+  def test_validate_training_config_wrong_lr(self, learning_rate):
     self.training_config.optimizer_learning_rate = learning_rate
     with self.assertRaises(ValueError):
-      _ = property_tester_utils.validate_training_params(self.training_config)
+      _ = property_tester_utils.validate_training_config(self.training_config)

   @parameterized.parameters(-10, 0)
-  def test_validate_training_params_wrong_training_epochs(
+  def test_validate_training_config_wrong_training_epochs(
       self, training_epochs
   ):
     self.training_config.training_epochs = training_epochs
     with self.assertRaises(ValueError):
-      _ = property_tester_utils.validate_training_params(self.training_config)
+      _ = property_tester_utils.validate_training_config(self.training_config)

   @parameterized.parameters(-5, 0)
-  def test_validate_training_params_wrong_batch_size(self, batch_size):
+  def test_validate_training_config_wrong_batch_size(self, batch_size):
     self.training_config.batch_size = batch_size
     with self.assertRaises(ValueError):
-      _ = property_tester_utils.validate_training_params(self.training_config)
+      _ = property_tester_utils.validate_training_config(self.training_config)

   @parameterized.parameters(-0.1, 0.0)
-  def test_validate_training_params_wrong_bound(self, bound):
+  def test_validate_training_config_wrong_bound(self, bound):
     self.training_config.model_output_coordinate_bound = bound
     with self.assertRaises(ValueError):
-      _ = property_tester_utils.validate_training_params(self.training_config)
+      _ = property_tester_utils.validate_training_config(self.training_config)

   def test_split_train_test_samples_raises_exception(self):
     with self.assertRaises(ValueError):
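Note: the renamed validator keeps its contract, rejecting any non-positive field with a ValueError. A small usage sketch; the TrainingConfig keyword names follow the fields exercised in the tests above, but the exact constructor signature is an assumption.

from dp_auditorium.configs import property_tester_config
from dp_auditorium.testers import property_tester_utils

training_config = property_tester_config.TrainingConfig(
    training_epochs=10,
    optimizer_learning_rate=0.01,
    batch_size=100,
    model_output_coordinate_bound=1.0,
)
# Passes silently; any non-positive value above raises ValueError.
property_tester_utils.validate_training_config(training_config)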
diff --git a/python/dp_auditorium/dp_auditorium/testers/renyi_tester.py b/python/dp_auditorium/dp_auditorium/testers/renyi_tester.py
index 56b560a1..126fc5ae 100644
--- a/python/dp_auditorium/dp_auditorium/testers/renyi_tester.py
+++ b/python/dp_auditorium/dp_auditorium/testers/renyi_tester.py
@@ -16,103 +16,24 @@
 Functions to estimate Renyi divergence between samples of two distributions.
 """

-from typing import Dict, Optional, Union
+from typing import Dict

 import numpy as np
 import tensorflow as tf
 from typing_extensions import override

-from dp_auditorium import interfaces
 from dp_auditorium.configs import privacy_property
 from dp_auditorium.configs import property_tester_config
+from dp_auditorium.testers import divergence_tester
 from dp_auditorium.testers import property_tester_utils


-def _renyi_model_parameters_initializer(
-    config: property_tester_config.RenyiPropertyTesterConfig,
-    base_model: Optional[tf.keras.Model] = None,
-) -> dict[str, Union[float, int, None, tf.keras.Model]]:
-  """Initializes attributes for RenyiPropertyTester.
-
-  This function processes `config` to extract privacy parameters
-  and initialize the model parametrizing the Renyi divergence approximation. See
-  section 4.1. of https://arxiv.org/pdf/2307.05608.pdf for more details.
-
-  Args:
-    config: A RenyiPropertyTester configuration.
-    base_model: A keras model to use to parametrize the variational formulation
-      of the Renyi divergence.
-
-  Returns:
-    A dictionary with relevant attributes to initialize a RenyiPropertyTester.
-    The dictionary contains (1) a value `alpha` for the order of the Renyi
-    divergence being estimated, (2) the `test_threshold`, and (3) a `base_model`
-    keras model parametrizing the function space to estimate the Renyi
-    divergence.
-
-  Raises:
-    ValueError if the config sets two different alpha parameters
-    when testing Renyi DP or if the privacy property is different than pure or
-    Renyi DP.
-  """
-
-  if config.privacy_property.renyi_dp is not None:
-    privacy_type = 'renyi_dp'
-    alpha = config.privacy_property.renyi_dp.alpha
-    epsilon = config.privacy_property.renyi_dp.epsilon
-    if config.alpha != alpha:
-      raise ValueError(
-          'Alpha parameter for Renyi DP should be specified in'
-          ' privacy_tester_config.privacy_property. It was specified in'
-          ' config.alpha which is only used for Pure DP tests.'
-      )
-  elif config.privacy_property.pure_dp is not None:
-    privacy_type = 'pure_dp'
-    epsilon = config.privacy_property.pure_dp.epsilon
-    alpha = config.alpha
-  else:
-    raise ValueError(
-        'The specified privacy_property is not supported by RenyiTester.'
-    )
-
-  model_output_coordinate_bound = (
-      config.training_config.model_output_coordinate_bound
-  )
-
-  def scaled_tanh(x):
-    return model_output_coordinate_bound * tf.keras.activations.tanh(x)
-
-  if base_model is None:
-    base_model = tf.keras.models.Sequential([
-        tf.keras.layers.Dense(100, activation=scaled_tanh),
-        tf.keras.layers.Dense(100, activation=scaled_tanh),
-        tf.keras.layers.Dense(1),
-    ])
-
-  base_model.add(tf.keras.layers.Activation(scaled_tanh))
-
-  if privacy_type == 'renyi_dp':
-    threshold = epsilon
-  else:
-    threshold = min(epsilon, 2 * alpha * epsilon**2)
-  return {
-      'alpha': alpha,
-      'test_threshold': threshold,
-      'base_model': base_model,
-  }
-
-
-def _compute_error_from_gamma(gamma: float) -> float:
-  """Returns additive error from convenience variable gamma.
-
-  To estimate number of samples we allow for a multiplicative error `gamma` from
-  chernoff bound in https://arxiv.org/abs/2307.05608. This function converts the
-  multiplicative error to additive error.
-
-  Args:
-    gamma: Multiplicative error.
-  """
-  return np.log((1 + gamma) / (1 - gamma))
+def make_default_renyi_base_model() -> tf.keras.Model:
+  return tf.keras.models.Sequential([
+      tf.keras.layers.Dense(100, activation=tf.keras.activations.tanh),
+      tf.keras.layers.Dense(100, activation=tf.keras.activations.tanh),
+      tf.keras.layers.Dense(1),
+  ])


 def _compute_error_from_samples(
@@ -143,7 +64,8 @@ def _compute_error_from_samples(
       / num_samples
   )
   gamma = max(error_1, error_2)
-  return _compute_error_from_gamma(gamma)
+  error_from_gamma = np.log((1 + gamma) / (1 - gamma))
+  return error_from_gamma


 class RenyiModel(tf.keras.Model):
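Note: inlining `_compute_error_from_gamma` keeps the conversion next to its only caller. The math, for readers of the diff: if each empirical mean is within a multiplicative (1 +/- gamma) band of its true value (the Chernoff-bound guarantee from https://arxiv.org/abs/2307.05608), then the log of a ratio of such means is off by at most an additive log((1 + gamma) / (1 - gamma)). A standalone sketch of that identity:

import numpy as np

def additive_error_from_gamma(gamma: float) -> float:
  # Worst case: numerator overestimated by (1 + gamma), denominator
  # underestimated by (1 - gamma), so the log-ratio shifts by this much.
  return np.log((1 + gamma) / (1 - gamma))

# Small gamma gives roughly 2 * gamma of additive error; the error vanishes
# as gamma -> 0 and blows up as gamma -> 1, which is why tighter bounds
# need more test samples.
assert np.isclose(additive_error_from_gamma(0.01), 0.02, atol=1e-4)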
@@ -164,7 +86,7 @@ def train_step(
     trainable_vars = self.nn_model.trainable_variables
     d_loss = tape.gradient(loss, trainable_vars)
     self.optimizer.apply_gradients(zip(d_loss, trainable_vars))
-    return {'renyi': divergence}
+    return {'divergence': divergence}

   def call(
       self, data: tuple[np.ndarray, np.ndarray], training: bool = None
@@ -196,7 +118,7 @@ def call(
     return divergence


-class RenyiPropertyTester(interfaces.PropertyTester):
+class RenyiPropertyTester(divergence_tester.DivergencePropertyTester):
   """Renyi tester main class.

   RenyiTester computes a lower bound for the Renyi divergence using Algorithm 2
@@ -209,40 +131,56 @@ class RenyiPropertyTester(interfaces.PropertyTester):
   def __init__(
       self,
       config: property_tester_config.RenyiPropertyTesterConfig,
-      base_model: Optional[tf.keras.Model] = None,
+      base_model: tf.keras.Model,
   ):
     # Get privacy parameters
     if config.privacy_property.renyi_dp is not None:
       property_tester_utils.validate_renyi_dp_property(
           config.privacy_property.renyi_dp
       )
+      privacy_type = 'renyi_dp'
+      epsilon = config.privacy_property.renyi_dp.epsilon
+      alpha = config.privacy_property.renyi_dp.alpha
+      if config.alpha != alpha:
+        raise ValueError(
+            'Alpha parameter for Renyi DP should be specified in'
+            ' privacy_tester_config.privacy_property. It was specified in'
+            ' config.alpha which is only used for Pure DP tests.'
+        )
     elif config.privacy_property.pure_dp is not None:
       property_tester_utils.validate_pure_dp_property(
           config.privacy_property.pure_dp
       )
+      privacy_type = 'pure_dp'
+      epsilon = config.privacy_property.pure_dp.epsilon
+      alpha = config.alpha
     else:
       raise ValueError(
           'The specified privacy_property is not supported by'
           ' RenyiPropertyTester.'
       )
-    property_tester_utils.validate_training_params(config.training_config)
-    params = _renyi_model_parameters_initializer(
-        config=config,
-        base_model=base_model,
-    )
+    property_tester_utils.validate_training_config(config.training_config)
+
+    if privacy_type == 'renyi_dp':
+      self._initial_test_threshold = epsilon
+    else:
+      self._initial_test_threshold = min(epsilon, 2 * alpha * epsilon**2)

-    # Privacy test parameters.
     self._tested_property = config.privacy_property
+    self._alpha = alpha
+
+    self._training_config = config.training_config
+
     self._model_output_coordinate_bound = (
         config.training_config.model_output_coordinate_bound
     )
-    self._alpha = params['alpha']
-    self._test_threshold = params['test_threshold']

-    # Optimization parameters.
-    self._training_config = config.training_config
+    def scaled_tanh(x):
+      return self._model_output_coordinate_bound * tf.keras.activations.tanh(x)

-    self._renyi_model = RenyiModel(params['base_model'], self._alpha)
+    base_model.add(tf.keras.layers.Activation(scaled_tanh))
+
+    self._renyi_model = RenyiModel(base_model, self._alpha)
     self._renyi_model.compile(
         optimizer=tf.keras.optimizers.Adam(
             config.training_config.optimizer_learning_rate
@@ -250,11 +188,15 @@ def __init__(
     )
     self._divergence_train = []

+  @property
+  def _test_threshold(self) -> float:
+    return self._initial_test_threshold
+
   @property
   def privacy_property(self) -> privacy_property.PrivacyProperty:
     return self._tested_property

-  def _reinitialize_nn_model(self):
+  def _reset_model_weights(self):
     for layer in self._renyi_model.nn_model.layers:
       if hasattr(layer, 'kernel'):
         if layer.kernel is not None and hasattr(layer, 'kernel_initializer'):
@@ -263,77 +205,34 @@
       if layer.bias is not None and hasattr(layer, 'bias_initializer'):
         layer.bias.assign(layer.bias_initializer(tf.shape(layer.bias)))

-  def _optimize_renyi_divergence(
+  @override
+  def _get_optimized_divergence_estimation_model(
       self,
       samples_first_distribution: np.ndarray,
       samples_second_distribution: np.ndarray,
-      verbose: int = 0,
-  ) -> tf.Tensor:
-    """Renyi divergence computation.
-
-    Args:
-      samples_first_distribution: one dimensional array with samples.
-      samples_second_distribution: one dimensional arrays with samples and same
-        shape as as p.
-      verbose: whether to print training evolution, for details see
-        `tf.keras.mode.fit`.
-
-    Returns:
-      Estimated Renyi divergence on train samples.
-    """
-    self._reinitialize_nn_model()
+  ) -> tf.keras.Model:
+    self._reset_model_weights()
     self._renyi_model.fit(
         samples_first_distribution,
         samples_second_distribution,
         batch_size=self._training_config.batch_size,
        epochs=self._training_config.training_epochs,
-        verbose=verbose,
+        verbose=self._training_config.verbose,
     )
-    train_renyi = self._renyi_model.history.history['renyi'][-1]
-
-    return train_renyi
+    return self._renyi_model

-  def estimate_divergence_from_samples(
+  @override
+  def _compute_divergence_on_samples(
       self,
-      samples_1_train: np.ndarray,
-      samples_2_train: np.ndarray,
-      samples_1_test: np.ndarray,
-      samples_2_test: np.ndarray,
+      model: tf.keras.Model,
+      samples1_test: np.ndarray,
+      samples2_test: np.ndarray,
       failure_probability: float,
-      verbose: int,
-  ) -> tuple[tf.Tensor, tf.Tensor]:
-    """Estimates Renyi divergence from samples.
-
-    This method estimates the Renyi divergence beween two distributions. First
-    it optimizes over a function space determined by the RenyiModel and
-    then uses the learned function to estimate the Renyi divergence over test
-    samples.
-
-    Args:
-      samples_1_train: Samples from the first distribution used to find a
-        suitable set of parameters for `renyi_model`.
-      samples_2_train: Samples from the second distribution used to find a
-        suitable set of parameters for `renyi_model`.
-      samples_1_test: Samples from the first distribution used to estimate
-        divergence.
-      samples_2_test: Samples from the second distribution used to estimate
-        divergence.
-      failure_probability: P
-      verbose: integer passed to `fit` method for logging.
-
-    Returns:
-      A tuple where the first element is the train divergence and the second is
-      the estimated divergence lower bound.
-    """
-    # Find suitable model parameters.
-    divergence_train = self._optimize_renyi_divergence(
-        samples_1_train, samples_2_train, verbose=verbose
-    )
-
-    divergence_test = self._renyi_model((samples_1_test, samples_2_test))
+  ) -> float:
+    divergence_test = model((samples1_test, samples2_test))

     # Calculate lower end of confidence interval.
-    num_samples = samples_1_test.shape[0]
+    num_samples = min(samples1_test.shape[0], samples2_test.shape[0])
     error = _compute_error_from_samples(
         num_samples=num_samples,
         failure_probability=failure_probability,
@@ -342,34 +241,4 @@
     )
     divergence_test_lower_bound = divergence_test - error

-    return divergence_train, divergence_test_lower_bound
-
-  @override
-  def estimate_lower_bound(
-      self,
-      samples1: np.ndarray,
-      samples2: np.ndarray,
-      failure_probability: float,
-  ) -> float:
-    samples1_train, samples1_test = (
-        property_tester_utils.split_train_test_samples(samples1)
-    )
-    samples2_train, samples2_test = (
-        property_tester_utils.split_train_test_samples(samples2)
-    )
-
-    divergence_train, divergence_test = self.estimate_divergence_from_samples(
-        samples1_train,
-        samples2_train,
-        samples1_test,
-        samples2_test,
-        failure_probability,
-        verbose=0,
-    )
-    self._divergence_train.append(divergence_train)
-
-    return divergence_test.numpy()
-
-  @override
-  def reject_property(self, lower_bound: float) -> bool:
-    return lower_bound > self._test_threshold
+    return divergence_test_lower_bound
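Note: with this change `RenyiPropertyTester` only supplies the two hooks; the train/test split and thresholding move to the shared `DivergencePropertyTester` base class added in this patch (divergence_tester.py, not shown in this excerpt). A sketch of the template flow as reconstructed from the code deleted above; the actual base-class implementation may differ in details.

import numpy as np
from dp_auditorium.testers import property_tester_utils

def estimate_lower_bound_flow(tester, samples1, samples2, failure_probability):
  # Half of each sample set trains the divergence model; the held-out half
  # produces the high-confidence lower bound.
  s1_train, s1_test = property_tester_utils.split_train_test_samples(samples1)
  s2_train, s2_test = property_tester_utils.split_train_test_samples(samples2)
  model = tester._get_optimized_divergence_estimation_model(s1_train, s2_train)
  return float(
      tester._compute_divergence_on_samples(
          model, s1_test, s2_test, failure_probability
      )
  )

def reject_property_flow(tester, lower_bound):
  # Flag a violation when the certified lower bound exceeds the divergence
  # implied by the claimed guarantee; for pure DP audited through Renyi
  # divergence of order alpha, __init__ sets the threshold to
  # min(epsilon, 2 * alpha * epsilon**2).
  return lower_bound > tester._test_threshold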
diff --git a/python/dp_auditorium/dp_auditorium/testers/renyi_tester_test.py b/python/dp_auditorium/dp_auditorium/testers/renyi_tester_test.py
index 74f7029d..6b7d079a 100644
--- a/python/dp_auditorium/dp_auditorium/testers/renyi_tester_test.py
+++ b/python/dp_auditorium/dp_auditorium/testers/renyi_tester_test.py
@@ -118,7 +118,8 @@ def setUp(self):
         alpha=3.0,
     )
     self.renyi_tester = renyi_tester.RenyiPropertyTester(
-        self.renyi_tester_config
+        self.renyi_tester_config,
+        base_model=renyi_tester.make_default_renyi_base_model(),
     )

   @parameterized.parameters(1.1, 1.5)
@@ -136,10 +137,17 @@ def test_returns_lower_bound_gaussian(self, alpha: float):
     x_test = self.rng.normal(0, sigma, (num_samples, 1))
     y_test = self.rng.normal(mu, sigma, (num_samples, 1))
     self.renyi_tester_config.alpha = alpha
-    tester = renyi_tester.RenyiPropertyTester(self.renyi_tester_config)
+    tester = renyi_tester.RenyiPropertyTester(
+        self.renyi_tester_config,
+        base_model=renyi_tester.make_default_renyi_base_model(),
+    )

-    _, divergence_test = tester.estimate_divergence_from_samples(
-        x, y, x_test, y_test, failure_probability=0.1, verbose=0
+    model = tester._get_optimized_divergence_estimation_model(x, y)
+    divergence_test = tester._compute_divergence_on_samples(
+        model,
+        x_test,
+        y_test,
+        failure_probability=0.1,
     )
     logging.info('Result divergence test: %.3f', divergence_test)
     logging.info('Expected divergence: %.3f', expected_divergence)
@@ -165,10 +173,18 @@ def test_returns_lower_bound_uniform(self, alpha: float):
     x_test = self.rng.uniform(low_1, high_1, (num_samples, 1))
     y_test = self.rng.uniform(low_2, high_2, (num_samples, 1))
     self.renyi_tester_config.alpha = alpha
-    tester = renyi_tester.RenyiPropertyTester(self.renyi_tester_config)
-    _, divergence_test = tester.estimate_divergence_from_samples(
-        x, y, x_test, y_test, failure_probability=0.1, verbose=0
+    tester = renyi_tester.RenyiPropertyTester(
+        self.renyi_tester_config,
+        base_model=renyi_tester.make_default_renyi_base_model(),
+    )
+
+    model = tester._get_optimized_divergence_estimation_model(x, y)
+    divergence_test = tester._compute_divergence_on_samples(
+        model,
+        x_test,
+        y_test,
+        failure_probability=0.1,
     )
     logging.info('Result divergence test: %.3f', divergence_test)
     logging.info('Expected divergence: %.3f', expected_divergence)
@@ -198,12 +214,18 @@ def test_returns_lower_bound_exponential(self, alpha: float):
     y_test = self.rng.exponential(lambda_2, (num_samples, 1))

     self.renyi_tester_config.alpha = alpha
-    tester = renyi_tester.RenyiPropertyTester(self.renyi_tester_config)
-
-    _, divergence_test = tester.estimate_divergence_from_samples(
-        x, y, x_test, y_test, failure_probability=0.1, verbose=0
+    tester = renyi_tester.RenyiPropertyTester(
+        self.renyi_tester_config,
+        base_model=renyi_tester.make_default_renyi_base_model(),
     )

+    model = tester._get_optimized_divergence_estimation_model(x, y)
+    divergence_test = tester._compute_divergence_on_samples(
+        model,
+        x_test,
+        y_test,
+        failure_probability=0.1,
+    )
     logging.info('Result divergence test: %.3f', divergence_test)
     logging.info('Expected divergence: %.3f', expected_divergence)
     self.assertLess(divergence_test, expected_divergence)
@@ -228,10 +250,17 @@ def test_returns_lower_bound_laplace(self, alpha: float):
     x_test = self.rng.laplace(mu_1, scale_1, (num_samples, 1))
     y_test = self.rng.laplace(mu_2, scale_2, (num_samples, 1))
     self.renyi_tester_config.alpha = alpha
-    tester = renyi_tester.RenyiPropertyTester(self.renyi_tester_config)
+    tester = renyi_tester.RenyiPropertyTester(
+        self.renyi_tester_config,
+        base_model=renyi_tester.make_default_renyi_base_model(),
+    )

-    _, divergence_test = tester.estimate_divergence_from_samples(
-        x, y, x_test, y_test, failure_probability=0.1, verbose=0
+    model = tester._get_optimized_divergence_estimation_model(x, y)
+    divergence_test = tester._compute_divergence_on_samples(
+        model,
+        x_test,
+        y_test,
+        failure_probability=0.1,
     )
     logging.info('Result divergence test: %.3f', divergence_test)
     logging.info('Expected divergence: %.3f', expected_divergence)
@@ -261,13 +290,10 @@ def test_renyi_model_parameters_initializer_wrong_property(self):
         training_config=self.training_config,
         privacy_property=approx_dp_privacy_property,
     )
-
-    with self.assertRaisesRegex(
-        ValueError,
-        'The specified privacy_property is not supported by RenyiTester.',
-    ):
-      _ = renyi_tester._renyi_model_parameters_initializer(
+    with self.assertRaises(ValueError):
+      _ = renyi_tester.RenyiPropertyTester(
           config=renyi_tester_config,
+          base_model=renyi_tester.make_default_renyi_base_model(),
       )

   @parameterized.parameters(
@@ -301,11 +327,12 @@ def test_renyi_model_parameters_initializer_sets_params(
         training_config=self.training_config,
         privacy_property=tested_property,
     )
-    params = renyi_tester._renyi_model_parameters_initializer(
+    tester = renyi_tester.RenyiPropertyTester(
         config=renyi_tester_config,
+        base_model=renyi_tester.make_default_renyi_base_model(),
     )
-    self.assertAlmostEqual(params['test_threshold'], threshold, places=6)
-    self.assertAlmostEqual(params['alpha'], alpha, places=6)
+    self.assertAlmostEqual(tester._test_threshold, threshold, places=6)
+    self.assertAlmostEqual(tester._alpha, alpha, places=6)

   def test_computes_error_from_samples(self):
     alpha = 2
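Note: the distributional tests above compare the certified lower bound against closed-form Rényi divergences. For the Gaussian case the standard identity is D_alpha(N(mu1, sigma^2) || N(mu2, sigma^2)) = alpha * (mu1 - mu2)^2 / (2 * sigma^2), presumably what `expected_divergence` encodes in test_returns_lower_bound_gaussian (its computation is outside this excerpt). A small check of that value:

def renyi_divergence_equal_variance_gaussians(
    mu_1: float, mu_2: float, sigma: float, alpha: float
) -> float:
  # Closed form for Renyi divergence between equal-variance Gaussians.
  return alpha * (mu_1 - mu_2) ** 2 / (2 * sigma**2)

# Means 1 apart, sigma = 1, alpha = 1.5: true divergence is 0.75, and the
# estimated lower bound must stay below it for the test to pass.
assert renyi_divergence_equal_variance_gaussians(0.0, 1.0, 1.0, 1.5) == 0.75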
diff --git a/python/dp_auditorium/requirements.in b/python/dp_auditorium/requirements.in
index ea1d3510..81c07f35 100644
--- a/python/dp_auditorium/requirements.in
+++ b/python/dp_auditorium/requirements.in
@@ -25,3 +25,5 @@ googleapis-common-protos~=1.56.4
 pysqlite3~=0.5.2  # Used for multi-platform compatibility.
 tensorflow-io-gcs-filesystem~=0.36.0
+pipeline-dp
+python-dp
\ No newline at end of file
diff --git a/python/dp_auditorium/requirements.txt b/python/dp_auditorium/requirements.txt
index aeaddde9..faa18bee 100644
--- a/python/dp_auditorium/requirements.txt
+++ b/python/dp_auditorium/requirements.txt
@@ -78,7 +78,7 @@ grpcio-tools==1.60.0
     # via google-vizier
 h5py==3.10.0
     # via tensorflow
-idna==3.6
+idna==3.7
     # via requests
 importlib-metadata==7.0.1
     # via -r requirements.in
@@ -159,6 +159,8 @@ orbax-checkpoint==0.4.4
     # via flax
 packaging==22.0
     # via tensorflow
+pipeline-dp
+    # via -r requirements.in
 portpicker==1.6.0
     # via google-vizier
 protobuf==4.23.4
@@ -176,6 +178,8 @@ pyasn1==0.5.1
     # via
     #   pyasn1-modules
     #   rsa
+python-dp
+    # via -r requirements.in
 pyasn1-modules==0.3.0
     # via google-auth
 pygments==2.17.2