Skip to content

Commit

Permalink
DP Auditorium divergence testers update
Browse files Browse the repository at this point in the history
Privacy on Beam:
* Bump golang.org/x/net from 0.22.0 to 0.23.0

DP Auditorium:
* Unify RenyiPropertyTester under new divergence tester class
* Add interface for divergence based testers
* Add example of testing PipelineDP mean mechanism in IPython
* Upgrade Histogram tester
* Update dependencies

DP Accounting:
* Increment patch version of DP accounting library for PyPi release

Change-Id: I3a513cf7d7c7e144b11c778f792f723dff53132f
GitOrigin-RevId: 1833e65df37c76d756d3a62ed6ba99112c0a5dd0
  • Loading branch information
Differential Privacy Team authored and dibakch committed Apr 24, 2024
1 parent 04589d4 commit 70c8c7c
Show file tree
Hide file tree
Showing 23 changed files with 997 additions and 504 deletions.
2 changes: 1 addition & 1 deletion privacy-on-beam/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ require (
golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 // indirect
golang.org/x/image v0.15.0 // indirect
golang.org/x/mod v0.16.0 // indirect
golang.org/x/net v0.22.0 // indirect
golang.org/x/net v0.23.0 // indirect
golang.org/x/oauth2 v0.18.0 // indirect
golang.org/x/sync v0.6.0 // indirect
golang.org/x/sys v0.18.0 // indirect
Expand Down
4 changes: 2 additions & 2 deletions privacy-on-beam/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,8 @@ golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwY
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=
golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=
golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.18.0 h1:09qnuIAgzdx1XplqJvW6CQqMCtGZykZWcXzPMPUusvI=
golang.org/x/oauth2 v0.18.0/go.mod h1:Wf7knwG0MPoWIMMBgFlEaSUDaKskp0dCfrlJRJXbBi8=
Expand Down
4 changes: 2 additions & 2 deletions privacy-on-beam/privacy_on_beam_deps.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -2001,8 +2001,8 @@ def privacy_on_beam_deps():
go_repository(
name = "org_golang_x_net",
importpath = "golang.org/x/net",
sum = "h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=",
version = "v0.22.0",
sum = "h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=",
version = "v0.23.0",
)
go_repository(
name = "org_golang_x_oauth2",
Expand Down
2 changes: 1 addition & 1 deletion python/dp_accounting/VERSION
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
""" Version of the current release of DP Accounting """
0.4.3
0.4.4
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,14 @@ class HockeyStickPropertyTesterConfig:
Attributes:
training_config: Required training parameters.
approximate_dp: Approximate DP privacy parameters to be tested.
evaluation_batch_size: Batch size for computing accuracy of classifier
distinguishing two distributions for Hockey Stick divergence. See
`HockeyStickPropertyTester` class for details.
"""

training_config: TrainingConfig
approximate_dp: privacy_property.ApproximateDp
evaluation_batch_size: int = 1000


@dataclasses.dataclass
Expand All @@ -83,13 +87,18 @@ class HistogramPropertyTesterConfig:
min_value: Lower end value for the histogram.
max_value: Upper end value for the histogram.
approximate_dp: Approximate DP privacy parameters to be tested.
use_original_tester: Whether to use the original version of the tester due
to Gilbert and McMillan (2018), or a new version developed for
DP-Auditorium. The new version generally improves over the original
verison, but the original version is retained for comparison purposes.
"""

test_discrete_mechanism: bool
histogram_size: int
min_value: float
max_value: float
approximate_dp: privacy_property.ApproximateDp
use_original_tester: bool = False


class Kernel(enum.Enum):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "I0Z7vNS_ybbU"
},
"source": [
"This colab notebook uses DP-auditorium to test differentially private mechanisms computing aggregate statistics using PipelineDP."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wUtLsXpF9q4D"
},
"source": [
"\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n",
" \u003ctd\u003e\n",
" \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/google/differential-privacy/blob/main/python/dp_auditorium/dp_auditorium/examples/pipelinedp_mean_mechanism_example.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n",
" \u003c/td\u003e\n",
" \u003ctd\u003e\n",
" \u003ca target=\"_blank\" href=\"https://github.com/google/differential-privacy/blob/main/python/dp_auditorium/dp_auditorium/examples/pipelinedp_mean_mechanism_example.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n",
" \u003c/td\u003e\n",
"\u003c/table\u003e\n",
"\n",
"\u003cbr\u003e\n",
"\u003cbr\u003e"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WPLSKwjEHfXI"
},
"outputs": [],
"source": [
"#@title Install and import dp_auditorium and all necessary libraries.\n",
"!pip install google-vizier equinox pipeline_dp\n",
"!git clone https://github.com/google/differential-privacy.git\n",
"import sys\n",
"sys.path.append('differential-privacy/python/dp_auditorium')\n",
"\n",
"from dp_auditorium import privacy_test_runner\n",
"from dp_auditorium.generators import pipeline_dp_vizier_dataset_generator\n",
"from dp_auditorium.configs import dataset_generator_config\n",
"from dp_auditorium.configs import privacy_property\n",
"from dp_auditorium.configs import privacy_test_runner_config\n",
"from dp_auditorium.configs import property_tester_config\n",
"from dp_auditorium.mechanisms.pipeline_dp import aggregation as pipeline_dp_mechanism\n",
"from dp_auditorium.testers import hockey_stick_tester\n",
"\n",
"import pipeline_dp\n",
"import tensorflow as tf\n",
"tf.compat.v1.enable_eager_execution()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "dr5A5W7Aq2SO"
},
"outputs": [],
"source": [
"#@title Example of testing PipelineDP mean mechanism\n",
"import time\n",
"\n",
"def pipeline_dp_mean_mechanism_report(\n",
" epsilon: float,\n",
" delta: float,\n",
" seed: int,\n",
" max_number_partitions: int = 10,\n",
") -\u003e privacy_test_runner_config.PrivacyTestRunnerResults:\n",
" \"\"\"Runs the example code for a mean mechanism.\n",
"\n",
" Args:\n",
" epsilon: standard approximate DP parameter.\n",
" delta: standard approximate DP parameter.\n",
" seed: seed to initialize the random number generator.\n",
" max_number_partitions: maximum number of partitions which can be used by\n",
" dataset generator.\n",
"\n",
" Returns:\n",
" The result of the example code as PrivacyTestRunnerResults.\n",
" \"\"\"\n",
" tf.random.set_seed(seed)\n",
"\n",
    "  # Specify a config for computing with PipelineDP Mean aggregation, namely\n",
" # computing mean aggregation per partition, i.e. in SQL terms DP version of\n",
" # SELECT partition_key, mean(value)\n",
" # GROUP BY partition_key\n",
" # is computed.\n",
" # See https://pipelinedp.io/key-definitions/ on more details of PipelineDP terminology.\n",
" mech_config = pipeline_dp.AggregateParams(\n",
" metrics=[pipeline_dp.Metrics.MEAN],\n",
" # Laplace noise is used for ensuring DP\n",
" noise_kind=pipeline_dp.NoiseKind.LAPLACE,\n",
" # Set contribution bounds:\n",
"\n",
    "    # 1. If some privacy unit contributes to more than 1 partition then\n",
" # PipelineDP will choose randomly 1 partition, contributions to others\n",
" # will be dropped.\n",
" max_partitions_contributed=1,\n",
    "    # 2. If some privacy unit contributes more than 1 time to some\n",
    "    # partition then PipelineDP will choose randomly 1 contribution, other\n",
    "    # contributions will be dropped\n",
" max_contributions_per_partition=1,\n",
"\n",
    "    # 3. Each contribution will be clipped to [-1, 1].\n",
" min_value=-1.0,\n",
" max_value=1.0)\n",
"\n",
" # Initialize the mechanism.\n",
" public_partitions = list(range(max_number_partitions))\n",
" mechanism = pipeline_dp_mechanism.AggregationMechanism(mech_config,\n",
" privacy_property.ApproximateDp(\n",
" epsilon=epsilon,\n",
" delta=delta,\n",
" ), public_partitions)\n",
"\n",
" # Configuration for a Hockey-Stick property tester. Given arrays s1 and s2\n",
" # with samples from two distributions it will estimate the hockey-stick\n",
" # divergence between the underlying distributions. It checks if the estimated\n",
" # divergence is bounded by delta.\n",
" tester_config = property_tester_config.HockeyStickPropertyTesterConfig(\n",
" training_config=hockey_stick_tester.make_default_hs_training_config(),\n",
" approximate_dp=privacy_property.ApproximateDp(\n",
" epsilon=epsilon,\n",
" delta=delta,\n",
" ),\n",
" )\n",
"\n",
" # Initialize a classifier model for the Hockey-Stick property tester.\n",
" # This classifier will learn to distinguish between samples of the mechanism\n",
" # on adjacent datasets. Its accuracy level should be controlled by the privacy\n",
" # guarantee.\n",
" base_model = hockey_stick_tester.make_default_hs_base_model()\n",
" # Initialize a property tester.\n",
" property_tester = hockey_stick_tester.HockeyStickPropertyTester(\n",
" config=tester_config,\n",
" base_model=base_model,\n",
" )\n",
"\n",
" # Configuration for dataset generator. It generates neighboring datasets under\n",
" # the add/remove definition. Unique study name prevents using cached results\n",
" # from previous runs.\n",
" generator_config = dataset_generator_config.VizierDatasetGeneratorConfig(\n",
" study_name=str(time.time()),\n",
" study_owner=\"owner\",\n",
" num_vizier_parameters=2,\n",
" data_type=dataset_generator_config.DataType.DATA_TYPE_FLOAT,\n",
" min_value=-1.0,\n",
" max_value=1.0,\n",
" search_algorithm=\"RANDOM_SEARCH\",\n",
" metric_name=\"hockey_stick_divergence\",\n",
" )\n",
"\n",
" # Dataset generator will generate datasets of not more than\n",
" # max_number_partitions partitions and not more than 10 privacy units.\n",
" # The same partitions are used as public_partitions and as partitions in\n",
" # dataset. So the mechanism will not drop the partitions. We do not check\n",
" # partition selection. We focus only on checking noise.\n",
" pipeline_dp_generator_config = pipeline_dp_vizier_dataset_generator.PipelineDpDatasetGeneratorConfig(\n",
" max_num_privacy_ids=10, max_num_partitions=max_number_partitions)\n",
"\n",
" # Initialize the dataset generator.\n",
" dataset_generator = pipeline_dp_vizier_dataset_generator.PipelineDpDatasetGenerator(\n",
" generator_config, pipeline_dp_generator_config)\n",
"\n",
" # Configuration for the test runner.\n",
" # The test runner coordinates how the test is evaluated. It receives a\n",
" # dataset generator, a property tester and a configuration (see base class for\n",
" # details on these parameters), and runs privacy tests using the property\n",
" # tester on datasets generated by the dataset generator.\n",
" test_runner_config = privacy_test_runner_config.PrivacyTestRunnerConfig(\n",
" property_tester=privacy_test_runner_config.PropertyTester.HOCKEY_STICK_TESTER,\n",
" max_num_trials=10,\n",
" failure_probability=0.05,\n",
" num_samples=10_000,\n",
" # Apply a hyperbolic tangent function to the output of the mechanism\n",
" post_processing=privacy_test_runner_config.PostProcessing.TANH,\n",
" )\n",
" # Initialize the test runner.\n",
" test_runner = privacy_test_runner.PrivacyTestRunner(\n",
" config=test_runner_config,\n",
" dataset_generator=dataset_generator,\n",
" property_tester=property_tester,\n",
" )\n",
"\n",
" return test_runner.test_privacy(mechanism, \"pipeline_dp-mean-mechanism\")\n",
"\n",
"\n",
"EPSILON = 1.0\n",
"DELTA = 1e-5\n",
"SEED = 1\n",
"\n",
"# The results indicate whether a privacy violation was identified within the\n",
"# designated number of trials defined in the configuration. In the absence of a\n",
"# violation, a message is returned indicating that the limit of the number of\n",
"# trials has been reached. For reference, all computed divergences across all\n",
"# trials are also reported.\n",
"results = pipeline_dp_mean_mechanism_report(EPSILON, DELTA, SEED)\n",
"print(f\" \\nResults: \\n{results}\")\n",
"if results.found_privacy_violation is not None:\n",
" print(\"Privacy violations found!\")\n"
]
}
],
"metadata": {
"colab": {
"private_outputs": true,
"provenance": [
{
"file_id": "1QyFD_doucyHewiRMtxGvFxNrFlgbCqQa",
"timestamp": 1708693099970
},
{
"file_id": "1pBgTlH19OwJ3diUYf3m3QaZcVNQGeB8B",
"timestamp": 1708692052606
}
]
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@
" \"\"\"Runs the example code for a mean mechanism.\n",
"\n",
" Args:\n",
" epsilon: standard DP parmaeter.\n",
" delta: standard DP parameter.\n",
"    epsilon: standard approximate DP parameter.\n",
" delta: standard approximate DP parameter.\n",
" seed: seed to initialize the random number generator.\n",
" generator_factory: factory to create a generator; to be replaced in tests\n",
"\n",
Expand Down Expand Up @@ -182,10 +182,6 @@
],
"metadata": {
"colab": {
"last_runtime": {
"build_target": "//learning/vizier/service/colab:notebook",
"kind": "private"
},
"private_outputs": true,
"provenance": [
{
Expand Down
Loading

0 comments on commit 70c8c7c

Please sign in to comment.