Skip to content

Commit

Permalink
Smaller fixes for DP Auditorium
Browse files Browse the repository at this point in the history
* Refactor DP Auditorium example
* Fix dependencies
* Fix README

PiperOrigin-RevId: 606329824
Change-Id: I425c7d754416664f710f1e46db338d4241d145ef
GitOrigin-RevId: f952488cad35fad5b15fb1306d4b62e5e6a79102
  • Loading branch information
Differential Privacy Team authored and dibakch committed Feb 13, 2024
1 parent 4ca99c0 commit 12c1279
Show file tree
Hide file tree
Showing 10 changed files with 209 additions and 79 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ private statistics over datasets. It contains the following tools.
tracking privacy budget.
* A [command line interface](examples/zetasql) for running differentially
private SQL queries with [ZetaSQL](https://github.com/google/zetasql).
* [DP Auditorium](python/dp_auditorium) is a library for auditing differential
privacy guarantees.

To get started on generating differentially private data, we recommend you follow
the [Privacy on Beam codelab](https://codelabs.developers.google.com/codelabs/privacy-on-beam/).
Expand Down
40 changes: 22 additions & 18 deletions python/dp_auditorium/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,35 +30,39 @@ Details on the signature of the above objects, can be found in the
An illustrative example is available in
`examples/run_mean_mechanism_example.py`. This binary defines a Hockey-Stick
divergence test as the property tester and a dataset generator that employs a
Gaussian process bandit (using OSS Vizier) for suggesting datasets.
Subsequently, the runner is instantiated with these two objects to conduct a
test on a non-private mean mechanism.
random search (using [Vizier](https://github.com/google/vizier)) for suggesting
datasets. Subsequently, the runner is instantiated with these two objects to
conduct a test on a non-private mean mechanism.

There are two ways to run this, either via Bazel or after installing the library
using `setup.py`.
using `pip install`. Before installing, please ensure that your machine has the
`sqlite3` library installed, e.g., `sudo apt-get install libsqlite3-dev` on Ubuntu
machines.

### Run with bazel
### Run with Bazelisk

For the first option, you need to have
[Bazel installed](https://docs.bazel.build/versions/main/install.html).
Once that is done, run:
[Bazelisk installed](https://github.com/bazelbuild/bazelisk). Once that is done,
run:

```
bazel build dp_auditorium:all
bazel run dp_auditorium/examples:run_mean_mechanism_example
bazelisk build dp_auditorium:all
bazelisk run dp_auditorium/examples:run_mean_mechanism_example
```

### Run via setup.py
### Run via pip

For the second option, you will need the
[setuptools package](https://pypi.org/project/setuptools/) installed.
To ensure this, you may run
For the second option, you will need the [setuptools
package](https://pypi.org/project/setuptools/) installed. To ensure this, you
may run `pip install --upgrade setuptools`. Then, to demonstrate our example,
run:
```
pip install --upgrade setuptools
python -m pip install .
python dp_auditorium/examples/run_mean_mechanism_example.py
```
Then, to demonstrate our example, run:
Some macOS users may run into issues linking SQLite3 with Python during the
installation of DP-Auditorium. In these cases, users may have to pass the
relevant C compiler flags to `pip` directly. For example,
```
python setup.py install
python dp_auditorium/examples/run_mean_mechanism_example.py
CFLAGS=-Wno-error=implicit-function-declaration pip install .
```

25 changes: 25 additions & 0 deletions python/dp_auditorium/dp_auditorium/examples/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,31 @@ py_binary(
"//dp_auditorium/mechanisms:mean",
"//dp_auditorium/testers:hockey_stick_tester",
requirement("absl-py"),
requirement("jax"),
requirement("jaxopt"),
requirement("numpy"),
requirement("tensorflow"),
requirement("tensorflow_probability"),
],
)

# Test target for the mean mechanism example. It depends on the example binary
# target and on the Vizier dataset generator library.
py_test(
name = "run_mean_mechanism_example_test",
srcs = ["run_mean_mechanism_example_test.py"],
deps = [
":run_mean_mechanism_example",
"//dp_auditorium/generators:vizier_dataset_generator",
requirement("absl-py"),
requirement("google-vizier"),
requirement("numpy"),
# Needed to fix strange dependency bugs in Vizier. Order is important!
# Do not sort or regroup the entries below.
requirement("equinox"),
requirement("flax"),
requirement("googleapis-common-protos"),
requirement("jax"),
requirement("jaxlib"),
requirement("pysqlite3"),
"//dp_auditorium:interfaces",
"//dp_auditorium/configs",
],
)
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@
"""Example binary running privacy tests for a DP mean mechanism."""

from collections.abc import Sequence
import os
import time
from typing import Callable

from absl import app
from absl import flags
from absl import logging
import numpy as np
import tensorflow as tf

from dp_auditorium import privacy_test_runner
from dp_auditorium.configs import dataset_generator_config
Expand All @@ -32,28 +33,51 @@
from dp_auditorium.testers import hockey_stick_tester


_OUTPUT_DIR = flags.DEFINE_string("output_dir", "", "Output directory")
_EXPERIMENT_NAME = flags.DEFINE_string("experiment_name", "", "Experiment name")
_EPSILON = flags.DEFINE_float("epsilon", 0.01, "Privacy parameter")
_EPSILON = flags.DEFINE_float("epsilon", 1.0, "Privacy parameter")
_DELTA = flags.DEFINE_float("delta", 0.0, "Privacy parameter")
_SEED = flags.DEFINE_integer(
"seed", 0, "Seed to initialize random numbers generator."
)


def main(argv: Sequence[str]) -> None:
if len(argv) > 1:
raise app.UsageError("Too many command-line arguments.")
def default_generator_factory(
    config: dataset_generator_config.VizierDatasetGeneratorConfig,
) -> vizier_dataset_generator.VizierScalarDataAddRemoveGenerator:
  """Builds the default Vizier-backed dataset generator for `config`.

  Args:
    config: Vizier dataset generator configuration, forwarded unchanged to the
      generator's constructor.

  Returns:
    A `VizierScalarDataAddRemoveGenerator` constructed from `config`.
  """
  generator_cls = vizier_dataset_generator.VizierScalarDataAddRemoveGenerator
  generator = generator_cls(config=config)
  return generator


rng = np.random.default_rng(seed=_SEED.value)
def mean_mechanism_report(
epsilon: float,
delta: float,
seed: int,
generator_factory: Callable[
[dataset_generator_config.VizierDatasetGeneratorConfig],
vizier_dataset_generator.VizierScalarDataAddRemoveGenerator,
] = default_generator_factory,
) -> privacy_test_runner_config.PrivacyTestRunnerResults:
"""Runs the example code for a mean mechanism.
Args:
epsilon: standard DP parameter.
delta: standard DP parameter.
seed: seed to initialize the random number generator.
generator_factory: factory to create a generator; to be replaced in tests
Returns:
The result of the example code as PrivacyTestRunnerResults.
"""
rng = np.random.default_rng(seed=seed)
tf.random.set_seed(seed)

# Configuration for a non-private mean mechanism that uses the true number of
# points to calculate the average.
# points to calculate the average and the scale of the noise.
mech_config = mechanism_config.MeanMechanismConfig(
epsilon=_EPSILON.value,
delta=_DELTA.value,
epsilon=epsilon,
delta=delta,
use_noised_counts_for_calculating_mean=False,
use_noised_counts_for_calculating_noise_scale=True,
use_noised_counts_for_calculating_noise_scale=False,
min_value=0.0,
max_value=1.0,
)
Expand All @@ -67,8 +91,8 @@ def main(argv: Sequence[str]) -> None:
tester_config = property_tester_config.HockeyStickPropertyTesterConfig(
training_config=hockey_stick_tester.make_default_hs_training_config(),
approximate_dp=privacy_property.ApproximateDp(
epsilon=_EPSILON.value,
delta=_DELTA.value,
epsilon=epsilon,
delta=delta,
),
)
# Initialize a classifier model for the Hockey-Stick property tester.
Expand All @@ -80,28 +104,25 @@ def main(argv: Sequence[str]) -> None:
)

# Configuration for dataset generator. It generates neighboring datasets under
# the add/remove definition.
# the add/remove definition. Unique study name prevents using cached results
# from previous runs.
generator_config = dataset_generator_config.VizierDatasetGeneratorConfig(
study_name="non-private-mean-hockey-stick-test",
study_name=str(time.time()),
study_owner="owner",
num_vizier_parameters=2,
data_type=dataset_generator_config.DataType.DATA_TYPE_FLOAT,
min_value=-1.0,
max_value=1.0,
search_algorithm="GAUSSIAN_PROCESS_BANDIT",
search_algorithm="RANDOM_SEARCH",
metric_name="hockey_stick_divergence",
)
# Initialize the dataset generator.
dataset_generator = (
vizier_dataset_generator.VizierScalarDataAddRemoveGenerator(
config=generator_config,
)
)
dataset_generator = generator_factory(generator_config)

# Configuration for the test runner.
test_runner_config = privacy_test_runner_config.PrivacyTestRunnerConfig(
property_tester=privacy_test_runner_config.PropertyTester.HOCKEY_STICK_TESTER,
max_num_trials=5,
max_num_trials=10,
failure_probability=0.05,
num_samples=10_000,
# Apply a hyperbolic tangent function to the output of the mechanism
Expand All @@ -114,22 +135,17 @@ def main(argv: Sequence[str]) -> None:
property_tester=property_tester,
)

results = test_runner.test_privacy(mechanism, "non-private-mean-mechanism")
return test_runner.test_privacy(mechanism, "non-private-mean-mechanism")

logging.info("\nResults: \n")
logging.info(results)

# Write results to the output directory.
if not os.path.exists(_OUTPUT_DIR.value):
os.makedirs(_OUTPUT_DIR.value)
logging.info("Directory %s created successfully", _OUTPUT_DIR.value)
else:
print(f"Directory '{_OUTPUT_DIR.value}' already exists")
def main(argv: Sequence[str]) -> None:
if len(argv) > 1:
raise app.UsageError("Too many command-line arguments.")

output_file_name = os.path.join(_OUTPUT_DIR.value, _EXPERIMENT_NAME.value)
with open(output_file_name, mode="w") as f:
f.write(str(results))
results = mean_mechanism_report(_EPSILON.value, _DELTA.value, _SEED.value)

print(" \nResults: \n")
print(results)

if __name__ == "__main__":
app.run(main)
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright 2024 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from absl.testing import absltest
import numpy as np
from vizier.service import clients

from dp_auditorium import interfaces
from dp_auditorium.configs import dataset_generator_config
from dp_auditorium.examples.run_mean_mechanism_example import mean_mechanism_report
from dp_auditorium.generators import vizier_dataset_generator


class StubVizierGenerator(
    vizier_dataset_generator.VizierScalarDataAddRemoveGenerator
):
  """Generator stub whose datasets do not depend on the Vizier parameters."""

  def get_neighboring_datasets_from_vizier_params(
      self, vizier_params: np.ndarray
  ) -> interfaces.NeighboringDatasetsType:
    """Returns a fixed pair of datasets, ignoring `vizier_params`.

    Args:
      vizier_params: Parameters suggested by Vizier; unused by this stub.

    Returns:
      Two separate arrays, each holding two ones.
    """
    first_dataset = np.ones(2)
    second_dataset = np.ones(2)
    return first_dataset, second_dataset


class RunMeanMechanismExampleTest(absltest.TestCase):
  """Smoke test for the mean mechanism example."""

  def test_generates_result(self):
    """Runs the example end to end and checks that it yields a result."""
    # NOTE(review): presumably switches the Vizier servicer to an in-RAM SQL
    # database so the test does not touch persistent storage; confirm against
    # the Vizier client documentation.
    clients.environment_variables.servicer_use_sql_ram()

    # Inject the stub generator through the factory hook. Passing the real
    # generator here would be identical to the example's default factory and
    # would leave StubVizierGenerator unused.
    output = mean_mechanism_report(
        epsilon=0.1,
        delta=0.1,
        seed=1,
        generator_factory=lambda config: StubVizierGenerator(config=config),
    )

    self.assertNotEmpty(str(output))


if __name__ == "__main__":
absltest.main()
18 changes: 13 additions & 5 deletions python/dp_auditorium/dp_auditorium/testers/hockey_stick_tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
import tensorflow as tf

from dp_auditorium import interfaces
from dp_auditorium.configs import property_tester_config as config
from dp_auditorium.configs import privacy_property
from dp_auditorium.configs import property_tester_config
from dp_auditorium.testers import property_tester_utils


Expand All @@ -50,8 +51,8 @@ class HockeyStickDivergenceTrainingOptions:
verbose: int


def make_default_hs_training_config() -> config.TrainingConfig:
return config.TrainingConfig(
def make_default_hs_training_config() -> property_tester_config.TrainingConfig:
return property_tester_config.TrainingConfig(
training_epochs=2,
optimizer_learning_rate=1e-2,
batch_size=100,
Expand All @@ -69,7 +70,7 @@ def make_default_hs_base_model() -> tf.keras.Model:


def make_training_options_from_config(
training_config: config.TrainingConfig,
training_config: property_tester_config.TrainingConfig,
):
return HockeyStickDivergenceTrainingOptions(
num_epochs=training_config.training_epochs,
Expand Down Expand Up @@ -123,7 +124,7 @@ class must return logits.

def __init__(
self,
config: config.HockeyStickPropertyTesterConfig,
config: property_tester_config.HockeyStickPropertyTesterConfig,
base_model: tf.keras.Model,
):
"""Initializes the instance.
Expand All @@ -140,12 +141,18 @@ def __init__(
self._base_model = base_model
self._epsilon = config.approximate_dp.epsilon
self._delta = config.approximate_dp.delta
self._approximate_dp = config.approximate_dp
self._has_called_fit = False
self._training_options = make_training_options_from_config(
config.training_config
)
self.initialize(self._training_options)

@property
def privacy_property(self) -> privacy_property.PrivacyProperty:
  """Returns the privacy guarantee this tester is configured to test for.

  The result wraps the approximate-DP setting stored on the instance in a
  `PrivacyProperty`.
  """
  approximate_dp = self._approximate_dp
  return privacy_property.PrivacyProperty(approximate_dp=approximate_dp)

def initialize(self, training_options: HockeyStickDivergenceTrainingOptions):
"""Compiles internal model.
Expand Down Expand Up @@ -225,6 +232,7 @@ def _fit(
epochs: Number of epochs to train for.
verbose: Option passed to keras trainer.
"""
self.initialize(self._training_options)
features, labels = self._generate_inputs_to_model(samples1, samples2)
self._base_model.fit(
features,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,18 @@ def test_assert_privacy_violation(self):
self.assertTrue(hsdt.reject_property(0.11))
self.assertFalse(hsdt.reject_property(0.09))

def test_privacy_property(self):
  """The tester exposes the approximate-DP setting from its config."""
  default_training = hst.make_default_hs_training_config()
  dp_guarantee = self.make_privacy_property(epsilon=1.0, delta=0.1)
  tester_config = config.HockeyStickPropertyTesterConfig(
      approximate_dp=dp_guarantee,
      training_config=default_training,
  )
  default_model = hst.make_default_hs_base_model()
  tester = hst.HockeyStickPropertyTester(
      config=tester_config,
      base_model=default_model,
  )

  reported = tester.privacy_property.approximate_dp
  self.assertEqual(tester_config.approximate_dp, reported)

if __name__ == "__main__":
absltest.main()
Loading

0 comments on commit 12c1279

Please sign in to comment.