diff --git a/privacy-on-beam/codelab/count.go b/privacy-on-beam/codelab/count.go index a7f577eb..a660dd53 100644 --- a/privacy-on-beam/codelab/count.go +++ b/privacy-on-beam/codelab/count.go @@ -55,7 +55,7 @@ func extractVisitHourFn(v Visit) int { func PrivateCountVisitsPerHour(s beam.Scope, col beam.PCollection) beam.PCollection { s = s.Scope("PrivateCountVisitsPerHour") // Create a Privacy Spec and convert col into a PrivatePCollection - spec, err := pbeam.NewPrivacySpecTemp(pbeam.PrivacySpecParams{ + spec, err := pbeam.NewPrivacySpec(pbeam.PrivacySpecParams{ AggregationEpsilon: epsilon / 2, PartitionSelectionEpsilon: epsilon / 2, PartitionSelectionDelta: delta, diff --git a/privacy-on-beam/codelab/mean.go b/privacy-on-beam/codelab/mean.go index 81e016be..941f3ce2 100644 --- a/privacy-on-beam/codelab/mean.go +++ b/privacy-on-beam/codelab/mean.go @@ -48,7 +48,7 @@ func extractVisitHourAndTimeSpentFn(v Visit) (int, int) { func PrivateMeanTimeSpent(s beam.Scope, col beam.PCollection) beam.PCollection { s = s.Scope("PrivateMeanTimeSpent") // Create a Privacy Spec and convert col into a PrivatePCollection. - spec, err := pbeam.NewPrivacySpecTemp(pbeam.PrivacySpecParams{AggregationEpsilon: epsilon}) + spec, err := pbeam.NewPrivacySpec(pbeam.PrivacySpecParams{AggregationEpsilon: epsilon}) if err != nil { log.Fatalf("Couldn't create a PrivacySpec: %v", err) } diff --git a/privacy-on-beam/codelab/multiple.go b/privacy-on-beam/codelab/multiple.go index af481375..05ad5b1a 100644 --- a/privacy-on-beam/codelab/multiple.go +++ b/privacy-on-beam/codelab/multiple.go @@ -29,7 +29,7 @@ import ( func ComputeCountMeanSum(s beam.Scope, col beam.PCollection) (visitsPerHour, meanTimeSpent, revenues beam.PCollection) { s = s.Scope("ComputeCountMeanSum") // Create a Privacy Spec and convert col into a PrivatePCollection. - spec, err := pbeam.NewPrivacySpecTemp(pbeam.PrivacySpecParams{AggregationEpsilon: epsilon}) // Shared by count, mean and sum. 
+ spec, err := pbeam.NewPrivacySpec(pbeam.PrivacySpecParams{AggregationEpsilon: epsilon}) // Shared by count, mean and sum. if err != nil { log.Fatalf("Couldn't create a PrivacySpec: %v", err) } diff --git a/privacy-on-beam/codelab/public_partitions.go b/privacy-on-beam/codelab/public_partitions.go index c66028f4..2e43f164 100644 --- a/privacy-on-beam/codelab/public_partitions.go +++ b/privacy-on-beam/codelab/public_partitions.go @@ -28,7 +28,7 @@ import ( func PrivateCountVisitsPerHourWithPublicPartitions(s beam.Scope, col beam.PCollection) beam.PCollection { s = s.Scope("PrivateCountVisitsPerHourWithPublicPartitions") // Create a Privacy Spec and convert col into a PrivatePCollection. - spec, err := pbeam.NewPrivacySpecTemp(pbeam.PrivacySpecParams{AggregationEpsilon: epsilon}) + spec, err := pbeam.NewPrivacySpec(pbeam.PrivacySpecParams{AggregationEpsilon: epsilon}) if err != nil { log.Fatalf("Couldn't create a PrivacySpec: %v", err) } diff --git a/privacy-on-beam/codelab/sum.go b/privacy-on-beam/codelab/sum.go index 60db8fee..f6a2b75c 100644 --- a/privacy-on-beam/codelab/sum.go +++ b/privacy-on-beam/codelab/sum.go @@ -47,7 +47,7 @@ func extractVisitHourAndMoneySpentFn(v Visit) (int, int) { func PrivateRevenuePerHour(s beam.Scope, col beam.PCollection) beam.PCollection { s = s.Scope("PrivateRevenuePerHour") // Create a Privacy Spec and convert col into a PrivatePCollection. - spec, err := pbeam.NewPrivacySpecTemp(pbeam.PrivacySpecParams{AggregationEpsilon: epsilon}) + spec, err := pbeam.NewPrivacySpec(pbeam.PrivacySpecParams{AggregationEpsilon: epsilon}) if err != nil { log.Fatalf("Couldn't create a PrivacySpec: %v", err) } diff --git a/privacy-on-beam/pbeam/aggregations.go b/privacy-on-beam/pbeam/aggregations.go index 59ea9016..6774fe53 100644 --- a/privacy-on-beam/pbeam/aggregations.go +++ b/privacy-on-beam/pbeam/aggregations.go @@ -249,34 +249,7 @@ func (fn *decodePairArrayFloat64Fn) ProcessElement(pair pairArrayFloat64) (beam. 
} // newBoundedSumFn returns a boundedSumInt64Fn or boundedSumFloat64Fn depending on vKind. -func newBoundedSumFn(epsilon, delta float64, maxPartitionsContributed int64, lower, upper float64, noiseKind noise.Kind, vKind reflect.Kind, publicPartitions bool, testMode TestMode) (any, error) { - var err, checkErr error - var bsFn any - - switch vKind { - case reflect.Int64: - checkErr = checks.CheckBoundsFloat64AsInt64(lower, upper) - if checkErr != nil { - return nil, checkErr - } - bsFn, err = newBoundedSumInt64Fn(epsilon, delta, maxPartitionsContributed, int64(lower), int64(upper), noiseKind, publicPartitions, testMode) - case reflect.Float64: - checkErr = checks.CheckBoundsFloat64(lower, upper) - if checkErr != nil { - return nil, checkErr - } - bsFn, err = newBoundedSumFloat64Fn(epsilon, delta, maxPartitionsContributed, lower, upper, noiseKind, publicPartitions, testMode) - default: - err = fmt.Errorf("vKind(%v) should be int64 or float64", vKind) - } - - return bsFn, err -} - -// newBoundedSumFn returns a boundedSumInt64Fn or boundedSumFloat64Fn depending on vKind. -// -// Uses the new privacy budget API where clients specify aggregation budget and partition selection budget separately. 
-func newBoundedSumFnTemp(spec PrivacySpec, params SumParams, noiseKind noise.Kind, vKind reflect.Kind, publicPartitions bool) (any, error) { +func newBoundedSumFn(spec PrivacySpec, params SumParams, noiseKind noise.Kind, vKind reflect.Kind, publicPartitions bool) (any, error) { var err, checkErr error var bsFn any switch vKind { @@ -285,13 +258,13 @@ func newBoundedSumFnTemp(spec PrivacySpec, params SumParams, noiseKind noise.Kin if checkErr != nil { return nil, checkErr } - bsFn, err = newBoundedSumInt64FnTemp(params.AggregationEpsilon, params.AggregationDelta, params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta, spec.preThreshold, params.MaxPartitionsContributed, int64(params.MinValue), int64(params.MaxValue), noiseKind, publicPartitions, spec.testMode) + bsFn, err = newBoundedSumInt64Fn(spec, params, noiseKind, publicPartitions) case reflect.Float64: checkErr = checks.CheckBoundsFloat64(params.MinValue, params.MaxValue) if checkErr != nil { return nil, checkErr } - bsFn, err = newBoundedSumFloat64FnTemp(params.AggregationEpsilon, params.AggregationDelta, params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta, spec.preThreshold, params.MaxPartitionsContributed, params.MinValue, params.MaxValue, noiseKind, publicPartitions, spec.testMode) + bsFn, err = newBoundedSumFloat64Fn(spec, params, noiseKind, publicPartitions) default: err = fmt.Errorf("vKind(%v) should be int64 or float64", vKind) } @@ -324,53 +297,22 @@ type boundedSumInt64Fn struct { } // newBoundedSumInt64Fn returns a boundedSumInt64Fn with the given budget and parameters. 
-func newBoundedSumInt64Fn(epsilon, delta float64, maxPartitionsContributed, lower, upper int64, noiseKind noise.Kind, publicPartitions bool, testMode TestMode) (*boundedSumInt64Fn, error) { - fn := &boundedSumInt64Fn{ - MaxPartitionsContributed: maxPartitionsContributed, - Lower: lower, - Upper: upper, - NoiseKind: noiseKind, - PublicPartitions: publicPartitions, - TestMode: testMode, - } - if fn.PublicPartitions { - fn.NoiseEpsilon = epsilon - fn.NoiseDelta = delta - return fn, nil - } - fn.NoiseEpsilon = epsilon / 2 - fn.PartitionSelectionEpsilon = epsilon - fn.NoiseEpsilon - switch noiseKind { - case noise.GaussianNoise: - fn.NoiseDelta = delta / 2 - case noise.LaplaceNoise: - fn.NoiseDelta = 0 - default: - return nil, fmt.Errorf("unknown noise.Kind (%v) is specified. Please specify a valid noise", noiseKind) - } - fn.PartitionSelectionDelta = delta - fn.NoiseDelta - return fn, nil -} - -// newBoundedSumInt64Fn returns a boundedSumInt64Fn with the given budget and parameters. -// -// Uses the new privacy budget API where clients specify aggregation budget and partition selection budget separately. -func newBoundedSumInt64FnTemp(aggregationEpsilon, aggregationDelta, partitionSelectionEpsilon, partitionSelectionDelta float64, preThreshold, maxPartitionsContributed, lower, upper int64, noiseKind noise.Kind, publicPartitions bool, testMode TestMode) (*boundedSumInt64Fn, error) { +func newBoundedSumInt64Fn(spec PrivacySpec, params SumParams, noiseKind noise.Kind, publicPartitions bool) (*boundedSumInt64Fn, error) { if noiseKind != noise.GaussianNoise && noiseKind != noise.LaplaceNoise { return nil, fmt.Errorf("unknown noise.Kind (%v) is specified. 
Please specify a valid noise", noiseKind) } return &boundedSumInt64Fn{ - NoiseEpsilon: aggregationEpsilon, - NoiseDelta: aggregationDelta, - PartitionSelectionEpsilon: partitionSelectionEpsilon, - PartitionSelectionDelta: partitionSelectionDelta, - PreThreshold: preThreshold, - MaxPartitionsContributed: maxPartitionsContributed, - Lower: lower, - Upper: upper, + NoiseEpsilon: params.AggregationEpsilon, + NoiseDelta: params.AggregationDelta, + PartitionSelectionEpsilon: params.PartitionSelectionParams.Epsilon, + PartitionSelectionDelta: params.PartitionSelectionParams.Delta, + PreThreshold: spec.preThreshold, + MaxPartitionsContributed: params.MaxPartitionsContributed, + Lower: int64(params.MinValue), + Upper: int64(params.MaxValue), NoiseKind: noiseKind, PublicPartitions: publicPartitions, - TestMode: testMode, + TestMode: spec.testMode, }, nil } @@ -488,54 +430,23 @@ type boundedSumFloat64Fn struct { TestMode TestMode } -// newBoundedSumFloat64Fn returns a boundedSumFloat64Fn with the given budget and parameters. -func newBoundedSumFloat64Fn(epsilon, delta float64, maxPartitionsContributed int64, lower, upper float64, noiseKind noise.Kind, publicPartitions bool, testMode TestMode) (*boundedSumFloat64Fn, error) { - fn := &boundedSumFloat64Fn{ - MaxPartitionsContributed: maxPartitionsContributed, - Lower: lower, - Upper: upper, - NoiseKind: noiseKind, - PublicPartitions: publicPartitions, - TestMode: testMode, - } - if fn.PublicPartitions { - fn.NoiseEpsilon = epsilon - fn.NoiseDelta = delta - return fn, nil - } - fn.NoiseEpsilon = epsilon / 2 - fn.PartitionSelectionEpsilon = epsilon - fn.NoiseEpsilon - switch noiseKind { - case noise.GaussianNoise: - fn.NoiseDelta = delta / 2 - case noise.LaplaceNoise: - fn.NoiseDelta = 0 - default: - return nil, fmt.Errorf("unknown noise.Kind (%v) is specified. 
Please specify a valid noise", noiseKind) - } - fn.PartitionSelectionDelta = delta - fn.NoiseDelta - return fn, nil -} - // newBoundedSumFloat64FnTemp returns a boundedSumFloat64Fn with the given budget and parameters. -// -// Uses the new privacy budget API where clients specify aggregation budget and partition selection budget separately. -func newBoundedSumFloat64FnTemp(aggregationEpsilon, aggregationDelta, partitionSelectionEpsilon, partitionSelectionDelta float64, preThreshold, maxPartitionsContributed int64, lower, upper float64, noiseKind noise.Kind, publicPartitions bool, testMode TestMode) (*boundedSumFloat64Fn, error) { +func newBoundedSumFloat64Fn(spec PrivacySpec, params SumParams, noiseKind noise.Kind, publicPartitions bool) (*boundedSumFloat64Fn, error) { if noiseKind != noise.GaussianNoise && noiseKind != noise.LaplaceNoise { return nil, fmt.Errorf("unknown noise.Kind (%v) is specified. Please specify a valid noise", noiseKind) } return &boundedSumFloat64Fn{ - NoiseEpsilon: aggregationEpsilon, - NoiseDelta: aggregationDelta, - PartitionSelectionEpsilon: partitionSelectionEpsilon, - PartitionSelectionDelta: partitionSelectionDelta, - PreThreshold: preThreshold, - MaxPartitionsContributed: maxPartitionsContributed, - Lower: lower, - Upper: upper, + NoiseEpsilon: params.AggregationEpsilon, + NoiseDelta: params.AggregationDelta, + PartitionSelectionEpsilon: params.PartitionSelectionParams.Epsilon, + PartitionSelectionDelta: params.PartitionSelectionParams.Delta, + PreThreshold: spec.preThreshold, + MaxPartitionsContributed: params.MaxPartitionsContributed, + Lower: params.MinValue, + Upper: params.MaxValue, NoiseKind: noiseKind, PublicPartitions: publicPartitions, - TestMode: testMode, + TestMode: spec.testMode, }, nil } diff --git a/privacy-on-beam/pbeam/aggregations_test.go b/privacy-on-beam/pbeam/aggregations_test.go index 20e2ba80..b5c3e738 100644 --- a/privacy-on-beam/pbeam/aggregations_test.go +++ b/privacy-on-beam/pbeam/aggregations_test.go @@ 
-28,78 +28,6 @@ import ( ) func TestNewBoundedSumFn(t *testing.T) { - opts := []cmp.Option{ - cmpopts.EquateApprox(0, 1e-10), - cmpopts.IgnoreUnexported(boundedSumFloat64Fn{}, boundedSumInt64Fn{}), - } - for _, tc := range []struct { - desc string - noiseKind noise.Kind - vKind reflect.Kind - want any - }{ - {"Laplace Float64", noise.LaplaceNoise, reflect.Float64, - &boundedSumFloat64Fn{ - NoiseEpsilon: 0.5, - PartitionSelectionEpsilon: 0.5, - NoiseDelta: 0, - PartitionSelectionDelta: 1e-5, - MaxPartitionsContributed: 17, - Lower: 0, - Upper: 10, - NoiseKind: noise.LaplaceNoise, - PublicPartitions: false, - }}, - {"Gaussian Float64", noise.GaussianNoise, reflect.Float64, - &boundedSumFloat64Fn{ - NoiseEpsilon: 0.5, - PartitionSelectionEpsilon: 0.5, - NoiseDelta: 5e-6, - PartitionSelectionDelta: 5e-6, - MaxPartitionsContributed: 17, - Lower: 0, - Upper: 10, - NoiseKind: noise.GaussianNoise, - PublicPartitions: false, - }}, - {"Laplace Int64", noise.LaplaceNoise, reflect.Int64, - &boundedSumInt64Fn{ - NoiseEpsilon: 0.5, - PartitionSelectionEpsilon: 0.5, - NoiseDelta: 0, - PartitionSelectionDelta: 1e-5, - MaxPartitionsContributed: 17, - Lower: 0, - Upper: 10, - NoiseKind: noise.LaplaceNoise, - PublicPartitions: false, - }}, - {"Gaussian Int64", noise.GaussianNoise, reflect.Int64, - &boundedSumInt64Fn{ - NoiseEpsilon: 0.5, - PartitionSelectionEpsilon: 0.5, - NoiseDelta: 5e-6, - PartitionSelectionDelta: 5e-6, - MaxPartitionsContributed: 17, - Lower: 0, - Upper: 10, - NoiseKind: noise.GaussianNoise, - PublicPartitions: false, - }}, - } { - got, err := newBoundedSumFn(1, 1e-5, 17, 0, 10, tc.noiseKind, tc.vKind, false, TestModeDisabled) - if err != nil { - t.Fatalf("Couldn't get boundedSumFn: %v", err) - } - if diff := cmp.Diff(tc.want, got, opts...); diff != "" { - t.Errorf("newBoundedSumFn mismatch for '%s' (-want +got):\n%s", tc.desc, diff) - } - } -} - -// The logic mirrors TestNewBoundedSumFn, but with the new privacy budget API where -// clients specify aggregation 
budget and partition selection budget separately. -func TestNewBoundedSumFnTemp(t *testing.T) { opts := []cmp.Option{ cmpopts.EquateApprox(0, 1e-10), cmpopts.IgnoreUnexported(boundedSumFloat64Fn{}, boundedSumInt64Fn{}), @@ -195,7 +123,7 @@ func TestNewBoundedSumFnTemp(t *testing.T) { {"lower > upper", noise.GaussianNoise, reflect.Int64, 0.5, 1e-5, 0.5, 1e-5, 0, 10, 0, true, nil}, {"Float64 bounds that overflow when converted to int64", noise.GaussianNoise, reflect.Int64, 0.5, 1e-5, 0.5, 1e-5, 0, 0, 1e100, true, nil}, } { - got, err := newBoundedSumFnTemp(PrivacySpec{preThreshold: tc.preThreshold, testMode: TestModeDisabled}, + got, err := newBoundedSumFn(PrivacySpec{preThreshold: tc.preThreshold, testMode: TestModeDisabled}, SumParams{ AggregationEpsilon: tc.aggregationEpsilon, AggregationDelta: tc.aggregationDelta, @@ -221,7 +149,13 @@ func TestBoundedSumFloat64FnSetup(t *testing.T) { }{ {"Laplace noise kind", noise.LaplaceNoise, noise.Laplace()}, {"Gaussian noise kind", noise.GaussianNoise, noise.Gaussian()}} { - got, err := newBoundedSumFloat64Fn(1, 1e-5, 17, 0, 10, tc.noiseKind, false, TestModeDisabled) + spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: 0.5, PartitionSelectionEpsilon: 0.5, PartitionSelectionDelta: 1e-5}) + got, err := newBoundedSumFloat64Fn( + *spec, + SumParams{AggregationEpsilon: 0.5, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 0.5, Delta: 1e-5}, MaxPartitionsContributed: 17, MinValue: 0, MaxValue: 10}, + tc.noiseKind, + false, + ) if err != nil { t.Fatalf("Couldn't get boundedSumFloat64Fn: %v", err) } @@ -239,8 +173,15 @@ func TestBoundedSumInt64FnSetup(t *testing.T) { wantNoise any }{ {"Laplace noise kind", noise.LaplaceNoise, noise.Laplace()}, - {"Gaussian noise kind", noise.GaussianNoise, noise.Gaussian()}} { - got, err := newBoundedSumInt64Fn(1, 1e-5, 17, 0, 10, tc.noiseKind, false, TestModeDisabled) + {"Gaussian noise kind", noise.GaussianNoise, noise.Gaussian()}, + } { + spec := privacySpec(t, 
PrivacySpecParams{AggregationEpsilon: 0.5, PartitionSelectionEpsilon: 0.5, PartitionSelectionDelta: 1e-5}) + got, err := newBoundedSumInt64Fn( + *spec, + SumParams{AggregationEpsilon: 0.5, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 0.5, Delta: 1e-5}, MaxPartitionsContributed: 17, MinValue: 0, MaxValue: 10}, + tc.noiseKind, + false, + ) if err != nil { t.Fatalf("Couldn't get boundedSumInf64Fn: %v", err) } @@ -253,9 +194,15 @@ func TestBoundedSumInt64FnSetup(t *testing.T) { func TestBoundedSumInt64FnAddInput(t *testing.T) { // Since δ=0.5 and 2 entries are added, PreAggPartitionSelection always emits. - // Since ε=1e100, the noise is added with probability in the order of exp(-1e100), + // Since AggregationEpsilon=1e50, the noise is added with probability in the order of exp(-1e50), // which means we don't have to worry about tolerance/flakiness calculations. - fn, err := newBoundedSumInt64Fn(1e100, 0.5, 1, 0, 2, noise.LaplaceNoise, false, TestModeDisabled) + spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1e50, PartitionSelectionEpsilon: 1e50, PartitionSelectionDelta: 0.5}) + fn, err := newBoundedSumInt64Fn( + *spec, + SumParams{AggregationEpsilon: 1e50, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1e50, Delta: 0.5}, MaxPartitionsContributed: 1, MinValue: 0, MaxValue: 2}, + noise.LaplaceNoise, + false, + ) if err != nil { t.Fatalf("Couldn't get boundedSumInt64Fn: %v", err) } @@ -283,9 +230,15 @@ func TestBoundedSumInt64FnMergeAccumulators(t *testing.T) { // and deterministic with 2 inputs. This is used to verify that merging // accumulators is also affecting our partition selection outcome. // - // Since ε=1e100, the noise is added with probability in the order of exp(-1e100), + // Since AggregationEpsilon=1e50, the noise is added with probability in the order of exp(-1e50), // which means we don't have to worry about tolerance/flakiness calculations. 
- fn, err := newBoundedSumInt64Fn(1e100, 0.5, 1, 0, 2, noise.LaplaceNoise, false, TestModeDisabled) + spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1e50, PartitionSelectionEpsilon: 1e50, PartitionSelectionDelta: 0.5}) + fn, err := newBoundedSumInt64Fn( + *spec, + SumParams{AggregationEpsilon: 1e50, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1e50, Delta: 0.5}, MaxPartitionsContributed: 1, MinValue: 0, MaxValue: 2}, + noise.LaplaceNoise, + false, + ) if err != nil { t.Fatalf("Couldn't get boundedSumInt64Fn: %v", err) } @@ -321,9 +274,15 @@ func TestBoundedSumInt64FnExtractOutputReturnsNilForSmallPartitions(t *testing.T // It's a special case for partition selection in which the algorithm should always eliminate the partition. {"Empty input", 0}, // The probability of keeping a partition with 1 privacy unit is equal to δ=1e-23 which results in a flakiness of 10⁻²³. - {"Input with 1 privacy unit", 1}} { - - fn, err := newBoundedSumInt64Fn(1, 1e-23, 1, 0, 2, noise.LaplaceNoise, false, TestModeDisabled) + {"Input with 1 privacy unit", 1}, + } { + spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: 0.5, PartitionSelectionEpsilon: 0.5, PartitionSelectionDelta: 1e-23}) + fn, err := newBoundedSumInt64Fn( + *spec, + SumParams{AggregationEpsilon: 0.5, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 0.5, Delta: 1e-23}, MaxPartitionsContributed: 1, MinValue: 0, MaxValue: 2}, + noise.LaplaceNoise, + false, + ) if err != nil { t.Fatalf("Couldn't get boundedSumInt64Fn: %v", err) } @@ -356,9 +315,15 @@ func TestBoundedSumInt64FnExtractOutputWithPublicPartitionsDoesNotThreshold(t *t {"Empty input", 0}, {"Input with 1 user", 1}, {"Input with 10 users", 10}, - {"Input with 100 users", 100}} { - - fn, err := newBoundedSumInt64Fn(1, 0, 1, 0, 2, noise.LaplaceNoise, true, TestModeDisabled) + {"Input with 100 users", 100}, + } { + spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1.0}) + fn, err := newBoundedSumInt64Fn( 
*spec, + SumParams{AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, MinValue: 0, MaxValue: 2}, + noise.LaplaceNoise, + true, + ) if err != nil { t.Fatalf("Couldn't get boundedSumInt64Fn: %v", err) } @@ -384,8 +349,14 @@ func TestBoundedSumInt64FnExtractOutputWithPublicPartitionsDoesNotThreshold(t *t func TestBoundedSumFloat64FnAddInput(t *testing.T) { // Since δ=0.5 and 2 entries are added, PreAggPartitionSelection always emits. - // Since ε=1e100, added noise is negligible. - fn, err := newBoundedSumFloat64Fn(1e100, 0.5, 1, 0, 2, noise.LaplaceNoise, false, TestModeDisabled) + // Since AggregationEpsilon=1e50, added noise is negligible. + spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1e50, PartitionSelectionEpsilon: 1e50, PartitionSelectionDelta: 0.5}) + fn, err := newBoundedSumFloat64Fn( + *spec, + SumParams{AggregationEpsilon: 1e50, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1e50, Delta: 0.5}, MaxPartitionsContributed: 1, MinValue: 0, MaxValue: 2}, + noise.LaplaceNoise, + false, + ) if err != nil { t.Fatalf("Couldn't get boundedSumFloat64Fn: %v", err) } @@ -403,7 +374,7 @@ func TestBoundedSumFloat64FnAddInput(t *testing.T) { t.Fatalf("Couldn't extract output: %v", err) } want := testutils.Float64Ptr(4) - if diff := cmp.Diff(want, got, cmpopts.EquateApprox(0, testutils.LaplaceTolerance(23, 2, 1e100))); diff != "" { + if diff := cmp.Diff(want, got, cmpopts.EquateApprox(0, testutils.LaplaceTolerance(23, 2, 1e50))); diff != "" { t.Errorf("unexpected output (-want +got):\n%s", diff) } } @@ -413,8 +384,14 @@ func TestBoundedSumFloat64FnMergeAccumulators(t *testing.T) { // and deterministic with 2 inputs. This is used to verify that merging // accumulators is also effecting our partition selection outcome. // - // Since ε=1e100, added noise is negligible. - fn, err := newBoundedSumFloat64Fn(1e100, 0.5, 1, 0, 2, noise.LaplaceNoise, false, TestModeDisabled) + // Since AggregationEpsilon=1e50, added noise is negligible. 
+ spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1e50, PartitionSelectionEpsilon: 1e50, PartitionSelectionDelta: 0.5}) + fn, err := newBoundedSumFloat64Fn( + *spec, + SumParams{AggregationEpsilon: 1e50, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1e50, Delta: 0.5}, MaxPartitionsContributed: 1, MinValue: 0, MaxValue: 2}, + noise.LaplaceNoise, + false, + ) if err != nil { t.Fatalf("Couldn't get boundedSumFloat64Fn: %v", err) } @@ -437,7 +414,7 @@ func TestBoundedSumFloat64FnMergeAccumulators(t *testing.T) { t.Fatalf("Couldn't extract output: %v", err) } want := testutils.Float64Ptr(3) - if diff := cmp.Diff(want, got, cmpopts.EquateApprox(0, testutils.LaplaceTolerance(23, 2, 1e100))); diff != "" { + if diff := cmp.Diff(want, got, cmpopts.EquateApprox(0, testutils.LaplaceTolerance(23, 2, 1e50))); diff != "" { t.Errorf("unexpected output (-want +got):\n%s", diff) } } @@ -452,7 +429,13 @@ func TestBoundedSumFloat64FnExtractOutputReturnsNilForSmallPartitions(t *testing // The probability of keeping a partition with 1 privacy unit is equal to δ=1e-23 which results in a flakiness of 10⁻²³. 
{"Input with 1 privacy unit", 1}} { - fn, err := newBoundedSumFloat64Fn(1, 1e-23, 1, 0, 2, noise.LaplaceNoise, false, TestModeDisabled) + spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: 0.5, PartitionSelectionEpsilon: 0.5, PartitionSelectionDelta: 1e-23}) + fn, err := newBoundedSumFloat64Fn( + *spec, + SumParams{AggregationEpsilon: 0.5, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 0.5, Delta: 1e-23}, MaxPartitionsContributed: 1, MinValue: 0, MaxValue: 2}, + noise.LaplaceNoise, + false, + ) if err != nil { t.Fatalf("Couldn't get boundedSumFloat64Fn: %v", err) } @@ -486,8 +469,13 @@ func TestBoundedSumFloat64FnExtractOutputWithPublicPartitionsDoesNotThreshold(t {"Input with 1 user", 1}, {"Input with 10 users", 10}, {"Input with 100 users", 100}} { - publicPartitions := true - fn, err := newBoundedSumFloat64Fn(1, 0, 1, 0, 2, noise.LaplaceNoise, publicPartitions, TestModeDisabled) + spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1}) + fn, err := newBoundedSumFloat64Fn( + *spec, + SumParams{AggregationEpsilon: 1, MaxPartitionsContributed: 1, MinValue: 0, MaxValue: 2}, + noise.LaplaceNoise, + true, + ) if err != nil { t.Fatalf("Couldn't get boundedSumFloat64Fn: %v", err) } diff --git a/privacy-on-beam/pbeam/count.go b/privacy-on-beam/pbeam/count.go index a480177b..800bc6a8 100644 --- a/privacy-on-beam/pbeam/count.go +++ b/privacy-on-beam/pbeam/count.go @@ -137,21 +137,14 @@ func Count(s beam.Scope, pcol PrivatePCollection, params CountParams) beam.PColl // Get privacy parameters. 
spec := pcol.privacySpec var err error - if spec.usesNewPrivacyBudgetAPI { - params.AggregationEpsilon, params.AggregationDelta, err = spec.aggregationBudget.consume(params.AggregationEpsilon, params.AggregationDelta) - if err != nil { - log.Fatalf("Couldn't consume aggregation budget for Count: %v", err) - } - if params.PublicPartitions == nil { - params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta, err = spec.partitionSelectionBudget.consume(params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta) - if err != nil { - log.Fatalf("Couldn't consume partition selection budget for Count: %v", err) - } - } - } else { - params.Epsilon, params.Delta, err = spec.budget.consume(params.Epsilon, params.Delta) + params.AggregationEpsilon, params.AggregationDelta, err = spec.aggregationBudget.consume(params.AggregationEpsilon, params.AggregationDelta) + if err != nil { + log.Fatalf("Couldn't consume aggregation budget for Count: %v", err) + } + if params.PublicPartitions == nil { + params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta, err = spec.partitionSelectionBudget.consume(params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta) if err != nil { - log.Fatalf("Couldn't consume budget for Count: %v", err) + log.Fatalf("Couldn't consume partition selection budget for Count: %v", err) } } @@ -163,7 +156,7 @@ func Count(s beam.Scope, pcol PrivatePCollection, params CountParams) beam.PColl noiseKind = params.NoiseKind.toNoiseKind() } - err = checkCountParams(params, spec.usesNewPrivacyBudgetAPI, noiseKind, partitionT.Type()) + err = checkCountParams(params, noiseKind, partitionT.Type()) if err != nil { log.Fatalf("pbeam.Count: %v", err) } @@ -195,18 +188,9 @@ func Count(s beam.Scope, pcol PrivatePCollection, params CountParams) beam.PColl var result beam.PCollection // Add public partitions and compute the aggregation output, if public partitions are specified. 
if params.PublicPartitions != nil { - if spec.usesNewPrivacyBudgetAPI { - result = addPublicPartitionsForCount(s, params.AggregationEpsilon, params.AggregationDelta, params, noiseKind, countsKV, spec.testMode) - } else { - result = addPublicPartitionsForCount(s, params.Epsilon, params.Delta, params, noiseKind, countsKV, spec.testMode) - } + result = addPublicPartitionsForCount(s, *spec, params, noiseKind, countsKV) } else { - var boundedSumFn *boundedSumInt64Fn - if spec.usesNewPrivacyBudgetAPI { - boundedSumFn, err = newBoundedSumInt64FnTemp(params.AggregationEpsilon, params.AggregationDelta, params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta, spec.preThreshold, params.MaxPartitionsContributed, 0, params.MaxValue, noiseKind, false, spec.testMode) - } else { - boundedSumFn, err = newBoundedSumInt64Fn(params.Epsilon, params.Delta, params.MaxPartitionsContributed, 0, params.MaxValue, noiseKind, false, spec.testMode) - } + boundedSumFn, err := newBoundedSumInt64Fn(*spec, countToSumParams(params), noiseKind, false) if err != nil { log.Fatalf("Couldn't get boundedSumInt64Fn for Count: %v", err) } @@ -221,44 +205,34 @@ func Count(s beam.Scope, pcol PrivatePCollection, params CountParams) beam.PColl // Clamp negative counts to zero. 
result = beam.ParDo(s, clampNegativePartitionsInt64, result) } + return result } -func checkCountParams(params CountParams, usesNewPrivacyBudgetAPI bool, noiseKind noise.Kind, partitionType reflect.Type) error { +func checkCountParams(params CountParams, noiseKind noise.Kind, partitionType reflect.Type) error { err := checkPublicPartitions(params.PublicPartitions, partitionType) if err != nil { return err } - if usesNewPrivacyBudgetAPI { - err = checks.CheckEpsilon(params.AggregationEpsilon) - if err != nil { - return err - } - err = checkAggregationDelta(params.AggregationDelta, noiseKind) - if err != nil { - return err - } - err = checkPartitionSelectionEpsilon(params.PartitionSelectionParams.Epsilon, params.PublicPartitions) - if err != nil { - return err - } - err = checkPartitionSelectionDelta(params.PartitionSelectionParams.Delta, params.PublicPartitions) - if err != nil { - return err - } - err = checkMaxPartitionsContributedPartitionSelection(params.PartitionSelectionParams.MaxPartitionsContributed) - if err != nil { - return err - } - } else { - err = checks.CheckEpsilon(params.Epsilon) - if err != nil { - return err - } - err = checkDelta(params.Delta, noiseKind, params.PublicPartitions) - if err != nil { - return err - } + err = checks.CheckEpsilon(params.AggregationEpsilon) + if err != nil { + return err + } + err = checkAggregationDelta(params.AggregationDelta, noiseKind) + if err != nil { + return err + } + err = checkPartitionSelectionEpsilon(params.PartitionSelectionParams.Epsilon, params.PublicPartitions) + if err != nil { + return err + } + err = checkPartitionSelectionDelta(params.PartitionSelectionParams.Delta, params.PublicPartitions) + if err != nil { + return err + } + err = checkMaxPartitionsContributedPartitionSelection(params.PartitionSelectionParams.MaxPartitionsContributed) + if err != nil { + return err } if params.MaxValue <= 0 { return fmt.Errorf("MaxValue should be strictly positive, got %d", params.MaxValue) @@ -266,7 +240,7 @@ func 
checkCountParams(params CountParams, usesNewPrivacyBudgetAPI bool, noiseKin return checkMaxPartitionsContributed(params.MaxPartitionsContributed) } -func addPublicPartitionsForCount(s beam.Scope, epsilon, delta float64, params CountParams, noiseKind noise.Kind, countsKV beam.PCollection, testMode TestMode) beam.PCollection { +func addPublicPartitionsForCount(s beam.Scope, spec PrivacySpec, params CountParams, noiseKind noise.Kind, countsKV beam.PCollection) beam.PCollection { // Turn PublicPartitions from PCollection into PCollection by adding // the value zero to each K. publicPartitions, isPCollection := params.PublicPartitions.(beam.PCollection) @@ -277,10 +251,23 @@ func addPublicPartitionsForCount(s beam.Scope, epsilon, delta float64, params Co // Merge countsKV and emptyCounts. allPartitions := beam.Flatten(s, emptyCounts, countsKV) // Sum and add noise. - boundedSumFn, err := newBoundedSumInt64Fn(epsilon, delta, params.MaxPartitionsContributed, 0, params.MaxValue, noiseKind, true, testMode) + boundedSumFn, err := newBoundedSumInt64Fn(spec, countToSumParams(params), noiseKind, true) if err != nil { log.Fatalf("Couldn't get boundedSumInt64Fn for Count: %v", err) } sums := beam.CombinePerKey(s, boundedSumFn, allPartitions) return beam.ParDo(s, dereferenceValueInt64, sums) } + +func countToSumParams(params CountParams) SumParams { + return SumParams{ + AggregationEpsilon: params.AggregationEpsilon, + AggregationDelta: params.AggregationDelta, + PartitionSelectionParams: params.PartitionSelectionParams, + MaxPartitionsContributed: params.MaxPartitionsContributed, + MinValue: 0, + MaxValue: float64(params.MaxValue), + NoiseKind: params.NoiseKind, + PublicPartitions: params.PublicPartitions, + } +} diff --git a/privacy-on-beam/pbeam/count_test.go b/privacy-on-beam/pbeam/count_test.go index bc35cef6..0d291a67 100644 --- a/privacy-on-beam/pbeam/count_test.go +++ b/privacy-on-beam/pbeam/count_test.go @@ -465,18 +465,17 @@ func 
TestCountWithPartitionsCrossPartitionContributionBounding(t *testing.T) { func TestCheckCountParams(t *testing.T) { _, _, partitions := ptest.CreateList([]int{0}) for _, tc := range []struct { - desc string - usesNewPrivacyBudgetAPI bool - params CountParams - noiseKind noise.Kind - partitionType reflect.Type - wantErr bool + desc string + params CountParams + noiseKind noise.Kind + partitionType reflect.Type + wantErr bool }{ { desc: "valid parameters w/o public partitions", params: CountParams{ - Epsilon: 1.0, - Delta: 1e-10, + AggregationEpsilon: 1.0, + PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, MaxPartitionsContributed: 1, MaxValue: 1, }, @@ -485,140 +484,7 @@ func TestCheckCountParams(t *testing.T) { wantErr: false, }, { - desc: "valid parameters w/ public partitions", - params: CountParams{ - Epsilon: 1.0, - MaxPartitionsContributed: 1, - MaxValue: 1, - PublicPartitions: []int{0}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(0), - wantErr: false, - }, - { - desc: "negative epsilon", - params: CountParams{ - Epsilon: -1.0, - Delta: 1e-10, - MaxPartitionsContributed: 1, - MaxValue: 1, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "zero delta w/o public partitions", - params: CountParams{ - Epsilon: 1.0, - MaxPartitionsContributed: 1, - MaxValue: 1, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "non-zero delta w/ public partitions & laplace noise", - params: CountParams{ - Epsilon: 1.0, - Delta: 1e-10, - MaxPartitionsContributed: 1, - MaxValue: 1, - PublicPartitions: []int{}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(0), - wantErr: true, - }, - { - desc: "wrong partition type w/ public partitions as beam.PCollection", - params: CountParams{ - Epsilon: 1.0, - MaxPartitionsContributed: 1, - MaxValue: 1, - PublicPartitions: partitions, - }, - noiseKind: noise.LaplaceNoise, - 
partitionType: reflect.TypeOf(""), - wantErr: true, - }, - { - desc: "wrong partition type w/ public partitions as slice", - params: CountParams{ - Epsilon: 1.0, - MaxPartitionsContributed: 1, - MaxValue: 1, - PublicPartitions: []int{0}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, - }, - { - desc: "wrong partition type w/ public partitions as array", - params: CountParams{ - Epsilon: 1.0, - MaxPartitionsContributed: 1, - MaxValue: 1, - PublicPartitions: [1]int{0}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, - }, - { - desc: "public partitions as something other than beam.PCollection, slice or array", - params: CountParams{ - Epsilon: 1.0, - MaxPartitionsContributed: 1, - MaxValue: 1, - PublicPartitions: "", - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, - }, - { - desc: "unset MaxPartitionsContributed", - params: CountParams{ - Epsilon: 1.0, - Delta: 1e-10, - MaxValue: 1, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "negative max value", - params: CountParams{ - Epsilon: 1.0, - Delta: 1e-10, - MaxPartitionsContributed: 1, - MaxValue: -1, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - // Test cases for the new privacy budget API. 
- { - desc: "new API, valid parameters w/o public partitions", - params: CountParams{ - AggregationEpsilon: 1.0, - PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, - MaxPartitionsContributed: 1, - MaxValue: 1, - }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: false, - }, - { - desc: "new API, PartitionSelectionParams.MaxPartitionsContributed set", + desc: "PartitionSelectionParams.MaxPartitionsContributed set", params: CountParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5, MaxPartitionsContributed: 1}, @@ -630,7 +496,7 @@ func TestCheckCountParams(t *testing.T) { wantErr: true, }, { - desc: "new API, valid parameters w/ Gaussian noise w/o public partitions", + desc: "valid parameters w/ Gaussian noise w/o public partitions", params: CountParams{ AggregationEpsilon: 1.0, AggregationDelta: 1e-5, @@ -638,39 +504,36 @@ func TestCheckCountParams(t *testing.T) { MaxPartitionsContributed: 1, MaxValue: 1, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.GaussianNoise, - partitionType: nil, - wantErr: false, + noiseKind: noise.GaussianNoise, + partitionType: nil, + wantErr: false, }, { - desc: "new API, zero aggregationDelta w/ Gaussian noise w/o public partitions", + desc: "zero aggregationDelta w/ Gaussian noise w/o public partitions", params: CountParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, MaxPartitionsContributed: 1, MaxValue: 1, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.GaussianNoise, - partitionType: nil, - wantErr: true, + noiseKind: noise.GaussianNoise, + partitionType: nil, + wantErr: true, }, { - desc: "new API, valid parameters w/ public partitions", + desc: "valid parameters w/ public partitions", params: CountParams{ AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, MaxValue: 1, PublicPartitions: []int{0}, }, - 
usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(0), - wantErr: false, + noiseKind: noise.LaplaceNoise, + partitionType: reflect.TypeOf(0), + wantErr: false, }, { - desc: "new API, non-zero aggregationDelta w/ laplace noise", + desc: "non-zero aggregationDelta w/ laplace noise", params: CountParams{ AggregationEpsilon: 1.0, AggregationDelta: 1e-5, @@ -678,142 +541,131 @@ func TestCheckCountParams(t *testing.T) { MaxPartitionsContributed: 1, MaxValue: 1, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(0), - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: reflect.TypeOf(0), + wantErr: true, }, { - desc: "new API, negative aggregationEpsilon", + desc: "negative aggregationEpsilon", params: CountParams{ AggregationEpsilon: -1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, MaxPartitionsContributed: 1, MaxValue: 1, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: nil, + wantErr: true, }, { - desc: "new API, negative partitionSelectionEpsilon", + desc: "negative partitionSelectionEpsilon", params: CountParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: -1.0, Delta: 1e-5}, MaxPartitionsContributed: 1, MaxValue: 1, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: nil, + wantErr: true, }, { - desc: "new API, zero partitionSelectionDelta w/o public partitions", + desc: "zero partitionSelectionDelta w/o public partitions", params: CountParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 0}, MaxPartitionsContributed: 1, MaxValue: 1, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, 
- wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: nil, + wantErr: true, }, { - desc: "new API, zero partitionSelectionEpsilon w/o public partitions", + desc: "zero partitionSelectionEpsilon w/o public partitions", params: CountParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 0, Delta: 1e-5}, MaxPartitionsContributed: 1, MaxValue: 1, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: nil, + wantErr: true, }, { - desc: "new API, wrong partition type w/ public partitions as beam.PCollection", + desc: "wrong partition type w/ public partitions as beam.PCollection", params: CountParams{ AggregationEpsilon: 1.0, MaxValue: 1, MaxPartitionsContributed: 1, PublicPartitions: partitions, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: reflect.TypeOf(""), + wantErr: true, }, { - desc: "new API, wrong partition type w/ public partitions as slice", + desc: "wrong partition type w/ public partitions as slice", params: CountParams{ AggregationEpsilon: 1.0, MaxValue: 1, MaxPartitionsContributed: 1, PublicPartitions: []int{0}, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: reflect.TypeOf(""), + wantErr: true, }, { - desc: "new API, wrong partition type w/ public partitions as array", + desc: "wrong partition type w/ public partitions as array", params: CountParams{ AggregationEpsilon: 1.0, MaxValue: 1, MaxPartitionsContributed: 1, PublicPartitions: [1]int{0}, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: reflect.TypeOf(""), + wantErr: true, }, { - 
desc: "new API, public partitions as something other than beam.PCollection, slice or array", + desc: "public partitions as something other than beam.PCollection, slice or array", params: CountParams{ AggregationEpsilon: 1.0, MaxValue: 1, MaxPartitionsContributed: 1, PublicPartitions: "", }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: reflect.TypeOf(""), + wantErr: true, }, { - desc: "new API, unset MaxPartitionsContributed", + desc: "unset MaxPartitionsContributed", params: CountParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, MaxValue: 1, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: nil, + wantErr: true, }, { - desc: "new API, negative max value", + desc: "negative max value", params: CountParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, MaxPartitionsContributed: 1, MaxValue: -1, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: nil, + wantErr: true, }, } { - if err := checkCountParams(tc.params, tc.usesNewPrivacyBudgetAPI, tc.noiseKind, tc.partitionType); (err != nil) != tc.wantErr { + if err := checkCountParams(tc.params, tc.noiseKind, tc.partitionType); (err != nil) != tc.wantErr { t.Errorf("With %s, got=%v error, wantErr=%t", tc.desc, err, tc.wantErr) } } @@ -867,7 +719,7 @@ func TestCountAllowNegativeOutputs(t *testing.T) { {true}, {false}, } { - privacySpec, err := NewPrivacySpecTemp(PrivacySpecParams{ + privacySpec, err := NewPrivacySpec(PrivacySpecParams{ AggregationEpsilon: 0.1, }) if err != nil { diff --git a/privacy-on-beam/pbeam/distinct_id.go b/privacy-on-beam/pbeam/distinct_id.go index 
5a6a2c41..3527c7f7 100644 --- a/privacy-on-beam/pbeam/distinct_id.go +++ b/privacy-on-beam/pbeam/distinct_id.go @@ -136,25 +136,18 @@ func DistinctPrivacyID(s beam.Scope, pcol PrivatePCollection, params DistinctPri // Get privacy parameters. spec := pcol.privacySpec var err error - if spec.usesNewPrivacyBudgetAPI { - params.AggregationEpsilon, params.AggregationDelta, err = spec.aggregationBudget.consume(params.AggregationEpsilon, params.AggregationDelta) - if err != nil { - log.Fatalf("Couldn't consume aggregation budget for DistinctPrivacyID: %v", err) - } - if params.PublicPartitions == nil { - _, params.PartitionSelectionDelta, err = spec.partitionSelectionBudget.consume(0, params.PartitionSelectionDelta) - if err != nil { - log.Fatalf("Couldn't consume partition selection budget for DistinctPrivacyID: %v", err) - } - } - } else { - params.Epsilon, params.Delta, err = spec.budget.consume(params.Epsilon, params.Delta) + params.AggregationEpsilon, params.AggregationDelta, err = spec.aggregationBudget.consume(params.AggregationEpsilon, params.AggregationDelta) + if err != nil { + log.Fatalf("Couldn't consume aggregation budget for DistinctPrivacyID: %v", err) + } + if params.PublicPartitions == nil { + _, params.PartitionSelectionDelta, err = spec.partitionSelectionBudget.consume(0, params.PartitionSelectionDelta) if err != nil { - log.Fatalf("Couldn't consume budget for DistinctPrivacyID: %v", err) + log.Fatalf("Couldn't consume partition selection budget for DistinctPrivacyID: %v", err) } } - err = checkDistinctPrivacyIDParams(params, spec.usesNewPrivacyBudgetAPI, noiseKind, partitionT.Type()) + err = checkDistinctPrivacyIDParams(params, noiseKind, partitionT.Type()) if err != nil { log.Fatalf("pbeam.DistinctPrivacyID: %v", err) } @@ -185,18 +178,9 @@ func DistinctPrivacyID(s beam.Scope, pcol PrivatePCollection, params DistinctPri var result beam.PCollection // Add public partitions and return the aggregation output, if public partitions are specified. 
if params.PublicPartitions != nil { - if spec.usesNewPrivacyBudgetAPI { - result = addPublicPartitionsForDistinctID(s, params, params.AggregationEpsilon, params.AggregationDelta, noiseKind, emptyCounts, spec.testMode) - } else { - result = addPublicPartitionsForDistinctID(s, params, params.Epsilon, params.Delta, noiseKind, emptyCounts, spec.testMode) - } + result = addPublicPartitionsForDistinctID(s, *spec, params, noiseKind, emptyCounts) } else { - var countFn *countFn - if spec.usesNewPrivacyBudgetAPI { - countFn, err = newCountFnTemp(*spec, params, noiseKind, false) - } else { - countFn, err = newCountFn(params.Epsilon, params.Delta, params.MaxPartitionsContributed, noiseKind, false, spec.testMode) - } + countFn, err := newCountFn(*spec, params, noiseKind, false) if err != nil { log.Fatalf("pbeam.DistinctPrivacyID: %v", err) } @@ -207,10 +191,11 @@ func DistinctPrivacyID(s beam.Scope, pcol PrivatePCollection, params DistinctPri // Clamp negative counts to zero and return. result = beam.ParDo(s, clampNegativePartitionsInt64, result) + return result } -func addPublicPartitionsForDistinctID(s beam.Scope, params DistinctPrivacyIDParams, epsilon, delta float64, noiseKind noise.Kind, countsKV beam.PCollection, testMode TestMode) beam.PCollection { +func addPublicPartitionsForDistinctID(s beam.Scope, spec PrivacySpec, params DistinctPrivacyIDParams, noiseKind noise.Kind, countsKV beam.PCollection) beam.PCollection { publicPartitions, isPCollection := params.PublicPartitions.(beam.PCollection) if !isPCollection { publicPartitions = beam.Reshuffle(s, beam.CreateList(s, params.PublicPartitions)) @@ -218,7 +203,7 @@ func addPublicPartitionsForDistinctID(s beam.Scope, params DistinctPrivacyIDPara prepareAddPublicPartitions := beam.ParDo(s, addZeroValuesToPublicPartitionsInt64, publicPartitions) // Merge countsKV and prepareAddPublicPartitions. 
allAddPartitions := beam.Flatten(s, countsKV, prepareAddPublicPartitions) - countFn, err := newCountFn(epsilon, delta, params.MaxPartitionsContributed, noiseKind, true, testMode) + countFn, err := newCountFn(spec, params, noiseKind, true) if err != nil { log.Fatalf("pbeam.DistinctPrivacyID: %v", err) } @@ -228,33 +213,22 @@ func addPublicPartitionsForDistinctID(s beam.Scope, params DistinctPrivacyIDPara return beam.ParDo(s, clampNegativePartitionsInt64, finalPartitions) } -func checkDistinctPrivacyIDParams(params DistinctPrivacyIDParams, usesNewPrivacyBudgetAPI bool, noiseKind noise.Kind, partitionType reflect.Type) error { +func checkDistinctPrivacyIDParams(params DistinctPrivacyIDParams, noiseKind noise.Kind, partitionType reflect.Type) error { err := checkPublicPartitions(params.PublicPartitions, partitionType) if err != nil { return err } - if usesNewPrivacyBudgetAPI { - err = checks.CheckEpsilon(params.AggregationEpsilon) - if err != nil { - return err - } - err = checkAggregationDelta(params.AggregationDelta, noiseKind) - if err != nil { - return err - } - err = checkPartitionSelectionDelta(params.PartitionSelectionDelta, params.PublicPartitions) - if err != nil { - return err - } - } else { - err = checks.CheckEpsilon(params.Epsilon) - if err != nil { - return err - } - err = checkDelta(params.Delta, noiseKind, params.PublicPartitions) - if err != nil { - return err - } + err = checks.CheckEpsilon(params.AggregationEpsilon) + if err != nil { + return err + } + err = checkAggregationDelta(params.AggregationDelta, noiseKind) + if err != nil { + return err + } + err = checkPartitionSelectionDelta(params.PartitionSelectionDelta, params.PublicPartitions) + if err != nil { + return err } return checkMaxPartitionsContributed(params.MaxPartitionsContributed) } @@ -277,33 +251,8 @@ type countFn struct { TestMode TestMode } -// newCountFn returns a CountFn with the given budget and parameters. 
-func newCountFn(epsilon, delta float64, maxPartitionsContributed int64, noiseKind noise.Kind, publicPartitions bool, testMode TestMode) (*countFn, error) { - fn := &countFn{ - MaxPartitionsContributed: maxPartitionsContributed, - NoiseKind: noiseKind, - PublicPartitions: publicPartitions, - TestMode: testMode, - } - fn.Epsilon = epsilon - if fn.PublicPartitions { - fn.NoiseDelta = delta - return fn, nil - } - switch noiseKind { - case noise.GaussianNoise: - fn.NoiseDelta = delta / 2 - case noise.LaplaceNoise: - fn.NoiseDelta = 0 - default: - return nil, fmt.Errorf("unknown noise.Kind (%v) is specified. Please specify a valid noise", noiseKind) - } - fn.ThresholdDelta = delta - fn.NoiseDelta - return fn, nil -} - // newCountFn returns a newCountFn with the given budget and parameters. -func newCountFnTemp(spec PrivacySpec, params DistinctPrivacyIDParams, noiseKind noise.Kind, publicPartitions bool) (*countFn, error) { +func newCountFn(spec PrivacySpec, params DistinctPrivacyIDParams, noiseKind noise.Kind, publicPartitions bool) (*countFn, error) { if noiseKind != noise.GaussianNoise && noiseKind != noise.LaplaceNoise { return nil, fmt.Errorf("unknown noise.Kind (%v) is specified. 
Please specify a valid noise", noiseKind) } diff --git a/privacy-on-beam/pbeam/distinct_id_test.go b/privacy-on-beam/pbeam/distinct_id_test.go index c1f72c6f..ae4aa4f2 100644 --- a/privacy-on-beam/pbeam/distinct_id_test.go +++ b/privacy-on-beam/pbeam/distinct_id_test.go @@ -625,41 +625,6 @@ func TestDistinctPrivacyIDOptimizedContrib(t *testing.T) { } func TestNewCountFn(t *testing.T) { - for _, tc := range []struct { - desc string - noiseKind noise.Kind - want *countFn - }{ - {"Laplace", noise.LaplaceNoise, - &countFn{ - Epsilon: 1, - NoiseDelta: 0, - ThresholdDelta: 1e-5, - MaxPartitionsContributed: 17, - NoiseKind: noise.LaplaceNoise, - }}, - {"Gaussian", noise.GaussianNoise, - &countFn{ - Epsilon: 1, - NoiseDelta: 5e-6, - ThresholdDelta: 5e-6, - MaxPartitionsContributed: 17, - NoiseKind: noise.GaussianNoise, - }}, - } { - got, err := newCountFn(1, 1e-5, 17, tc.noiseKind, false, TestModeDisabled) - if err != nil { - t.Fatalf("Couldn't get countFn: %v", err) - } - if diff := cmp.Diff(tc.want, got, cmpopts.IgnoreUnexported(countFn{})); diff != "" { - t.Errorf("newCountFn mismatch for '%s' (-want +got):\n%s", tc.desc, diff) - } - } -} - -// The logic mirrors TestCountFnSetup, but with the new privacy budget API where -// clients specify aggregation budget and partition selection budget separately. 
-func TestNewCountFnTemp(t *testing.T) { for _, tc := range []struct { desc string noiseKind noise.Kind @@ -695,7 +660,7 @@ func TestNewCountFnTemp(t *testing.T) { NoiseKind: noise.LaplaceNoise, }}, } { - got, err := newCountFnTemp(PrivacySpec{preThreshold: tc.preThreshold, testMode: TestModeDisabled}, + got, err := newCountFn(PrivacySpec{preThreshold: tc.preThreshold, testMode: TestModeDisabled}, DistinctPrivacyIDParams{ AggregationEpsilon: tc.aggregationEpsilon, AggregationDelta: tc.aggregationDelta, @@ -718,8 +683,10 @@ func TestCountFnSetup(t *testing.T) { wantNoise any }{ {"Laplace noise kind", noise.LaplaceNoise, noise.Laplace()}, - {"Gaussian noise kind", noise.GaussianNoise, noise.Gaussian()}} { - got, err := newCountFn(1, 1e-5, 17, tc.noiseKind, false, TestModeDisabled) + {"Gaussian noise kind", noise.GaussianNoise, noise.Gaussian()}, + } { + spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1, PartitionSelectionDelta: 1e-5}) + got, err := newCountFn(*spec, DistinctPrivacyIDParams{MaxPartitionsContributed: 17}, tc.noiseKind, false) if err != nil { t.Fatalf("Couldn't get countFn: %v", err) } @@ -859,18 +826,17 @@ func TestCountFnExtractOutputDoesNotReturnNilIfPartitionsPublic(t *testing.T) { func TestCheckDistinctPrivacyIDParams(t *testing.T) { _, _, partitions := ptest.CreateList([]int{0}) for _, tc := range []struct { - desc string - params DistinctPrivacyIDParams - usesNewPrivacyBudgetAPI bool - noiseKind noise.Kind - partitionType reflect.Type - wantErr bool + desc string + params DistinctPrivacyIDParams + noiseKind noise.Kind + partitionType reflect.Type + wantErr bool }{ { desc: "valid parameters w/o public partitions", params: DistinctPrivacyIDParams{ - Epsilon: 1, - Delta: 1e-10, + AggregationEpsilon: 1.0, + PartitionSelectionDelta: 1e-5, MaxPartitionsContributed: 1, }, noiseKind: noise.LaplaceNoise, @@ -878,214 +844,95 @@ func TestCheckDistinctPrivacyIDParams(t *testing.T) { wantErr: false, }, { - desc: "valid parameters w/ public 
partitions", - params: DistinctPrivacyIDParams{ - Epsilon: 1, - MaxPartitionsContributed: 1, - PublicPartitions: []int{0}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(0), - wantErr: false, - }, - { - desc: "negative epsilon", - params: DistinctPrivacyIDParams{ - Epsilon: -1, - Delta: 1e-10, - MaxPartitionsContributed: 1, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "zero delta w/o public partitions", - params: DistinctPrivacyIDParams{ - Epsilon: 1, - MaxPartitionsContributed: 1, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "non-zero delta w/ public partitions & laplace noise", - params: DistinctPrivacyIDParams{ - Epsilon: 1, - Delta: 1e-10, - MaxPartitionsContributed: 1, - PublicPartitions: []int{}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(0), - wantErr: true, - }, - { - desc: "unset MaxPartitionsContributed", - params: DistinctPrivacyIDParams{ - Epsilon: 1, - Delta: 1e-10, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "wrong partition type w/ public partitions as beam.PCollection", - params: DistinctPrivacyIDParams{ - Epsilon: 1, - MaxPartitionsContributed: 1, - PublicPartitions: partitions, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, - }, - { - desc: "wrong partition type w/ public partitions as slice", - params: DistinctPrivacyIDParams{ - Epsilon: 1, - MaxPartitionsContributed: 1, - PublicPartitions: []int{0}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, - }, - { - desc: "wrong partition type w/ public partitions as array", - params: DistinctPrivacyIDParams{ - Epsilon: 1, - MaxPartitionsContributed: 1, - PublicPartitions: [1]int{0}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, - }, - { - desc: "public partitions as something 
other than beam.PCollection, slice or array", - params: DistinctPrivacyIDParams{ - Epsilon: 1, - MaxPartitionsContributed: 1, - PublicPartitions: "", - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, - }, - // Test cases for the new privacy budget API. - { - desc: "new API, valid parameters w/o public partitions", - params: DistinctPrivacyIDParams{ - AggregationEpsilon: 1.0, - PartitionSelectionDelta: 1e-5, - MaxPartitionsContributed: 1, - }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: false, - }, - { - desc: "new API, valid parameters w/ gaussian noise w/o public partitions", + desc: "valid parameters w/ gaussian noise w/o public partitions", params: DistinctPrivacyIDParams{ AggregationEpsilon: 1.0, AggregationDelta: 1e-5, PartitionSelectionDelta: 1e-5, MaxPartitionsContributed: 1, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.GaussianNoise, - partitionType: nil, - wantErr: false, + noiseKind: noise.GaussianNoise, + partitionType: nil, + wantErr: false, }, { - desc: "new API, zero aggregationDelta w/ gaussian noise w/o public partitions", + desc: "zero aggregationDelta w/ gaussian noise w/o public partitions", params: DistinctPrivacyIDParams{ AggregationEpsilon: 1.0, PartitionSelectionDelta: 1e-5, MaxPartitionsContributed: 1, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.GaussianNoise, - partitionType: nil, - wantErr: true, + noiseKind: noise.GaussianNoise, + partitionType: nil, + wantErr: true, }, { - desc: "new API, valid parameters w/ public partitions", + desc: "valid parameters w/ public partitions", params: DistinctPrivacyIDParams{ AggregationEpsilon: 1.0, PublicPartitions: []int{0}, MaxPartitionsContributed: 1, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(0), - wantErr: false, + noiseKind: noise.LaplaceNoise, + partitionType: reflect.TypeOf(0), + wantErr: false, }, { - desc: "new API, 
negative epsilon", + desc: "negative epsilon", params: DistinctPrivacyIDParams{ AggregationEpsilon: -1.0, PartitionSelectionDelta: 1e-5, MaxPartitionsContributed: 1, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: nil, + wantErr: true, }, { - desc: "new API, zero partitionSelectionDelta w/o public partitions", + desc: "zero partitionSelectionDelta w/o public partitions", params: DistinctPrivacyIDParams{ AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: nil, + wantErr: true, }, { - desc: "new API, non-zero partitionSelectionDelta w/ laplace noise", + desc: "non-zero partitionSelectionDelta w/ laplace noise", params: DistinctPrivacyIDParams{ AggregationEpsilon: 1.0, PartitionSelectionDelta: 1e-5, MaxPartitionsContributed: 1, PublicPartitions: []int{}, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(0), - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: reflect.TypeOf(0), + wantErr: true, }, { - desc: "new API, unset MaxPartitionsContributed", + desc: "unset MaxPartitionsContributed", params: DistinctPrivacyIDParams{ AggregationEpsilon: 1.0, PartitionSelectionDelta: 1e-5, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: nil, + wantErr: true, }, { - desc: "new API, wrong partition type w/ public partitions as beam.PCollection", + desc: "wrong partition type w/ public partitions as beam.PCollection", params: DistinctPrivacyIDParams{ AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, PublicPartitions: partitions, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - 
wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: reflect.TypeOf(""), + wantErr: true, }, { - desc: "new API, wrong partition type w/ public partitions as slice", + desc: "wrong partition type w/ public partitions as slice", params: DistinctPrivacyIDParams{ AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, @@ -1096,7 +943,7 @@ func TestCheckDistinctPrivacyIDParams(t *testing.T) { wantErr: true, }, { - desc: "new API, wrong partition type w/ public partitions as array", + desc: "wrong partition type w/ public partitions as array", params: DistinctPrivacyIDParams{ AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, @@ -1107,7 +954,7 @@ func TestCheckDistinctPrivacyIDParams(t *testing.T) { wantErr: true, }, { - desc: "new API, public partitions as something other than beam.PCollection, slice or array", + desc: "public partitions as something other than beam.PCollection, slice or array", params: DistinctPrivacyIDParams{ AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, @@ -1118,7 +965,7 @@ func TestCheckDistinctPrivacyIDParams(t *testing.T) { wantErr: true, }, } { - if err := checkDistinctPrivacyIDParams(tc.params, tc.usesNewPrivacyBudgetAPI, tc.noiseKind, tc.partitionType); (err != nil) != tc.wantErr { + if err := checkDistinctPrivacyIDParams(tc.params, tc.noiseKind, tc.partitionType); (err != nil) != tc.wantErr { t.Errorf("With %s, got=%v error, wantErr=%t", tc.desc, err, tc.wantErr) } } diff --git a/privacy-on-beam/pbeam/distinct_per_key.go b/privacy-on-beam/pbeam/distinct_per_key.go index 46a440b9..89801b6f 100644 --- a/privacy-on-beam/pbeam/distinct_per_key.go +++ b/privacy-on-beam/pbeam/distinct_per_key.go @@ -139,24 +139,14 @@ func DistinctPerKey(s beam.Scope, pcol PrivatePCollection, params DistinctPerKey // In the new privacy budget API, budgets are already split. 
spec := pcol.privacySpec var err error - if spec.usesNewPrivacyBudgetAPI { - params.AggregationEpsilon, params.AggregationDelta, err = spec.aggregationBudget.get(params.AggregationEpsilon, params.AggregationDelta) - if err != nil { - log.Fatalf("Couldn't get aggregation budget for DistinctPerKey: %v", err) - } - if params.PublicPartitions == nil { - params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta, err = spec.partitionSelectionBudget.get(params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta) - if err != nil { - log.Fatalf("Couldn't get partition selection budget for DistinctPerKey: %v", err) - } - } - } else { - params.AggregationEpsilon, params.AggregationDelta, err = spec.budget.get(params.Epsilon, params.Delta) + params.AggregationEpsilon, params.AggregationDelta, err = spec.aggregationBudget.get(params.AggregationEpsilon, params.AggregationDelta) + if err != nil { + log.Fatalf("Couldn't get aggregation budget for DistinctPerKey: %v", err) + } + if params.PublicPartitions == nil { + params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta, err = spec.partitionSelectionBudget.get(params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta) if err != nil { - log.Fatalf("Couldn't get budget for DistinctPerKey: %v", err) - } - if params.PublicPartitions == nil { - params.AggregationEpsilon, params.AggregationDelta, params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta = splitBudget(params.AggregationEpsilon, params.AggregationDelta, noiseKind) + log.Fatalf("Couldn't get partition selection budget for DistinctPerKey: %v", err) } } err = checkDistinctPerKeyParams(params, noiseKind, pcol.codec.KType.T) diff --git a/privacy-on-beam/pbeam/example_pbeamtest_test.go b/privacy-on-beam/pbeam/example_pbeamtest_test.go index 4a506c6a..a32a28a8 100644 --- a/privacy-on-beam/pbeam/example_pbeamtest_test.go +++ b/privacy-on-beam/pbeam/example_pbeamtest_test.go 
@@ -86,7 +86,7 @@ func Example_testPipelines() { // This enables per-partition and cross-partition contribution bounding. If you // wish to disable both types of contribution bounding altogether, use // pbeam.TestModeWithoutContributionBounding instead. - privacySpec, err := pbeam.NewPrivacySpecTemp(pbeam.PrivacySpecParams{ + privacySpec, err := pbeam.NewPrivacySpec(pbeam.PrivacySpecParams{ AggregationEpsilon: ε / 2, AggregationDelta: δ / 2, PartitionSelectionEpsilon: ε / 2, diff --git a/privacy-on-beam/pbeam/example_test.go b/privacy-on-beam/pbeam/example_test.go index eb95fec9..20779fcc 100644 --- a/privacy-on-beam/pbeam/example_test.go +++ b/privacy-on-beam/pbeam/example_test.go @@ -64,7 +64,7 @@ func Example() { // provided by the pipeline. const ε, δ = 1, 1e-3 - privacySpec, err := pbeam.NewPrivacySpecTemp(pbeam.PrivacySpecParams{ + privacySpec, err := pbeam.NewPrivacySpec(pbeam.PrivacySpecParams{ AggregationEpsilon: ε / 2, PartitionSelectionEpsilon: ε / 2, AggregationDelta: δ, diff --git a/privacy-on-beam/pbeam/mean.go b/privacy-on-beam/pbeam/mean.go index fc454c26..34499a1d 100644 --- a/privacy-on-beam/pbeam/mean.go +++ b/privacy-on-beam/pbeam/mean.go @@ -151,21 +151,14 @@ func MeanPerKey(s beam.Scope, pcol PrivatePCollection, params MeanParams) beam.P // Get privacy parameters. 
spec := pcol.privacySpec var err error - if spec.usesNewPrivacyBudgetAPI { - params.AggregationEpsilon, params.AggregationDelta, err = spec.aggregationBudget.get(params.AggregationEpsilon, params.AggregationDelta) - if err != nil { - log.Fatalf("Couldn't consume aggregation budget for Mean: %v", err) - } - if params.PublicPartitions == nil { - params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta, err = spec.partitionSelectionBudget.get(params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta) - if err != nil { - log.Fatalf("Couldn't consume partition selection budget for Mean: %v", err) - } - } - } else { - params.Epsilon, params.Delta, err = spec.budget.get(params.Epsilon, params.Delta) + params.AggregationEpsilon, params.AggregationDelta, err = spec.aggregationBudget.get(params.AggregationEpsilon, params.AggregationDelta) + if err != nil { + log.Fatalf("Couldn't consume aggregation budget for Mean: %v", err) + } + if params.PublicPartitions == nil { + params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta, err = spec.partitionSelectionBudget.get(params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta) if err != nil { - log.Fatalf("Couldn't consume budget for Mean: %v", err) + log.Fatalf("Couldn't consume partition selection budget for Mean: %v", err) } } @@ -177,7 +170,7 @@ func MeanPerKey(s beam.Scope, pcol PrivatePCollection, params MeanParams) beam.P noiseKind = params.NoiseKind.toNoiseKind() } - err = checkMeanPerKeyParams(params, spec.usesNewPrivacyBudgetAPI, noiseKind, pcol.codec.KType.T) + err = checkMeanPerKeyParams(params, noiseKind, pcol.codec.KType.T) if err != nil { log.Fatalf("pbeam.MeanPerKey: %v", err) } @@ -233,20 +226,10 @@ func MeanPerKey(s beam.Scope, pcol PrivatePCollection, params MeanParams) beam.P var result beam.PCollection // Add public partitions and return the aggregation output, if public partitions are specified. 
if params.PublicPartitions != nil { - if spec.usesNewPrivacyBudgetAPI { - result = addPublicPartitionsForMean(s, *spec, params, noiseKind, partialKV) - } else { - result = addPublicPartitionsForMean(s, *spec, params, noiseKind, partialKV) - } + result = addPublicPartitionsForMean(s, *spec, params, noiseKind, partialKV) } else { // Compute the mean for each partition. Result is PCollection. - var boundedMeanFn *boundedMeanFn - if spec.usesNewPrivacyBudgetAPI { - boundedMeanFn, err = newBoundedMeanFnTemp(*spec, params, noiseKind, false, false) - } else { - boundedMeanFn, err = newBoundedMeanFn(params, noiseKind, false, spec.testMode, false) - } - + boundedMeanFn, err := newBoundedMeanFnTemp(*spec, params, noiseKind, false, false) if err != nil { log.Fatalf("Couldn't get boundedMeanFn for MeanPerKey: %v", err) } @@ -269,23 +252,13 @@ func addPublicPartitionsForMean(s beam.Scope, spec PrivacySpec, params MeanParam } emptyPublicPartitions := beam.ParDo(s, addEmptySliceToPublicPartitionsFloat64, publicPartitions) // Second, add noise to all public partitions (all of which are empty-valued). - var boundedMeanFn *boundedMeanFn - var err error - if spec.usesNewPrivacyBudgetAPI { - boundedMeanFn, err = newBoundedMeanFnTemp(spec, params, noiseKind, true, true) - } else { - boundedMeanFn, err = newBoundedMeanFn(params, noiseKind, true, spec.testMode, true) - } + boundedMeanFn, err := newBoundedMeanFnTemp(spec, params, noiseKind, true, true) if err != nil { log.Fatalf("Couldn't get boundedMeanFn for MeanPerKey: %v", err) } noisyEmptyPublicPartitions := beam.CombinePerKey(s, boundedMeanFn, emptyPublicPartitions) // Third, compute noisy means for partitions in the actual data. 
- if spec.usesNewPrivacyBudgetAPI { - boundedMeanFn, err = newBoundedMeanFnTemp(spec, params, noiseKind, true, false) - } else { - boundedMeanFn, err = newBoundedMeanFn(params, noiseKind, true, spec.testMode, false) - } + boundedMeanFn, err = newBoundedMeanFnTemp(spec, params, noiseKind, true, false) if err != nil { log.Fatalf("Couldn't get boundedMeanFn for MeanPerKey: %v", err) } @@ -297,41 +270,30 @@ func addPublicPartitionsForMean(s beam.Scope, spec PrivacySpec, params MeanParam return beam.ParDo(s, dereferenceValueFloat64, means) } -func checkMeanPerKeyParams(params MeanParams, usesNewPrivacyBudgetAPI bool, noiseKind noise.Kind, partitionType reflect.Type) error { +func checkMeanPerKeyParams(params MeanParams, noiseKind noise.Kind, partitionType reflect.Type) error { err := checkPublicPartitions(params.PublicPartitions, partitionType) if err != nil { return err } - if usesNewPrivacyBudgetAPI { - err = checks.CheckEpsilon(params.AggregationEpsilon) - if err != nil { - return err - } - err = checkAggregationDelta(params.AggregationDelta, noiseKind) - if err != nil { - return err - } - err = checkPartitionSelectionEpsilon(params.PartitionSelectionParams.Epsilon, params.PublicPartitions) - if err != nil { - return err - } - err = checkPartitionSelectionDelta(params.PartitionSelectionParams.Delta, params.PublicPartitions) - if err != nil { - return err - } - err = checkMaxPartitionsContributedPartitionSelection(params.PartitionSelectionParams.MaxPartitionsContributed) - if err != nil { - return err - } - } else { - err = checks.CheckEpsilon(params.Epsilon) - if err != nil { - return err - } - err = checkDelta(params.Delta, noiseKind, params.PublicPartitions) - if err != nil { - return err - } + err = checks.CheckEpsilon(params.AggregationEpsilon) + if err != nil { + return err + } + err = checkAggregationDelta(params.AggregationDelta, noiseKind) + if err != nil { + return err + } + err = checkPartitionSelectionEpsilon(params.PartitionSelectionParams.Epsilon, 
params.PublicPartitions) + if err != nil { + return err + } + err = checkPartitionSelectionDelta(params.PartitionSelectionParams.Delta, params.PublicPartitions) + if err != nil { + return err + } + err = checkMaxPartitionsContributedPartitionSelection(params.PartitionSelectionParams.MaxPartitionsContributed) + if err != nil { + return err } err = checks.CheckBoundsFloat64(params.MinValue, params.MaxValue) if err != nil { diff --git a/privacy-on-beam/pbeam/mean_test.go b/privacy-on-beam/pbeam/mean_test.go index 5baf05f9..6de8e405 100644 --- a/privacy-on-beam/pbeam/mean_test.go +++ b/privacy-on-beam/pbeam/mean_test.go @@ -31,60 +31,6 @@ import ( ) func TestNewBoundedMeanFn(t *testing.T) { - opts := []cmp.Option{ - cmpopts.EquateApprox(0, 1e-10), - cmpopts.IgnoreUnexported(boundedMeanFn{}), - } - for _, tc := range []struct { - desc string - noiseKind noise.Kind - want *boundedMeanFn - }{ - {"Laplace noise kind", noise.LaplaceNoise, - &boundedMeanFn{ - NoiseEpsilon: 0.5, - PartitionSelectionEpsilon: 0.5, - NoiseDelta: 0, - PartitionSelectionDelta: 1e-5, - MaxPartitionsContributed: 17, - MaxContributionsPerPartition: 5, - Lower: 0, - Upper: 10, - NoiseKind: noise.LaplaceNoise, - }}, - {"Gaussian noise kind", noise.GaussianNoise, - &boundedMeanFn{ - NoiseEpsilon: 0.5, - PartitionSelectionEpsilon: 0.5, - NoiseDelta: 5e-6, - PartitionSelectionDelta: 5e-6, - MaxPartitionsContributed: 17, - MaxContributionsPerPartition: 5, - Lower: 0, - Upper: 10, - NoiseKind: noise.GaussianNoise, - }}, - } { - got, err := newBoundedMeanFn(MeanParams{ - Epsilon: 1, - Delta: 1e-5, - MaxPartitionsContributed: 17, - MaxContributionsPerPartition: 5, - MinValue: 0, - MaxValue: 10, - }, tc.noiseKind, false, TestModeDisabled, false) - if err != nil { - t.Fatalf("Couldn't get newBoundedMeanFn: %v", err) - } - if diff := cmp.Diff(tc.want, got, opts...); diff != "" { - t.Errorf("newBoundedMeanFn: for %q (-want +got):\n%s", tc.desc, diff) - } - } -} - -// The logic mirrors TestNewBoundedMeanFn, but 
with the new privacy budget API where -// clients specify aggregation budget and partition selection budget separately. -func TestNewBoundedMeanFnTemp(t *testing.T) { opts := []cmp.Option{ cmpopts.EquateApprox(0, 1e-10), cmpopts.IgnoreUnexported(boundedMeanFn{}), @@ -1478,18 +1424,17 @@ func TestMeanPerKeyWithEmptyPartitionsNoNoise(t *testing.T) { func TestCheckMeanPerKeyParams(t *testing.T) { _, _, publicPartitions := ptest.CreateList([]int{0, 1}) for _, tc := range []struct { - desc string - params MeanParams - usesNewPrivacyBudgetAPI bool - noiseKind noise.Kind - partitionType reflect.Type - wantErr bool + desc string + params MeanParams + noiseKind noise.Kind + partitionType reflect.Type + wantErr bool }{ { desc: "valid parameters", params: MeanParams{ - Epsilon: 1.0, - Delta: 1e-5, + AggregationEpsilon: 1.0, + PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, MaxPartitionsContributed: 1, MaxContributionsPerPartition: 1, MinValue: -5.0, @@ -1500,10 +1445,10 @@ func TestCheckMeanPerKeyParams(t *testing.T) { wantErr: false, }, { - desc: "negative epsilon", + desc: "PartitionSelectionParams.MaxPartitionsContributed set", params: MeanParams{ - Epsilon: -1.0, - Delta: 1e-5, + AggregationEpsilon: 1.0, + PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5, MaxPartitionsContributed: 1}, MaxPartitionsContributed: 1, MaxContributionsPerPartition: 1, MinValue: -5.0, @@ -1514,9 +1459,10 @@ func TestCheckMeanPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "zero delta w/o public partitions", + desc: "negative aggregationEpsilon", params: MeanParams{ - Epsilon: 1.0, + AggregationEpsilon: -1.0, + PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, MaxPartitionsContributed: 1, MaxContributionsPerPartition: 1, MinValue: -5.0, @@ -1527,52 +1473,12 @@ func TestCheckMeanPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "MaxValue < MinValue", - params: MeanParams{ - Epsilon: 1.0, - 
Delta: 1e-5, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: 6.0, - MaxValue: 5.0, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "MaxValue = MinValue", + desc: "negative partitionSelectionEpsilon", params: MeanParams{ - Epsilon: 1.0, - Delta: 1e-5, + AggregationEpsilon: 1.0, + PartitionSelectionParams: PartitionSelectionParams{Epsilon: -1.0, Delta: 1e-5}, MaxPartitionsContributed: 1, MaxContributionsPerPartition: 1, - MinValue: 5.0, - MaxValue: 5.0, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "zero MaxContributionsPerPartition", - params: MeanParams{ - Epsilon: 1.0, - Delta: 1e-5, - MaxPartitionsContributed: 1, - MinValue: -5.0, - MaxValue: 5.0, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "zero MaxPartitionsContributed", - params: MeanParams{ - Epsilon: 1.0, - Delta: 1e-5, - MaxContributionsPerPartition: 1, MinValue: -5.0, MaxValue: 5.0, }, @@ -1581,98 +1487,10 @@ func TestCheckMeanPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "non-zero delta w/ public partitions & Laplace", - params: MeanParams{ - Epsilon: 1.0, - Delta: 1e-5, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: -5.0, - MaxValue: 5.0, - PublicPartitions: publicPartitions, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(0), - wantErr: true, - }, - { - desc: "wrong partition type w/ public partitions as beam.PCollection", - params: MeanParams{ - Epsilon: 1.0, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: -5.0, - MaxValue: 5.0, - PublicPartitions: publicPartitions, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, - }, - { - desc: "wrong partition type w/ public partitions as slice", - params: MeanParams{ - Epsilon: 1.0, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: 
-5.0, - MaxValue: 5.0, - PublicPartitions: []int{0}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, - }, - { - desc: "wrong partition type w/ public partitions as array", - params: MeanParams{ - Epsilon: 1.0, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: -5.0, - MaxValue: 5.0, - PublicPartitions: [1]int{0}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, - }, - { - desc: "public partitions as something other than beam.PCollection, slice or array", - params: MeanParams{ - Epsilon: 1.0, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: -5.0, - MaxValue: 5.0, - PublicPartitions: "", - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, - }, - // Test cases for the new privacy budget API. - { - desc: "new API, valid parameters", + desc: "zero partitionSelectionDelta w/o public partitions", params: MeanParams{ AggregationEpsilon: 1.0, - PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: -5.0, - MaxValue: 5.0, - }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: false, - }, - { - desc: "new API, PartitionSelectionParams.MaxPartitionsContributed set", - usesNewPrivacyBudgetAPI: true, - params: MeanParams{ - AggregationEpsilon: 1.0, - PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5, MaxPartitionsContributed: 1}, + PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 0}, MaxPartitionsContributed: 1, MaxContributionsPerPartition: 1, MinValue: -5.0, @@ -1683,52 +1501,7 @@ func TestCheckMeanPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, negative aggregationEpsilon", - params: MeanParams{ - AggregationEpsilon: -1.0, - PartitionSelectionParams: 
PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: -5.0, - MaxValue: 5.0, - }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "new API, negative partitionSelectionEpsilon", - params: MeanParams{ - AggregationEpsilon: 1.0, - PartitionSelectionParams: PartitionSelectionParams{Epsilon: -1.0, Delta: 1e-5}, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: -5.0, - MaxValue: 5.0, - }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "new API, zero partitionSelectionDelta w/o public partitions", - params: MeanParams{ - AggregationEpsilon: 1.0, - PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 0}, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: -5.0, - MaxValue: 5.0, - }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "new API, zero partitionSelectionEpsilon w/o public partitions", + desc: "zero partitionSelectionEpsilon w/o public partitions", params: MeanParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 0, Delta: 1e-5}, @@ -1737,13 +1510,12 @@ func TestCheckMeanPerKeyParams(t *testing.T) { MinValue: -5.0, MaxValue: 5.0, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: nil, + wantErr: true, }, { - desc: "new API, MaxValue < MinValue", + desc: "MaxValue < MinValue", params: MeanParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, @@ -1752,13 +1524,12 @@ func TestCheckMeanPerKeyParams(t *testing.T) { MinValue: 6.0, MaxValue: 5.0, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: 
noise.LaplaceNoise, - partitionType: nil, - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: nil, + wantErr: true, }, { - desc: "new API, MaxValue = MinValue", + desc: "MaxValue = MinValue", params: MeanParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, @@ -1767,13 +1538,12 @@ func TestCheckMeanPerKeyParams(t *testing.T) { MinValue: 5.0, MaxValue: 5.0, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: nil, + wantErr: true, }, { - desc: "new API, zero MaxContributionsPerPartition", + desc: "zero MaxContributionsPerPartition", params: MeanParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, @@ -1781,13 +1551,12 @@ func TestCheckMeanPerKeyParams(t *testing.T) { MinValue: -5.0, MaxValue: 5.0, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: nil, + wantErr: true, }, { - desc: "new API, zero MaxPartitionsContributed", + desc: "zero MaxPartitionsContributed", params: MeanParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, @@ -1795,13 +1564,12 @@ func TestCheckMeanPerKeyParams(t *testing.T) { MinValue: -5.0, MaxValue: 5.0, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: nil, + wantErr: true, }, { - desc: "new API, non-zero partitionSelectionDelta w/ public partitions", + desc: "non-zero partitionSelectionDelta w/ public partitions", params: MeanParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 0, Delta: 1e-5}, @@ -1811,13 +1579,12 @@ func TestCheckMeanPerKeyParams(t *testing.T) { MaxValue: 5.0, PublicPartitions: 
publicPartitions, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(0), - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: reflect.TypeOf(0), + wantErr: true, }, { - desc: "new API, non-zero partitionSelectionEpsilon w/ public partitions", + desc: "non-zero partitionSelectionEpsilon w/ public partitions", params: MeanParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 0}, @@ -1827,13 +1594,12 @@ func TestCheckMeanPerKeyParams(t *testing.T) { MaxValue: 5.0, PublicPartitions: publicPartitions, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(0), - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: reflect.TypeOf(0), + wantErr: true, }, { - desc: "new API, wrong partition type w/ public partitions as beam.PCollection", + desc: "wrong partition type w/ public partitions as beam.PCollection", params: MeanParams{ AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, @@ -1842,13 +1608,12 @@ func TestCheckMeanPerKeyParams(t *testing.T) { MaxValue: 5.0, PublicPartitions: publicPartitions, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: reflect.TypeOf(""), + wantErr: true, }, { - desc: "new API, wrong partition type w/ public partitions as slice", + desc: "wrong partition type w/ public partitions as slice", params: MeanParams{ AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, @@ -1857,13 +1622,12 @@ func TestCheckMeanPerKeyParams(t *testing.T) { MaxValue: 5.0, PublicPartitions: []int{0}, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: reflect.TypeOf(""), + wantErr: true, }, { - desc: "new API, wrong partition type w/ public partitions as 
array", + desc: "wrong partition type w/ public partitions as array", params: MeanParams{ AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, @@ -1872,13 +1636,12 @@ func TestCheckMeanPerKeyParams(t *testing.T) { MaxValue: 5.0, PublicPartitions: [1]int{0}, }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: reflect.TypeOf(""), + wantErr: true, }, { - desc: "new API, public partitions as something other than beam.PCollection, slice or array", + desc: "public partitions as something other than beam.PCollection, slice or array", params: MeanParams{ AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, @@ -1887,13 +1650,12 @@ func TestCheckMeanPerKeyParams(t *testing.T) { MaxValue: 5.0, PublicPartitions: "", }, - usesNewPrivacyBudgetAPI: true, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, + noiseKind: noise.LaplaceNoise, + partitionType: reflect.TypeOf(""), + wantErr: true, }, } { - if err := checkMeanPerKeyParams(tc.params, tc.usesNewPrivacyBudgetAPI, tc.noiseKind, tc.partitionType); (err != nil) != tc.wantErr { + if err := checkMeanPerKeyParams(tc.params, tc.noiseKind, tc.partitionType); (err != nil) != tc.wantErr { t.Errorf("With %s, got=%v, wantErr=%t", tc.desc, err, tc.wantErr) } } diff --git a/privacy-on-beam/pbeam/pbeam.go b/privacy-on-beam/pbeam/pbeam.go index 564d3e50..97859fe7 100644 --- a/privacy-on-beam/pbeam/pbeam.go +++ b/privacy-on-beam/pbeam/pbeam.go @@ -45,7 +45,7 @@ // icol := beam.ParDo(s, input, extractID) // icol is a PCollection // // Transforms the input PCollection into a PrivatePCollection with parameters ε=1 and δ=10⁻¹⁰. // // The privacy ID is "hidden" by the operation: pcol behaves as if it were a PCollection. 
-// spec, err := pbeam.NewPrivacySpecTemp(pbeam.PrivacySpecParams{ +// spec, err := pbeam.NewPrivacySpec(pbeam.PrivacySpecParams{ // AggregationEpsilon: 0.5, // PartitionSelectionEpsilon: 0.5, // PartitionSelectionDelta: 1e-10, @@ -123,7 +123,6 @@ import ( "github.com/google/differential-privacy/go/v2/checks" "github.com/google/differential-privacy/go/v2/noise" "github.com/google/differential-privacy/privacy-on-beam/v2/internal/kv" - "github.com/google/differential-privacy/privacy-on-beam/v2/internal/testoption" "github.com/apache/beam/sdks/v2/go/pkg/beam" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/typex" "github.com/apache/beam/sdks/v2/go/pkg/beam/register" @@ -142,15 +141,9 @@ func init() { // a PrivatePCollection. It encapsulates a privacy budget that must be shared // between all aggregations on PrivatePCollections using this PrivacySpec. If // you have multiple pipelines in the same binary, and want them to use -// different privacy budgets, call NewPrivacySpecTemp multiple times and give a +// different privacy budgets, call NewPrivacySpec multiple times and give a // different PrivacySpec to each PrivatePCollection. type PrivacySpec struct { - budget *privacyBudget // Epsilon/Delta (ε,δ) budget available for this PrivatePCollection. - // Whether this PrivacySpec uses the new Privacy Budget API, i.e. aggregationBudget & partitionSelectionBudget - // as opposed to the old `budget`. - // - // TODO: Remove after migration is finalized. - usesNewPrivacyBudgetAPI bool aggregationBudget *privacyBudget // Epsilon/Delta (ε,δ) budget available for aggregations performed on this PrivatePCollection. partitionSelectionBudget *privacyBudget // Epsilon/Delta (ε,δ) budget available for partition selections performed on this PrivatePCollection. preThreshold int64 // Pre-threshold K applied on top of DP partition selection. @@ -210,52 +203,6 @@ type PrivacySpecParams struct { // TestModeWithoutContributionBounding if you want to enable test mode. 
TestMode TestMode } - -// NewPrivacySpecTemp returns a PrivacySpec from given PrivacySpecParams. This is a temporary -// constructor. Will be migrated to NewPrivacySpec once all clients are migrated to this temporary -// constructor. -// -// Uses the new privacy budget API where clients specify aggregation budget and partition selection budget separately. -func NewPrivacySpecTemp(params PrivacySpecParams) (*PrivacySpec, error) { - - err := checks.CheckEpsilon(params.AggregationEpsilon) - if err != nil { - return nil, fmt.Errorf("AggregationEpsilon: %v", err) - } - err = checks.CheckDelta(params.AggregationDelta) - if err != nil { - return nil, fmt.Errorf("AggregationDelta: %v", err) - } - err = checks.CheckEpsilon(params.PartitionSelectionEpsilon) - if err != nil { - return nil, fmt.Errorf("PartitionSelectionEpsilon: %v", err) - } - err = checks.CheckDelta(params.PartitionSelectionDelta) - if err != nil { - return nil, fmt.Errorf("PartitionSelectionDelta: %v", err) - } - err = checks.CheckPreThreshold(params.PreThreshold) - if params.PreThreshold > 0 && params.PartitionSelectionDelta == 0 { - return nil, fmt.Errorf("when PreThreshold is set, partition selection budget must also be set") - } - if err != nil { - return nil, fmt.Errorf("PreThreshold: %v", err) - } - if params.AggregationEpsilon == 0 && params.PartitionSelectionEpsilon == 0 { - return nil, fmt.Errorf("either AggregationEpsilon or PartitionSelectionEpsilon must be set to a positive value") - } - if params.PartitionSelectionEpsilon != 0 && params.PartitionSelectionDelta == 0 { - return nil, fmt.Errorf("PartitionSelectionDelta must be set to a positive value whenever PartitionSelectionEpsilon (%f) is set", params.PartitionSelectionEpsilon) - } - return &PrivacySpec{ - usesNewPrivacyBudgetAPI: true, - aggregationBudget: &privacyBudget{epsilon: params.AggregationEpsilon, delta: params.AggregationDelta}, - partitionSelectionBudget: &privacyBudget{epsilon: params.PartitionSelectionEpsilon, delta: 
params.PartitionSelectionDelta}, - preThreshold: params.PreThreshold, - testMode: params.TestMode, - }, nil -} - type privacyBudget struct { // Epsilon/Delta (ε,δ) budget available. epsilon, delta float64 @@ -335,20 +282,6 @@ func budgetSlightlyTooLarge(remaining, requested float64) bool { return math.Abs(diff) <= remaining/eqBudgetRelTol } -// PrivacySpecOption is used for customizing PrivacySpecs. In the typical use -// case, PrivacySpecOptions are passed into the NewPrivacySpec constructor to -// create a further customized PrivacySpec. -type PrivacySpecOption any - -func evaluatePrivacySpecOption(opt PrivacySpecOption, spec *PrivacySpec) { - switch opt { - case testoption.EnableNoNoiseWithContributionBounding{}: - spec.testMode = TestModeWithContributionBounding - case testoption.EnableNoNoiseWithoutContributionBounding{}: - spec.testMode = TestModeWithoutContributionBounding - } -} - // NoiseKind represents the kind of noise to be used in an aggregations. type NoiseKind interface { toNoiseKind() noise.Kind @@ -369,20 +302,50 @@ func (ln LaplaceNoise) toNoiseKind() noise.Kind { } // NewPrivacySpec creates a new PrivacySpec with the specified privacy budget -// and options. +// and parameters. // -// The epsilon and delta arguments are the total (ε,δ)-differential privacy -// budget for the pipeline. If there is only one aggregation, the entire budget -// will be used for this aggregation. Otherwise, the user must specify how the -// privacy budget is split across aggregations. -// -// Deprecated: Use NewPrivacySpecTemp instead. -func NewPrivacySpec(epsilon, delta float64, options ...PrivacySpecOption) *PrivacySpec { - ps := &PrivacySpec{budget: &privacyBudget{epsilon: epsilon, delta: delta}} - for _, opt := range options { - evaluatePrivacySpecOption(opt, ps) +// Aggregation(Epsilon|Delta) and PartitionSelection(Epsilon|Delta) are the total +// (ε,δ)-differential privacy budget for the pipeline. 
If there is only one aggregation +// or partition selection, the entire budget will be used for this operation. Otherwise, +// the user must specify how the privacy budget is split across aggregations. +func NewPrivacySpec(params PrivacySpecParams) (*PrivacySpec, error) { + + err := checks.CheckEpsilon(params.AggregationEpsilon) + if err != nil { + return nil, fmt.Errorf("AggregationEpsilon: %v", err) + } + err = checks.CheckDelta(params.AggregationDelta) + if err != nil { + return nil, fmt.Errorf("AggregationDelta: %v", err) + } + err = checks.CheckEpsilon(params.PartitionSelectionEpsilon) + if err != nil { + return nil, fmt.Errorf("PartitionSelectionEpsilon: %v", err) + } + err = checks.CheckDelta(params.PartitionSelectionDelta) + if err != nil { + return nil, fmt.Errorf("PartitionSelectionDelta: %v", err) + } + if params.PreThreshold > 0 && params.PartitionSelectionDelta == 0 { + return nil, fmt.Errorf("when PreThreshold is set, partition selection budget must also be set") + } + err = checks.CheckPreThreshold(params.PreThreshold) + if err != nil { + return nil, fmt.Errorf("PreThreshold: %v", err) + } + if params.AggregationEpsilon == 0 && params.PartitionSelectionEpsilon == 0 { + return nil, fmt.Errorf("either AggregationEpsilon or PartitionSelectionEpsilon must be set to a positive value") } - return ps + if params.PartitionSelectionEpsilon != 0 && params.PartitionSelectionDelta == 0 { + return nil, fmt.Errorf("PartitionSelectionDelta must be set to a positive value whenever PartitionSelectionEpsilon is set. 
"+ + "PartitionSelectionEpsilon is currently set to (%f)", params.PartitionSelectionEpsilon) + } + return &PrivacySpec{ + aggregationBudget: &privacyBudget{epsilon: params.AggregationEpsilon, delta: params.AggregationDelta}, + partitionSelectionBudget: &privacyBudget{epsilon: params.PartitionSelectionEpsilon, delta: params.PartitionSelectionDelta}, + preThreshold: params.PreThreshold, + testMode: params.TestMode, + }, nil } // A PrivatePCollection embeds a PCollection, associating each element to a diff --git a/privacy-on-beam/pbeam/pbeam_main_test.go b/privacy-on-beam/pbeam/pbeam_main_test.go index b368c202..256c8d11 100644 --- a/privacy-on-beam/pbeam/pbeam_main_test.go +++ b/privacy-on-beam/pbeam/pbeam_main_test.go @@ -31,7 +31,7 @@ var gaussianNoise = GaussianNoise{} // Helper function to create a PrivacySpec that deals with error handling. func privacySpec(t *testing.T, params PrivacySpecParams) *PrivacySpec { t.Helper() - spec, err := NewPrivacySpecTemp(params) + spec, err := NewPrivacySpec(params) if err != nil { t.Fatalf("Failed to create PrivacySpec") } diff --git a/privacy-on-beam/pbeam/pbeam_test.go b/privacy-on-beam/pbeam/pbeam_test.go index 499cd988..0649ab2a 100644 --- a/privacy-on-beam/pbeam/pbeam_test.go +++ b/privacy-on-beam/pbeam/pbeam_test.go @@ -39,7 +39,7 @@ func init() { register.Function1x2[int, int, int](addZeroIntValueFn) } -func TestNewPrivacySpecTemp(t *testing.T) { +func TestNewPrivacySpec(t *testing.T) { for _, tc := range []struct { desc string params PrivacySpecParams @@ -76,6 +76,14 @@ func TestNewPrivacySpecTemp(t *testing.T) { PrivacySpecParams{}, true, }, + { + "negative PartitionSelectionEpsilon", + PrivacySpecParams{ + PartitionSelectionEpsilon: -1, + PartitionSelectionDelta: 1e-5, + }, + true, + }, { "negative PreThreshold", PrivacySpecParams{ @@ -86,7 +94,7 @@ func TestNewPrivacySpecTemp(t *testing.T) { true, }, { - "Partition selection budget is not set when PreThreshold is set", + "partition selection budget is not set when 
PreThreshold is set", PrivacySpecParams{ AggregationEpsilon: 1.0, PreThreshold: 1, @@ -147,7 +155,7 @@ func TestNewPrivacySpecTemp(t *testing.T) { true, }, } { - if _, err := NewPrivacySpecTemp(tc.params); (err != nil) != tc.wantErr { + if _, err := NewPrivacySpec(tc.params); (err != nil) != tc.wantErr { t.Errorf("With %s, got=%v, wantErr=%t", tc.desc, err, tc.wantErr) } } diff --git a/privacy-on-beam/pbeam/pbeamtest/BUILD.bazel b/privacy-on-beam/pbeam/pbeamtest/BUILD.bazel index b92baaa8..d5a8d52f 100644 --- a/privacy-on-beam/pbeam/pbeamtest/BUILD.bazel +++ b/privacy-on-beam/pbeam/pbeamtest/BUILD.bazel @@ -14,8 +14,8 @@ # limitations under the License. # -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") load("@bazel_gazelle//:def.bzl", "gazelle") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") # gazelle:prefix github.com/google/differential-privacy/privacy-on-beam/v2/pbeam/pbeamtest gazelle(name = "gazelle") @@ -27,8 +27,6 @@ go_library( importpath = "github.com/google/differential-privacy/privacy-on-beam/v2/pbeam/pbeamtest", visibility = ["//visibility:public"], deps = [ - "//internal/testoption:go_default_library", - "//pbeam:go_default_library", "@com_github_google_differential_privacy_go_v2//dpagg:go_default_library", ], ) diff --git a/privacy-on-beam/pbeam/pbeamtest/pbeamtest.go b/privacy-on-beam/pbeam/pbeamtest/pbeamtest.go index ac668ab0..9d3531f5 100644 --- a/privacy-on-beam/pbeam/pbeamtest/pbeamtest.go +++ b/privacy-on-beam/pbeam/pbeamtest/pbeamtest.go @@ -22,46 +22,8 @@ import ( "math" "github.com/google/differential-privacy/go/v2/dpagg" - "github.com/google/differential-privacy/privacy-on-beam/v2/internal/testoption" - "github.com/google/differential-privacy/privacy-on-beam/v2/pbeam" ) -// NewPrivacySpecNoNoiseWithContributionBounding creates a new PrivacySpec with -// the specified privacy budget that adds no noise and keeps all partitions but -// still does contribution bounding. 
-// -// The epsilon and delta arguments are the total (ε,δ)-differential privacy -// budget for the pipeline. If there is only one aggregation, the entire budget -// will be used for this aggregation. Otherwise, the user must specify how the -// privacy budget is split across aggregations. -// -// This does NOT provide any privacy protections, so should only be used in -// test code in order to avoid dealing with random noise. -// -// Deprecated: Use pbeam.NewPrivacySpecTemp() with TestMode: TestModeWithContributionBounding instead. -func NewPrivacySpecNoNoiseWithContributionBounding(epsilon, delta float64) *pbeam.PrivacySpec { - return pbeam.NewPrivacySpec(epsilon, delta, testoption.EnableNoNoiseWithContributionBounding{}) -} - -// NewPrivacySpecNoNoiseWithoutContributionBounding creates a new PrivacySpec with -// the specified privacy budget that adds no noise, keeps all partitions and disables -// contribution bounding (both per-partition and cross-partition). The contribution -// bounding parameters should still be specified for all the pbeam aggregations -// in the pipeline since they will be validated. -// -// The epsilon and delta arguments are the total (ε,δ)-differential privacy -// budget for the pipeline. If there is only one aggregation, the entire budget -// will be used for this aggregation. Otherwise, the user must specify how the -// privacy budget is split across aggregations. -// -// This does NOT provide any privacy protections, so should only be used in -// test code in order to avoid dealing with random noise. -// -// Deprecated: Use pbeam.NewPrivacySpecTemp() with TestMode: TestModeWithoutContributionBounding instead. 
-func NewPrivacySpecNoNoiseWithoutContributionBounding(epsilon, delta float64) *pbeam.PrivacySpec { - return pbeam.NewPrivacySpec(epsilon, delta, testoption.EnableNoNoiseWithoutContributionBounding{}) -} - // QuantilesTolerance returns a tolerance t such that the output of QuantilesPerKey is // within t of the exact result for given MinValue and MaxValue parameters of // QuantilesParams when pbeamtest is used. diff --git a/privacy-on-beam/pbeam/pbeamtest/pbeamtest_test.go b/privacy-on-beam/pbeam/pbeamtest/pbeamtest_test.go index 8ae2bca4..6ed5ea54 100644 --- a/privacy-on-beam/pbeam/pbeamtest/pbeamtest_test.go +++ b/privacy-on-beam/pbeam/pbeamtest/pbeamtest_test.go @@ -45,12 +45,12 @@ func privacySpec(t *testing.T, testMode pbeam.TestMode, publicPartitions bool) * var spec *pbeam.PrivacySpec var err error if publicPartitions { - spec, err = pbeam.NewPrivacySpecTemp(pbeam.PrivacySpecParams{ + spec, err = pbeam.NewPrivacySpec(pbeam.PrivacySpecParams{ AggregationEpsilon: tinyEpsilon, TestMode: testMode, }) } else { - spec, err = pbeam.NewPrivacySpecTemp(pbeam.PrivacySpecParams{ + spec, err = pbeam.NewPrivacySpec(pbeam.PrivacySpecParams{ AggregationEpsilon: tinyEpsilon / 2, PartitionSelectionEpsilon: tinyEpsilon / 2, PartitionSelectionDelta: tinyDelta, @@ -58,7 +58,7 @@ func privacySpec(t *testing.T, testMode pbeam.TestMode, publicPartitions bool) * }) } if err != nil { - t.Fatalf("NewPrivacySpecTemp: %v", err) + t.Fatalf("Couldn't create PrivacySpec: %v", err) } return spec } diff --git a/privacy-on-beam/pbeam/quantiles.go b/privacy-on-beam/pbeam/quantiles.go index 084992c6..bb65e00b 100644 --- a/privacy-on-beam/pbeam/quantiles.go +++ b/privacy-on-beam/pbeam/quantiles.go @@ -166,21 +166,15 @@ func QuantilesPerKey(s beam.Scope, pcol PrivatePCollection, params QuantilesPara // Get privacy parameters. 
spec := pcol.privacySpec var err error - if spec.usesNewPrivacyBudgetAPI { - params.AggregationEpsilon, params.AggregationDelta, err = spec.aggregationBudget.get(params.AggregationEpsilon, params.AggregationDelta) - if err != nil { - log.Fatalf("Couldn't consume aggregation budget for Quantiles: %v", err) - } - if params.PublicPartitions == nil { - params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta, err = spec.partitionSelectionBudget.get(params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta) - if err != nil { - log.Fatalf("Couldn't consume partition selection budget for Quantiles: %v", err) - } - } - } else { - params.Epsilon, params.Delta, err = spec.budget.get(params.Epsilon, params.Delta) + + params.AggregationEpsilon, params.AggregationDelta, err = spec.aggregationBudget.get(params.AggregationEpsilon, params.AggregationDelta) + if err != nil { + log.Fatalf("Couldn't consume aggregation budget for Quantiles: %v", err) + } + if params.PublicPartitions == nil { + params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta, err = spec.partitionSelectionBudget.get(params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta) if err != nil { - log.Fatalf("Couldn't consume budget for Quantiles: %v", err) + log.Fatalf("Couldn't consume partition selection budget for Quantiles: %v", err) } } @@ -192,7 +186,7 @@ func QuantilesPerKey(s beam.Scope, pcol PrivatePCollection, params QuantilesPara noiseKind = params.NoiseKind.toNoiseKind() } - err = checkQuantilesPerKeyParams(params, spec.usesNewPrivacyBudgetAPI, noiseKind, pcol.codec.KType.T) + err = checkQuantilesPerKeyParams(params, noiseKind, pcol.codec.KType.T) if err != nil { log.Fatalf("pbeam.QuantilesPerKey: %v", err) } @@ -248,19 +242,10 @@ func QuantilesPerKey(s beam.Scope, pcol PrivatePCollection, params QuantilesPara var result beam.PCollection // Add public partitions and return the aggregation output, if public partitions 
are specified. if params.PublicPartitions != nil { - if spec.usesNewPrivacyBudgetAPI { - result = addPublicPartitionsForQuantiles(s, *spec, params, noiseKind, partialKV) - } else { - result = addPublicPartitionsForQuantiles(s, *spec, params, noiseKind, partialKV) - } + result = addPublicPartitionsForQuantiles(s, *spec, params, noiseKind, partialKV) } else { // Compute the quantiles for each partition. Result is PCollection. - var boundedQuantilesFn *boundedQuantilesFn - if spec.usesNewPrivacyBudgetAPI { - boundedQuantilesFn, err = newBoundedQuantilesFnTemp(*spec, params, noiseKind, false) - } else { - boundedQuantilesFn, err = newBoundedQuantilesFn(params, noiseKind, false, spec.testMode) - } + boundedQuantilesFn, err := newBoundedQuantilesFn(*spec, params, noiseKind, false) if err != nil { log.Fatalf("Couldn't get boundedQuantilesFn for QuantilesPerKey: %v", err) } @@ -270,6 +255,7 @@ func QuantilesPerKey(s beam.Scope, pcol PrivatePCollection, params QuantilesPara // Finally, drop thresholded partitions. result = beam.ParDo(s, dropThresholdedPartitionsFloat64Slice, quantiles) } + return result } @@ -282,13 +268,7 @@ func addPublicPartitionsForQuantiles(s beam.Scope, spec PrivacySpec, params Quan } emptyPublicPartitions := beam.ParDo(s, addEmptySliceToPublicPartitionsFloat64, publicPartitions) // Second, add noise to all public partitions (all of which are empty-valued). 
- var boundedQuantilesFn *boundedQuantilesFn - var err error - if spec.usesNewPrivacyBudgetAPI { - boundedQuantilesFn, err = newBoundedQuantilesFnTemp(spec, params, noiseKind, true) - } else { - boundedQuantilesFn, err = newBoundedQuantilesFn(params, noiseKind, true, spec.testMode) - } + boundedQuantilesFn, err := newBoundedQuantilesFn(spec, params, noiseKind, true) if err != nil { log.Fatalf("Couldn't get boundedMeanFn for MeanPerKey: %v", err) } @@ -300,41 +280,30 @@ func addPublicPartitionsForQuantiles(s beam.Scope, spec PrivacySpec, params Quan return beam.ParDo(s, mergeResultWithEmptyPublicPartitionsFn, noisyQuantilesWithEmptyPublicPartitions) } -func checkQuantilesPerKeyParams(params QuantilesParams, usesNewPrivacyBudgetAPI bool, noiseKind noise.Kind, partitionType reflect.Type) error { +func checkQuantilesPerKeyParams(params QuantilesParams, noiseKind noise.Kind, partitionType reflect.Type) error { err := checkPublicPartitions(params.PublicPartitions, partitionType) if err != nil { return err } - if usesNewPrivacyBudgetAPI { - err = checks.CheckEpsilon(params.AggregationEpsilon) - if err != nil { - return err - } - err = checkAggregationDelta(params.AggregationDelta, noiseKind) - if err != nil { - return err - } - err = checkPartitionSelectionEpsilon(params.PartitionSelectionParams.Epsilon, params.PublicPartitions) - if err != nil { - return err - } - err = checkPartitionSelectionDelta(params.PartitionSelectionParams.Delta, params.PublicPartitions) - if err != nil { - return err - } - err = checkMaxPartitionsContributedPartitionSelection(params.PartitionSelectionParams.MaxPartitionsContributed) - if err != nil { - return err - } - } else { - err = checks.CheckEpsilon(params.Epsilon) - if err != nil { - return err - } - err = checkDelta(params.Delta, noiseKind, params.PublicPartitions) - if err != nil { - return err - } + err = checks.CheckEpsilon(params.AggregationEpsilon) + if err != nil { + return err + } + err = 
checkAggregationDelta(params.AggregationDelta, noiseKind) + if err != nil { + return err + } + err = checkPartitionSelectionEpsilon(params.PartitionSelectionParams.Epsilon, params.PublicPartitions) + if err != nil { + return err + } + err = checkPartitionSelectionDelta(params.PartitionSelectionParams.Delta, params.PublicPartitions) + if err != nil { + return err + } + err = checkMaxPartitionsContributedPartitionSelection(params.PartitionSelectionParams.MaxPartitionsContributed) + if err != nil { + return err } err = checks.CheckBoundsFloat64(params.MinValue, params.MaxValue) if err != nil { @@ -385,41 +354,8 @@ type boundedQuantilesFn struct { TestMode TestMode } -// newBoundedQuantilesFn returns a boundedQuantilesFn with the given budget and parameters. -func newBoundedQuantilesFn(params QuantilesParams, noiseKind noise.Kind, publicPartitions bool, testMode TestMode) (*boundedQuantilesFn, error) { - fn := &boundedQuantilesFn{ - MaxPartitionsContributed: params.MaxPartitionsContributed, - MaxContributionsPerPartition: params.MaxContributionsPerPartition, - Lower: params.MinValue, - Upper: params.MaxValue, - Ranks: params.Ranks, - NoiseKind: noiseKind, - PublicPartitions: publicPartitions, - TestMode: testMode, - } - if fn.PublicPartitions { - fn.NoiseEpsilon = params.Epsilon - fn.NoiseDelta = params.Delta - return fn, nil - } - fn.NoiseEpsilon = params.Epsilon / 2 - fn.PartitionSelectionEpsilon = params.Epsilon - fn.NoiseEpsilon - switch noiseKind { - case noise.GaussianNoise: - fn.NoiseDelta = params.Delta / 2 - case noise.LaplaceNoise: - fn.NoiseDelta = 0 - default: - return nil, fmt.Errorf("unknown noise.Kind (%v) is specified. Please specify a valid noise", noiseKind) - } - fn.PartitionSelectionDelta = params.Delta - fn.NoiseDelta - return fn, nil -} - // newBoundedQuantilesFnTemp returns a boundedQuantilesFn with the given budget and parameters. -// -// Uses the new privacy budget API. 
-func newBoundedQuantilesFnTemp(spec PrivacySpec, params QuantilesParams, noiseKind noise.Kind, publicPartitions bool) (*boundedQuantilesFn, error) { +func newBoundedQuantilesFn(spec PrivacySpec, params QuantilesParams, noiseKind noise.Kind, publicPartitions bool) (*boundedQuantilesFn, error) { if noiseKind != noise.GaussianNoise && noiseKind != noise.LaplaceNoise { return nil, fmt.Errorf("unknown noise.Kind (%v) is specified. Please specify a valid noise", noiseKind) } diff --git a/privacy-on-beam/pbeam/quantiles_test.go b/privacy-on-beam/pbeam/quantiles_test.go index c68c7826..c615c552 100644 --- a/privacy-on-beam/pbeam/quantiles_test.go +++ b/privacy-on-beam/pbeam/quantiles_test.go @@ -30,64 +30,9 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" ) -func TestNewBoundedQuantilesFn(t *testing.T) { - opts := []cmp.Option{ - cmpopts.EquateApprox(0, 1e-10), - cmpopts.IgnoreUnexported(boundedQuantilesFn{}), - } - for _, tc := range []struct { - desc string - noiseKind noise.Kind - want any - }{ - {"Laplace noise kind", noise.LaplaceNoise, - &boundedQuantilesFn{ - NoiseEpsilon: 0.5, - PartitionSelectionEpsilon: 0.5, - NoiseDelta: 0, - PartitionSelectionDelta: 1e-5, - MaxPartitionsContributed: 17, - MaxContributionsPerPartition: 5, - Lower: 0, - Upper: 10, - Ranks: []float64{0.1, 0.5, 0.9}, - NoiseKind: noise.LaplaceNoise, - }}, - {"Gaussian noise kind", noise.GaussianNoise, - &boundedQuantilesFn{ - NoiseEpsilon: 0.5, - PartitionSelectionEpsilon: 0.5, - NoiseDelta: 5e-6, - PartitionSelectionDelta: 5e-6, - MaxPartitionsContributed: 17, - MaxContributionsPerPartition: 5, - Lower: 0, - Upper: 10, - Ranks: []float64{0.1, 0.5, 0.9}, - NoiseKind: noise.GaussianNoise, - }}, - } { - got, err := newBoundedQuantilesFn(QuantilesParams{ - Epsilon: 1, - Delta: 1e-5, - MaxPartitionsContributed: 17, - MaxContributionsPerPartition: 5, - MinValue: 0, - MaxValue: 10, - Ranks: []float64{0.1, 0.5, 0.9}, - }, tc.noiseKind, false, TestModeDisabled) - if err != nil { - t.Fatalf("Couldn't get 
newBoundedQuantilesFn: %v", err) - } - if diff := cmp.Diff(tc.want, got, opts...); diff != "" { - t.Errorf("newBoundedQuantilesFn: for %q (-want +got):\n%s", tc.desc, diff) - } - } -} - // The logic mirrors TestNewBoundedQuantilesFn, but with the new privacy budget API where // clients specify aggregation budget and partition selection budget separately. -func TestNewBoundedQuantilesFnTemp(t *testing.T) { +func TestNewBoundedQuantilesFn(t *testing.T) { opts := []cmp.Option{ cmpopts.EquateApprox(0, 1e-10), cmpopts.IgnoreUnexported(boundedQuantilesFn{}), @@ -143,7 +88,7 @@ func TestNewBoundedQuantilesFnTemp(t *testing.T) { NoiseKind: noise.GaussianNoise, }}, } { - got, err := newBoundedQuantilesFnTemp(PrivacySpec{preThreshold: tc.preThreshold, testMode: TestModeDisabled}, + got, err := newBoundedQuantilesFn(PrivacySpec{preThreshold: tc.preThreshold, testMode: TestModeDisabled}, QuantilesParams{ AggregationEpsilon: tc.aggregationEpsilon, AggregationDelta: tc.aggregationDelta, @@ -171,15 +116,20 @@ func TestBoundedQuantilesFnSetup(t *testing.T) { }{ {"Laplace noise kind", noise.LaplaceNoise, noise.Laplace()}, {"Gaussian noise kind", noise.GaussianNoise, noise.Gaussian()}} { - got, err := newBoundedQuantilesFn(QuantilesParams{ - Epsilon: 1, - Delta: 1e-5, - MaxPartitionsContributed: 17, - MaxContributionsPerPartition: 5, - MinValue: 0, - MaxValue: 10, - Ranks: []float64{0.1, 0.5, 0.9}, - }, tc.noiseKind, false, TestModeDisabled) + spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1, PartitionSelectionEpsilon: 1, PartitionSelectionDelta: 1e-5}) + got, err := newBoundedQuantilesFn( + *spec, + QuantilesParams{ + Epsilon: 1, + Delta: 1e-5, + MaxPartitionsContributed: 17, + MaxContributionsPerPartition: 5, + MinValue: 0, + MaxValue: 10, + Ranks: []float64{0.1, 0.5, 0.9}, + }, + tc.noiseKind, + false) if err != nil { t.Fatalf("Couldn't get newBoundedQuantilesFn: %v", err) } @@ -200,16 +150,21 @@ func TestBoundedQuantilesFnAddInput(t *testing.T) { lower := 0.0 upper 
:= 5.0 ranks := []float64{0.25, 0.75} + spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon, PartitionSelectionEpsilon: epsilon, PartitionSelectionDelta: delta}) // ε is split in two for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - fn, err := newBoundedQuantilesFn(QuantilesParams{ - Epsilon: 2 * epsilon, - Delta: delta, - MaxPartitionsContributed: maxPartitionsContributed, - MaxContributionsPerPartition: maxContributionsPerPartition, - MinValue: lower, - MaxValue: upper, - Ranks: ranks, - }, noise.LaplaceNoise, false, TestModeDisabled) + fn, err := newBoundedQuantilesFn( + *spec, + QuantilesParams{ + AggregationEpsilon: epsilon, + PartitionSelectionParams: PartitionSelectionParams{Epsilon: epsilon, Delta: delta}, + MaxPartitionsContributed: maxPartitionsContributed, + MaxContributionsPerPartition: maxContributionsPerPartition, + MinValue: lower, + MaxValue: upper, + Ranks: ranks, + }, + noise.LaplaceNoise, + false) if err != nil { t.Fatalf("Couldn't get newBoundedQuantilesFn: %v", err) } @@ -247,16 +202,19 @@ func TestBoundedQuantilesFnMergeAccumulators(t *testing.T) { lower := 0.0 upper := 5.0 ranks := []float64{0.25, 0.75} - // ε is split in two for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. 
- fn, err := newBoundedQuantilesFn(QuantilesParams{ - Epsilon: 2 * epsilon, - Delta: delta, - MaxPartitionsContributed: maxPartitionsContributed, - MaxContributionsPerPartition: maxContributionsPerPartition, - MinValue: lower, - MaxValue: upper, - Ranks: ranks, - }, noise.LaplaceNoise, false, TestModeDisabled) + spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon, PartitionSelectionEpsilon: epsilon, PartitionSelectionDelta: delta}) + fn, err := newBoundedQuantilesFn(*spec, + QuantilesParams{ + AggregationEpsilon: epsilon, + PartitionSelectionParams: PartitionSelectionParams{Epsilon: epsilon, Delta: delta}, + MaxPartitionsContributed: maxPartitionsContributed, + MaxContributionsPerPartition: maxContributionsPerPartition, + MinValue: lower, + MaxValue: upper, + Ranks: ranks, + }, + noise.LaplaceNoise, + false) if err != nil { t.Fatalf("Couldn't get newBoundedQuantilesFn: %v", err) } @@ -300,16 +258,20 @@ func TestBoundedQuantilesFnExtractOutputReturnsNilForSmallPartitions(t *testing. {"Input with 1 privacy unit with 1 contribution", 1, 1}, } { // The choice of ε=1e100, δ=10⁻²³, and l0Sensitivity=1 gives a threshold of =2. - // ε is split in two for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. 
- fn, err := newBoundedQuantilesFn(QuantilesParams{ - Epsilon: 2 * 1e100, - Delta: 1e-23, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: 0, - MaxValue: 10, - Ranks: []float64{0.5}, - }, noise.LaplaceNoise, false, TestModeDisabled) + spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1e100, PartitionSelectionEpsilon: 1e100, PartitionSelectionDelta: 1e-23}) + fn, err := newBoundedQuantilesFn( + *spec, + QuantilesParams{ + AggregationEpsilon: 1e100, + PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1e100, Delta: 1e-23}, + MaxPartitionsContributed: 1, + MaxContributionsPerPartition: 1, + MinValue: 0, + MaxValue: 10, + Ranks: []float64{0.5}, + }, + noise.LaplaceNoise, + false) if err != nil { t.Fatalf("Couldn't get newBoundedQuantilesFn: %v", err) } @@ -347,14 +309,18 @@ func TestBoundedQuantilesFnWithPartitionsExtractOutputDoesNotReturnNilForSmallPa {"Empty input", 0, 0}, {"Input with 1 privacy unit with 1 contribution", 1, 1}, } { - fn, err := newBoundedQuantilesFn(QuantilesParams{ - Epsilon: 1e100, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: 0, - MaxValue: 10, - Ranks: []float64{0.5}, - }, noise.LaplaceNoise, true, TestModeDisabled) + spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1e100}) + fn, err := newBoundedQuantilesFn(*spec, + QuantilesParams{ + AggregationEpsilon: 1e100, + MaxPartitionsContributed: 1, + MaxContributionsPerPartition: 1, + MinValue: 0, + MaxValue: 10, + Ranks: []float64{0.5}, + }, + noise.LaplaceNoise, + true) if err != nil { t.Fatalf("Couldn't get newBoundedQuantilesFn: %v", err) } @@ -1109,224 +1075,14 @@ func TestQuantilesPerKeyWithPartitionsAppliesClamping(t *testing.T) { func TestCheckQuantilesPerKeyParams(t *testing.T) { _, _, publicPartitions := ptest.CreateList([]int{0, 1}) for _, tc := range []struct { - desc string - usesNewPrivacyBudgetAPI bool - params QuantilesParams - noiseKind noise.Kind - partitionType reflect.Type - wantErr bool + 
desc string + params QuantilesParams + noiseKind noise.Kind + partitionType reflect.Type + wantErr bool }{ { desc: "valid parameters", - params: QuantilesParams{ - Epsilon: 1.0, - Delta: 1e-5, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: -5.0, - MaxValue: 5.0, - Ranks: []float64{0.5}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: false, - }, - { - desc: "negative epsilon", - params: QuantilesParams{ - Epsilon: -1.0, - Delta: 1e-5, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: -5.0, - MaxValue: 5.0, - Ranks: []float64{0.5}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "zero delta w/o public partitions", - params: QuantilesParams{ - Epsilon: 1.0, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: -5.0, - MaxValue: 5.0, - Ranks: []float64{0.5}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "MaxValue < MinValue", - params: QuantilesParams{ - Epsilon: 1.0, - Delta: 1e-5, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: 6.0, - MaxValue: 5.0, - Ranks: []float64{0.5}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "MaxValue = MinValue", - params: QuantilesParams{ - Epsilon: 1.0, - Delta: 1e-5, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: 5.0, - MaxValue: 5.0, - Ranks: []float64{0.5}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "zero MaxContributionsPerPartition", - params: QuantilesParams{ - Epsilon: 1.0, - Delta: 1e-5, - MaxPartitionsContributed: 1, - MinValue: -5.0, - MaxValue: 5.0, - Ranks: []float64{0.5}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "zero MaxPartitionsContributed", - params: QuantilesParams{ - Epsilon: 1.0, - Delta: 1e-5, - 
MaxContributionsPerPartition: 1, - MinValue: -5.0, - MaxValue: 5.0, - Ranks: []float64{0.5}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "No ranks", - params: QuantilesParams{ - Epsilon: 1.0, - Delta: 1e-5, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: -5.0, - MaxValue: 5.0, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "Out of bound (<0.0 || >1.0) ranks", - params: QuantilesParams{ - Epsilon: 1.0, - Delta: 1e-5, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: -5.0, - MaxValue: 5.0, - Ranks: []float64{0.3, 1.5}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "non-zero delta w/ public partitions & Laplace", - params: QuantilesParams{ - Epsilon: 1.0, - Delta: 1e-5, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: -5.0, - MaxValue: 5.0, - Ranks: []float64{0.5}, - PublicPartitions: publicPartitions, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(0), - wantErr: true, - }, - { - desc: "wrong partition type w/ public partitions as beam.PCollection", - params: QuantilesParams{ - Epsilon: 1.0, - MaxContributionsPerPartition: 1, - MaxPartitionsContributed: 1, - MinValue: -5.0, - MaxValue: 5.0, - Ranks: []float64{0.5}, - PublicPartitions: publicPartitions, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, - }, - { - desc: "wrong partition type w/ public partitions as slice", - params: QuantilesParams{ - Epsilon: 1.0, - MaxContributionsPerPartition: 1, - MaxPartitionsContributed: 1, - MinValue: -5.0, - MaxValue: 5.0, - Ranks: []float64{0.5}, - PublicPartitions: []int{0}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, - }, - { - desc: "wrong partition type w/ public partitions as array", - params: QuantilesParams{ - Epsilon: 1.0, - 
MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: -5.0, - MaxValue: 5.0, - Ranks: []float64{0.5}, - PublicPartitions: [1]int{0}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, - }, - { - desc: "public partitions as something other than beam.PCollection, slice or array", - params: QuantilesParams{ - Epsilon: 1.0, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: -5.0, - MaxValue: 5.0, - Ranks: []float64{0.5}, - PublicPartitions: "", - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, - }, - // Test cases for the new privacy budget API. - { - desc: "new API, valid parameters", - usesNewPrivacyBudgetAPI: true, params: QuantilesParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, @@ -1341,8 +1097,7 @@ func TestCheckQuantilesPerKeyParams(t *testing.T) { wantErr: false, }, { - desc: "new API, PartitionSelectionParams.MaxPartitionsContributed set", - usesNewPrivacyBudgetAPI: true, + desc: "PartitionSelectionParams.MaxPartitionsContributed set", params: QuantilesParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5, MaxPartitionsContributed: 1}, @@ -1357,8 +1112,7 @@ func TestCheckQuantilesPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, negative aggregationEpsilon", - usesNewPrivacyBudgetAPI: true, + desc: "negative aggregationEpsilon", params: QuantilesParams{ AggregationEpsilon: -1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, @@ -1373,8 +1127,7 @@ func TestCheckQuantilesPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, negative partitionSelectionEpsilon", - usesNewPrivacyBudgetAPI: true, + desc: "negative partitionSelectionEpsilon", params: QuantilesParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: -1.0, 
Delta: 1e-5}, @@ -1389,8 +1142,7 @@ func TestCheckQuantilesPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, zero partitionSelectionDelta w/o public partitions", - usesNewPrivacyBudgetAPI: true, + desc: "zero partitionSelectionDelta w/o public partitions", params: QuantilesParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 0}, @@ -1405,8 +1157,7 @@ func TestCheckQuantilesPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, MaxValue < MinValue", - usesNewPrivacyBudgetAPI: true, + desc: "MaxValue < MinValue", params: QuantilesParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, @@ -1421,8 +1172,7 @@ func TestCheckQuantilesPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, MaxValue = MinValue", - usesNewPrivacyBudgetAPI: true, + desc: "MaxValue = MinValue", params: QuantilesParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, @@ -1437,8 +1187,7 @@ func TestCheckQuantilesPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, zero MaxContributionsPerPartition", - usesNewPrivacyBudgetAPI: true, + desc: "zero MaxContributionsPerPartition", params: QuantilesParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, @@ -1452,8 +1201,7 @@ func TestCheckQuantilesPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, zero MaxPartitionsContributed", - usesNewPrivacyBudgetAPI: true, + desc: "zero MaxPartitionsContributed", params: QuantilesParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, @@ -1467,8 +1215,7 @@ func TestCheckQuantilesPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, no ranks", - usesNewPrivacyBudgetAPI: true, + desc: "no ranks", params: QuantilesParams{ AggregationEpsilon: 1.0, 
PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, @@ -1482,8 +1229,7 @@ func TestCheckQuantilesPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, out of bound (<0.0 || >1.0) ranks", - usesNewPrivacyBudgetAPI: true, + desc: "out of bound (<0.0 || >1.0) ranks", params: QuantilesParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, @@ -1498,8 +1244,7 @@ func TestCheckQuantilesPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, non-zero partitionSelectionDelta w/ public partitions", - usesNewPrivacyBudgetAPI: true, + desc: "non-zero partitionSelectionDelta w/ public partitions", params: QuantilesParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 0, Delta: 1e-5}, @@ -1515,8 +1260,7 @@ func TestCheckQuantilesPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, non-zero partitionSelectionEpsilon w/ public partitions", - usesNewPrivacyBudgetAPI: true, + desc: "non-zero partitionSelectionEpsilon w/ public partitions", params: QuantilesParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 0}, @@ -1532,7 +1276,7 @@ func TestCheckQuantilesPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, wrong partition type w/ public partitions as beam.PCollection", + desc: "wrong partition type w/ public partitions as beam.PCollection", params: QuantilesParams{ AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, @@ -1547,7 +1291,7 @@ func TestCheckQuantilesPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, wrong partition type w/ public partitions as slice", + desc: "wrong partition type w/ public partitions as slice", params: QuantilesParams{ AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, @@ -1562,7 +1306,7 @@ func TestCheckQuantilesPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, wrong partition type w/ public 
partitions as array", + desc: "wrong partition type w/ public partitions as array", params: QuantilesParams{ AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, @@ -1577,7 +1321,7 @@ func TestCheckQuantilesPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, public partitions as something other than beam.PCollection, slice or array", + desc: "public partitions as something other than beam.PCollection, slice or array", params: QuantilesParams{ AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, @@ -1592,7 +1336,7 @@ func TestCheckQuantilesPerKeyParams(t *testing.T) { wantErr: true, }, } { - if err := checkQuantilesPerKeyParams(tc.params, tc.usesNewPrivacyBudgetAPI, tc.noiseKind, tc.partitionType); (err != nil) != tc.wantErr { + if err := checkQuantilesPerKeyParams(tc.params, tc.noiseKind, tc.partitionType); (err != nil) != tc.wantErr { t.Errorf("With %s, got=%v, wantErr=%t", tc.desc, err, tc.wantErr) } } diff --git a/privacy-on-beam/pbeam/select_partitions.go b/privacy-on-beam/pbeam/select_partitions.go index 70259123..7f06540b 100644 --- a/privacy-on-beam/pbeam/select_partitions.go +++ b/privacy-on-beam/pbeam/select_partitions.go @@ -55,16 +55,9 @@ func SelectPartitions(s beam.Scope, pcol PrivatePCollection, params SelectPartit _, pT := beam.ValidateKVType(pcol.col) spec := pcol.privacySpec var err error - if spec.usesNewPrivacyBudgetAPI { - params.Epsilon, params.Delta, err = spec.partitionSelectionBudget.consume(params.Epsilon, params.Delta) - if err != nil { - log.Fatalf("Couldn't consume budget for SelectPartitions: %v", err) - } - } else { - params.Epsilon, params.Delta, err = spec.budget.consume(params.Epsilon, params.Delta) - if err != nil { - log.Fatalf("Couldn't consume budget for SelectPartitions: %v", err) - } + params.Epsilon, params.Delta, err = spec.partitionSelectionBudget.consume(params.Epsilon, params.Delta) + if err != nil { + log.Fatalf("Couldn't consume budget for SelectPartitions: %v", err) } err = 
checkSelectPartitionsParams(params) diff --git a/privacy-on-beam/pbeam/sum.go b/privacy-on-beam/pbeam/sum.go index 2f8142e9..e2e60916 100644 --- a/privacy-on-beam/pbeam/sum.go +++ b/privacy-on-beam/pbeam/sum.go @@ -147,21 +147,14 @@ func SumPerKey(s beam.Scope, pcol PrivatePCollection, params SumParams) beam.PCo // Get privacy parameters. spec := pcol.privacySpec var err error - if spec.usesNewPrivacyBudgetAPI { - params.AggregationEpsilon, params.AggregationDelta, err = spec.aggregationBudget.get(params.AggregationEpsilon, params.AggregationDelta) - if err != nil { - log.Fatalf("Couldn't consume aggregation budget for SumPerKey: %v", err) - } - if params.PublicPartitions == nil { - params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta, err = spec.partitionSelectionBudget.get(params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta) - if err != nil { - log.Fatalf("Couldn't consume partition selection budget for SumPerKey: %v", err) - } - } - } else { - params.Epsilon, params.Delta, err = spec.budget.get(params.Epsilon, params.Delta) + params.AggregationEpsilon, params.AggregationDelta, err = spec.aggregationBudget.get(params.AggregationEpsilon, params.AggregationDelta) + if err != nil { + log.Fatalf("Couldn't consume aggregation budget for SumPerKey: %v", err) + } + if params.PublicPartitions == nil { + params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta, err = spec.partitionSelectionBudget.get(params.PartitionSelectionParams.Epsilon, params.PartitionSelectionParams.Delta) if err != nil { - log.Fatalf("Couldn't consume budget for SumPerKey: %v", err) + log.Fatalf("Couldn't consume partition selection budget for SumPerKey: %v", err) } } @@ -173,7 +166,7 @@ func SumPerKey(s beam.Scope, pcol PrivatePCollection, params SumParams) beam.PCo noiseKind = params.NoiseKind.toNoiseKind() } - err = checkSumPerKeyParams(params, spec.usesNewPrivacyBudgetAPI, noiseKind, pcol.codec.KType.T) + err = 
checkSumPerKeyParams(params, noiseKind, pcol.codec.KType.T) if err != nil { log.Fatalf("pbeam.SumPerKey: %v", err) } @@ -226,18 +219,9 @@ func SumPerKey(s beam.Scope, pcol PrivatePCollection, params SumParams) beam.PCo var result beam.PCollection // Add public partitions and return the aggregation output, if public partitions are specified. if params.PublicPartitions != nil { - if spec.usesNewPrivacyBudgetAPI { - result = addPublicPartitionsForSum(s, params.AggregationEpsilon, params.AggregationDelta, params.MaxPartitionsContributed, params, noiseKind, vKind, partialSumKV, spec.testMode) - } else { - result = addPublicPartitionsForSum(s, params.Epsilon, params.Delta, params.MaxPartitionsContributed, params, noiseKind, vKind, partialSumKV, spec.testMode) - } + result = addPublicPartitionsForSum(s, *spec, params, noiseKind, vKind, partialSumKV) } else { - var boundedSumFn any - if spec.usesNewPrivacyBudgetAPI { - boundedSumFn, err = newBoundedSumFnTemp(*spec, params, noiseKind, vKind, false) - } else { - boundedSumFn, err = newBoundedSumFn(params.Epsilon, params.Delta, params.MaxPartitionsContributed, params.MinValue, params.MaxValue, noiseKind, vKind, false, spec.testMode) - } + boundedSumFn, err := newBoundedSumFn(*spec, params, noiseKind, vKind, false) if err != nil { log.Fatalf("Couldn't get boundedSumFn for SumPerKey: %v", err) } @@ -264,7 +248,7 @@ func SumPerKey(s beam.Scope, pcol PrivatePCollection, params SumParams) beam.PCo return result } -func addPublicPartitionsForSum(s beam.Scope, epsilon, delta float64, maxPartitionsContributed int64, params SumParams, noiseKind noise.Kind, vKind reflect.Kind, partialSumKV beam.PCollection, testMode TestMode) beam.PCollection { +func addPublicPartitionsForSum(s beam.Scope, spec PrivacySpec, params SumParams, noiseKind noise.Kind, vKind reflect.Kind, partialSumKV beam.PCollection) beam.PCollection { // Calculate sums with empty public partitions added. Result is PCollection, where vKind is either int64 or float64. 
// First, add zero values to all public partitions. addZeroValuesToPublicPartitions, err := newAddZeroValuesToPublicPartitionsFn(vKind) @@ -277,7 +261,7 @@ func addPublicPartitionsForSum(s beam.Scope, epsilon, delta float64, maxPartitio } publicPartitionsWithZeroValues := beam.ParDo(s, addZeroValuesToPublicPartitions, publicPartitions) // Second, add noise to all public partitions (all of which are zero-valued). - boundedSumFn, err := newBoundedSumFn(epsilon, delta, maxPartitionsContributed, params.MinValue, params.MaxValue, noiseKind, vKind, true, testMode) + boundedSumFn, err := newBoundedSumFn(spec, params, noiseKind, vKind, true) if err != nil { log.Fatalf("Couldn't get boundedSumFn for SumPerKey: %v", err) } @@ -295,41 +279,30 @@ func addPublicPartitionsForSum(s beam.Scope, epsilon, delta float64, maxPartitio return beam.ParDo(s, dereferenceValueFn, sums) } -func checkSumPerKeyParams(params SumParams, usesNewPrivacyBudgetAPI bool, noiseKind noise.Kind, partitionType reflect.Type) error { +func checkSumPerKeyParams(params SumParams, noiseKind noise.Kind, partitionType reflect.Type) error { err := checkPublicPartitions(params.PublicPartitions, partitionType) if err != nil { return err } - if usesNewPrivacyBudgetAPI { - err = checks.CheckEpsilon(params.AggregationEpsilon) - if err != nil { - return err - } - err = checkAggregationDelta(params.AggregationDelta, noiseKind) - if err != nil { - return err - } - err = checkPartitionSelectionEpsilon(params.PartitionSelectionParams.Epsilon, params.PublicPartitions) - if err != nil { - return err - } - err = checkPartitionSelectionDelta(params.PartitionSelectionParams.Delta, params.PublicPartitions) - if err != nil { - return err - } - err = checkMaxPartitionsContributedPartitionSelection(params.PartitionSelectionParams.MaxPartitionsContributed) - if err != nil { - return err - } - } else { - err = checks.CheckEpsilon(params.Epsilon) - if err != nil { - return err - } - err = checkDelta(params.Delta, noiseKind, 
params.PublicPartitions) - if err != nil { - return err - } + err = checks.CheckEpsilon(params.AggregationEpsilon) + if err != nil { + return err + } + err = checkAggregationDelta(params.AggregationDelta, noiseKind) + if err != nil { + return err + } + err = checkPartitionSelectionEpsilon(params.PartitionSelectionParams.Epsilon, params.PublicPartitions) + if err != nil { + return err + } + err = checkPartitionSelectionDelta(params.PartitionSelectionParams.Delta, params.PublicPartitions) + if err != nil { + return err + } + err = checkMaxPartitionsContributedPartitionSelection(params.PartitionSelectionParams.MaxPartitionsContributed) + if err != nil { + return err } err = checks.CheckBoundsFloat64(params.MinValue, params.MaxValue) if err != nil { diff --git a/privacy-on-beam/pbeam/sum_test.go b/privacy-on-beam/pbeam/sum_test.go index 7884ba50..8de4097c 100644 --- a/privacy-on-beam/pbeam/sum_test.go +++ b/privacy-on-beam/pbeam/sum_test.go @@ -1332,160 +1332,15 @@ func TestSumPerKeyNoClampingForNegativeMinValueInt64(t *testing.T) { func TestCheckSumPerKeyParams(t *testing.T) { _, _, publicPartitions := ptest.CreateList([]int{0, 1}) for _, tc := range []struct { - desc string - usesNewPrivacyBudgetAPI bool - params SumParams - noiseKind noise.Kind - partitionType reflect.Type - wantErr bool + desc string + params SumParams + noiseKind noise.Kind + partitionType reflect.Type + wantErr bool }{ - { - desc: "valid parameters", - params: SumParams{ - Epsilon: 1.0, - Delta: 1e-5, - MaxPartitionsContributed: 1, - MinValue: -5.0, - MaxValue: 5.0, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: false, - }, - { - desc: "negative epsilon", - params: SumParams{ - Epsilon: -1.0, - Delta: 1e-5, - MaxPartitionsContributed: 1, - MinValue: -5.0, - MaxValue: 5.0, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "zero delta w/o public partitions", - params: SumParams{ - Epsilon: 1.0, - MaxPartitionsContributed: 1, - 
MinValue: -5.0, - MaxValue: 5.0, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "MaxPartitionsContributed unset", - params: SumParams{ - Epsilon: 1.0, - Delta: 1e-5, - MinValue: -5.0, - MaxValue: 5.0, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "MaxValue < MinValue", - params: SumParams{ - Epsilon: 1.0, - Delta: 1e-5, - MaxPartitionsContributed: 1, - MinValue: 6.0, - MaxValue: 5.0, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: true, - }, - { - desc: "MaxValue = MinValue", - params: SumParams{ - Epsilon: 1.0, - Delta: 1e-5, - MaxPartitionsContributed: 1, - MinValue: 5.0, - MaxValue: 5.0, - }, - noiseKind: noise.LaplaceNoise, - partitionType: nil, - wantErr: false, - }, - { - desc: "non-zero delta w/ public partitions & Laplace", - params: SumParams{ - Epsilon: 1.0, - Delta: 1e-5, - MaxPartitionsContributed: 1, - MinValue: -5.0, - MaxValue: 5.0, - PublicPartitions: publicPartitions, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(0), - wantErr: true, - }, - { - desc: "wrong partition type w/ public partitions as beam.PCollection", - params: SumParams{ - Epsilon: 1.0, - MaxPartitionsContributed: 1, - MinValue: -5.0, - MaxValue: 5.0, - PublicPartitions: publicPartitions, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, - }, - { - desc: "wrong partition type w/ public partitions as slice", - params: SumParams{ - Epsilon: 1.0, - MaxPartitionsContributed: 1, - MinValue: -5.0, - MaxValue: 5.0, - PublicPartitions: []int{0}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, - }, - { - desc: "wrong partition type w/ public partitions as array", - params: SumParams{ - Epsilon: 1.0, - MaxPartitionsContributed: 1, - MinValue: -5.0, - MaxValue: 5.0, - PublicPartitions: [1]int{0}, - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - 
wantErr: true, - }, - { - desc: "public partitions as something other than beam.PCollection, slice or array", - params: SumParams{ - Epsilon: 1.0, - MaxPartitionsContributed: 1, - MinValue: -5.0, - MaxValue: 5.0, - PublicPartitions: "publicPartitions", - }, - noiseKind: noise.LaplaceNoise, - partitionType: reflect.TypeOf(""), - wantErr: true, - }, - // Test cases for the new privacy budget API. { - desc: "new API, valid parameters", - usesNewPrivacyBudgetAPI: true, + desc: "valid parameters", params: SumParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, @@ -1498,8 +1353,7 @@ func TestCheckSumPerKeyParams(t *testing.T) { wantErr: false, }, { - desc: "new API, PartitionSelectionParams.MaxPartitionsContributed set", - usesNewPrivacyBudgetAPI: true, + desc: "PartitionSelectionParams.MaxPartitionsContributed set", params: SumParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5, MaxPartitionsContributed: 1}, @@ -1512,8 +1366,7 @@ func TestCheckSumPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, zero aggregationDelta w/ Gaussian noise", - usesNewPrivacyBudgetAPI: true, + desc: "zero aggregationDelta w/ Gaussian noise", params: SumParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, @@ -1526,8 +1379,7 @@ func TestCheckSumPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, negative aggregationEpsilon", - usesNewPrivacyBudgetAPI: true, + desc: "negative aggregationEpsilon", params: SumParams{ AggregationEpsilon: -1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, @@ -1540,8 +1392,7 @@ func TestCheckSumPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, negative partitionSelectionEpsilon", - usesNewPrivacyBudgetAPI: true, + desc: "negative partitionSelectionEpsilon", params: SumParams{ AggregationEpsilon: 1.0, 
PartitionSelectionParams: PartitionSelectionParams{Epsilon: -1.0, Delta: 1e-5}, @@ -1554,8 +1405,7 @@ func TestCheckSumPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, zero partitionSelectionDelta w/o public partitions", - usesNewPrivacyBudgetAPI: true, + desc: "zero partitionSelectionDelta w/o public partitions", params: SumParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 0}, @@ -1568,8 +1418,7 @@ func TestCheckSumPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, zero partitionSelectionEpsilon w/o public partitions", - usesNewPrivacyBudgetAPI: true, + desc: "zero partitionSelectionEpsilon w/o public partitions", params: SumParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 0, Delta: 1e-5}, @@ -1582,7 +1431,7 @@ func TestCheckSumPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, MaxPartitionsContributed unset", + desc: "MaxPartitionsContributed unset", params: SumParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, @@ -1594,8 +1443,7 @@ func TestCheckSumPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, MaxValue < MinValue", - usesNewPrivacyBudgetAPI: true, + desc: "MaxValue < MinValue", params: SumParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, @@ -1608,8 +1456,7 @@ func TestCheckSumPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, MaxValue = MinValue", - usesNewPrivacyBudgetAPI: true, + desc: "MaxValue = MinValue", params: SumParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 1e-5}, @@ -1622,8 +1469,7 @@ func TestCheckSumPerKeyParams(t *testing.T) { wantErr: false, }, { - desc: "new API, non-zero partitionSelectionDelta w/ public partitions", - usesNewPrivacyBudgetAPI: true, + desc: "non-zero 
partitionSelectionDelta w/ public partitions", params: SumParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 0, Delta: 1e-5}, @@ -1637,8 +1483,7 @@ func TestCheckSumPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, non-zero partitionSelectionEpsilon w/ public partitions", - usesNewPrivacyBudgetAPI: true, + desc: "non-zero partitionSelectionEpsilon w/ public partitions", params: SumParams{ AggregationEpsilon: 1.0, PartitionSelectionParams: PartitionSelectionParams{Epsilon: 1.0, Delta: 0}, @@ -1652,8 +1497,7 @@ func TestCheckSumPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, wrong partition type w/ public partitions as beam.PCollection", - usesNewPrivacyBudgetAPI: true, + desc: "wrong partition type w/ public partitions as beam.PCollection", params: SumParams{ AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, @@ -1666,8 +1510,7 @@ func TestCheckSumPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, wrong partition type w/ public partitions as slice", - usesNewPrivacyBudgetAPI: true, + desc: "wrong partition type w/ public partitions as slice", params: SumParams{ AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, @@ -1680,8 +1523,7 @@ func TestCheckSumPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, wrong partition type w/ public partitions as array", - usesNewPrivacyBudgetAPI: true, + desc: "wrong partition type w/ public partitions as array", params: SumParams{ AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, @@ -1694,8 +1536,7 @@ func TestCheckSumPerKeyParams(t *testing.T) { wantErr: true, }, { - desc: "new API, public partitions as something other than beam.PCollection, slice or array", - usesNewPrivacyBudgetAPI: true, + desc: "public partitions as something other than beam.PCollection, slice or array", params: SumParams{ AggregationEpsilon: 1.0, MaxPartitionsContributed: 1, @@ -1708,7 +1549,7 @@ func TestCheckSumPerKeyParams(t *testing.T) { 
wantErr: true, }, } { - if err := checkSumPerKeyParams(tc.params, tc.usesNewPrivacyBudgetAPI, tc.noiseKind, tc.partitionType); (err != nil) != tc.wantErr { + if err := checkSumPerKeyParams(tc.params, tc.noiseKind, tc.partitionType); (err != nil) != tc.wantErr { t.Errorf("With %s, got=%v, wantErr=%t", tc.desc, err, tc.wantErr) } }