From 683a8c2c43c96528b1b27322e2ccc027fd536abc Mon Sep 17 00:00:00 2001 From: Nicolas Grislain Date: Sun, 29 Oct 2023 00:47:39 +0200 Subject: [PATCH 1/2] Update clipped noise --- CHANGELOG.md | 6 +++++- Cargo.toml | 2 +- src/differential_privacy/aggregates.rs | 5 +++-- src/differential_privacy/mod.rs | 14 +++++++------- src/relation/rewriting.rs | 22 +++++++++++++++++++++- src/rewriting/mod.rs | 2 +- tests/integration.rs | 2 +- 7 files changed, 39 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a6005c4..90f1b9ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,13 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.4.5] - 2023-10-27 +### Changed +- added clipped noise [MR171](https://github.com/Qrlew/qrlew/pull/171) + ## [0.4.4] - 2023-10-27 ### Fixed - changed PEP compilation ## [0.4.3] - 2023-10-27 ### Fixed -- added rewrite_as_pep [MR169](https://github.com/Qrlew/qrlew/pull/169) +- added rewrite_as_pep [MR170](https://github.com/Qrlew/qrlew/pull/170) - Updates sqlparser version ## [0.4.2] - 2023-10-27 diff --git a/Cargo.toml b/Cargo.toml index 385e1714..f552b405 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Nicolas Grislain "] name = "qrlew" -version = "0.4.4" +version = "0.4.5" edition = "2021" description = "Sarus Qrlew Engine" documentation = "https://docs.rs/qrlew" diff --git a/src/differential_privacy/aggregates.rs b/src/differential_privacy/aggregates.rs index 7ff61aba..a2553201 100644 --- a/src/differential_privacy/aggregates.rs +++ b/src/differential_privacy/aggregates.rs @@ -49,7 +49,8 @@ impl Relation { .map(|(_, n)| PrivateQuery::Gaussian(*n)) .collect::>() .into(); - DPRelation::new(self.add_gaussian_noise(noise_multipliers), private_query) + // DPRelation::new(self.add_gaussian_noise(noise_multipliers), private_query) + DPRelation::new(self.add_clipped_gaussian_noise(noise_multipliers), private_query) } } @@ -98,7 +99,7 @@ 
impl PEPRelation { Ok(DPRelation::new(dp_clipped_relation, private_query)) } - /// Rewrite aggregations as sums and ass noise to that sums. + /// Rewrite aggregations as sums and add noise to those sums. /// The budget is equally splitted among the sums. pub fn differentially_private_aggregates( self, diff --git a/src/differential_privacy/mod.rs b/src/differential_privacy/mod.rs index 90354cab..61c23e7c 100644 --- a/src/differential_privacy/mod.rs +++ b/src/differential_privacy/mod.rs @@ -114,7 +114,7 @@ impl From<(Relation, PrivateQuery)> for DPRelation { } impl Reduce { - /// Compiles a `Reduce` into DP: + /// Rewrite a `Reduce` into DP: /// - Protect the grouping keys /// - Add noise on the aggregations pub fn differentially_private( @@ -126,7 +126,7 @@ impl Reduce { ) -> Result { let mut private_query = PrivateQuery::null(); - // DP compile group by + // DP rewrite group by let reduce_with_dp_group_by = if self.group_by().is_empty() { self } else { @@ -145,7 +145,7 @@ impl Reduce { reduce }; - // DP compile aggregates + // DP rewrite aggregates let (dp_relation, private_query_agg) = reduce_with_dp_group_by .differentially_private_aggregates(epsilon, delta)? 
.into(); @@ -169,7 +169,7 @@ mod tests { }; #[test] - fn test_dp_compile_reduce_without_group_by() { + fn test_dp_rewrite_reduce_without_group_by() { let mut database = postgresql::test_database(); let relations = database.relations(); @@ -232,7 +232,7 @@ mod tests { } #[test] - fn test_dp_compile_reduce_group_by_possible_values() { + fn test_dp_rewrite_reduce_group_by_possible_values() { let mut database = postgresql::test_database(); let relations = database.relations(); @@ -306,7 +306,7 @@ mod tests { } #[test] - fn test_dp_compile_reduce_group_by_tau_thresholding() { + fn test_dp_rewrite_reduce_group_by_tau_thresholding() { let mut database = postgresql::test_database(); let relations = database.relations(); @@ -380,7 +380,7 @@ mod tests { } #[test] - fn test_dp_compile_reduce_group_by_possible_both() { + fn test_dp_rewrite_reduce_group_by_possible_both() { let mut database = postgresql::test_database(); let relations = database.relations(); diff --git a/src/relation/rewriting.rs b/src/relation/rewriting.rs index 33d14a09..bcdbcb8c 100644 --- a/src/relation/rewriting.rs +++ b/src/relation/rewriting.rs @@ -4,7 +4,7 @@ use super::{Join, Map, Reduce, Relation, Set, Table, Values, Variant as _}; use crate::{ builder::{Ready, With, WithIterator}, - data_type::{self, DataTyped}, + data_type::{self, DataType, DataTyped, Variant as _}, expr::{self, aggregate, Aggregate, Expr, Value}, io, namer, relation, }; @@ -505,6 +505,26 @@ impl Relation { .build() } + /// Add gaussian noise of a given standard deviation to the given columns, while keeping the column min and max + pub fn add_clipped_gaussian_noise(self, name_sigmas: Vec<(&str, f64)>) -> Relation { + let name_sigmas: HashMap<&str, f64> = name_sigmas.into_iter().collect(); + Relation::map() + // .with_iter(name_sigmas.into_iter().map(|(name, sigma)| (name, Expr::col(name).add_gaussian_noise(sigma)))) + .with_iter(self.schema().iter().map(|f| { + if name_sigmas.contains_key(&f.name()) { + let float_data_type: 
data_type::Float = f.data_type().into_data_type(&DataType::float()).unwrap().try_into().unwrap(); + ( + f.name(), + Expr::least(Expr::val(*float_data_type.max().unwrap()), Expr::greatest(Expr::val(*float_data_type.min().unwrap()), Expr::col(f.name()).add_gaussian_noise(name_sigmas[f.name()]))), + ) + } else { + (f.name(), Expr::col(f.name())) + } + })) + .input(self) + .build() + } + /// Returns a `Relation::Map` that inputs `self` and filter by `predicate` pub fn filter(self, predicate: Expr) -> Relation { Relation::map() diff --git a/src/rewriting/mod.rs b/src/rewriting/mod.rs index 8355df5c..d6108214 100644 --- a/src/rewriting/mod.rs +++ b/src/rewriting/mod.rs @@ -147,7 +147,7 @@ mod tests { }; #[test] - fn test_compile() { + fn test_rewrite() { let mut database = postgresql::test_database(); let relations = database.relations(); diff --git a/tests/integration.rs b/tests/integration.rs index 65c66d59..48c09a99 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -1,6 +1,6 @@ //! # Integration tests //! -//! Various queries are tested against their compiled to Relation + decompiled counterpart. +//! Various queries are tested against their version rewritten to Relation + re-rewritten. 
use colored::Colorize; use itertools::Itertools; From 8822d2eb224d8b6b2637f6a84c27ca36995a4af0 Mon Sep 17 00:00:00 2001 From: Nicolas Grislain Date: Sun, 29 Oct 2023 01:31:51 +0200 Subject: [PATCH 2/2] ok --- src/rewriting/rewriting_rule.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/rewriting/rewriting_rule.rs b/src/rewriting/rewriting_rule.rs index da16a3b8..0ad2cb41 100644 --- a/src/rewriting/rewriting_rule.rs +++ b/src/rewriting/rewriting_rule.rs @@ -1382,12 +1382,19 @@ mod tests { let relations = database.relations(); let query = parse(r#" WITH order_avg_price (order_id, avg_price) AS (SELECT order_id, avg(price) AS avg_price FROM item_table GROUP BY order_id), - order_sum_abs_price (order_id, sum_abs_price) AS (SELECT order_id, 2*sum(abs(price)) AS sum_abs_price FROM item_table GROUP BY order_id), - normalized_prices AS (SELECT order_avg_price.order_id, (item_table.price-order_avg_price.avg_price)/(0.1+abs(order_sum_abs_price.sum_abs_price)) AS normalized_price + order_sum_abs_price (order_id, sum_abs_price) AS (SELECT order_id, sum(abs(price)) AS sum_abs_price FROM item_table GROUP BY order_id), + normalized_prices AS (SELECT order_avg_price.order_id, (item_table.price-order_avg_price.avg_price)/(0.1+order_sum_abs_price.sum_abs_price) AS normalized_price FROM item_table JOIN order_avg_price ON item_table.order_id=order_avg_price.order_id JOIN order_sum_abs_price ON item_table.order_id=order_sum_abs_price.order_id) SELECT order_id, sum(normalized_price) FROM normalized_prices GROUP BY order_id "#, ).unwrap(); + let query = parse(r#" + WITH order_avg_price (order_id, avg_price) AS (SELECT order_id, avg(price) AS avg_price FROM item_table GROUP BY order_id), + normalized_prices AS (SELECT order_avg_price.order_id, (item_table.price/(0.1+order_avg_price.avg_price)) AS normalized_price + FROM item_table JOIN order_avg_price ON item_table.order_id=order_avg_price.order_id) + SELECT order_id, sum(normalized_price) FROM 
normalized_prices GROUP BY order_id + "#, + ).unwrap(); let synthetic_data = SyntheticData::new(Hierarchy::from([ (vec!["item_table"], Identifier::from("item_table")), (vec!["order_table"], Identifier::from("order_table")),