Skip to content

Commit

Permalink
Merge pull request #171 from Qrlew/158-clip-noisy-values-with-bounds-…
Browse files Browse the repository at this point in the history
…of-the-sum

158 clip noisy values with bounds of the sum
  • Loading branch information
ngrislain authored Oct 28, 2023
2 parents cdbcf1f + 8822d2e commit 4996a28
Show file tree
Hide file tree
Showing 8 changed files with 48 additions and 16 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.4.5] - 2023-10-27
### Changed
- added clipped noise [MR171](https://github.com/Qrlew/qrlew/pull/171)

## [0.4.4] - 2023-10-27
### Fixed
- changed PEP compilation

## [0.4.3] - 2023-10-27
### Fixed
- added rewrite_as_pep [MR169](https://github.com/Qrlew/qrlew/pull/169)
- added rewrite_as_pep [MR170](https://github.com/Qrlew/qrlew/pull/170)
- Updates sqlparser version

## [0.4.2] - 2023-10-27
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
authors = ["Nicolas Grislain <ng@sarus.tech>"]
name = "qrlew"
version = "0.4.4"
version = "0.4.5"
edition = "2021"
description = "Sarus Qrlew Engine"
documentation = "https://docs.rs/qrlew"
Expand Down
5 changes: 3 additions & 2 deletions src/differential_privacy/aggregates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ impl Relation {
.map(|(_, n)| PrivateQuery::Gaussian(*n))
.collect::<Vec<_>>()
.into();
DPRelation::new(self.add_gaussian_noise(noise_multipliers), private_query)
// DPRelation::new(self.add_gaussian_noise(noise_multipliers), private_query)
DPRelation::new(self.add_clipped_gaussian_noise(noise_multipliers), private_query)
}
}

Expand Down Expand Up @@ -98,7 +99,7 @@ impl PEPRelation {
Ok(DPRelation::new(dp_clipped_relation, private_query))
}

/// Rewrite aggregations as sums and ass noise to that sums.
/// Rewrite aggregations as sums and add noise to those sums.
/// The budget is split equally among the sums.
pub fn differentially_private_aggregates(
self,
Expand Down
14 changes: 7 additions & 7 deletions src/differential_privacy/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ impl From<(Relation, PrivateQuery)> for DPRelation {
}

impl Reduce {
/// Compiles a `Reduce` into DP:
/// Rewrite a `Reduce` into DP:
/// - Protect the grouping keys
/// - Add noise on the aggregations
pub fn differentially_private(
Expand All @@ -126,7 +126,7 @@ impl Reduce {
) -> Result<DPRelation> {
let mut private_query = PrivateQuery::null();

// DP compile group by
// DP rewrite group by
let reduce_with_dp_group_by = if self.group_by().is_empty() {
self
} else {
Expand All @@ -145,7 +145,7 @@ impl Reduce {
reduce
};

// DP compile aggregates
// DP rewrite aggregates
let (dp_relation, private_query_agg) = reduce_with_dp_group_by
.differentially_private_aggregates(epsilon, delta)?
.into();
Expand All @@ -169,7 +169,7 @@ mod tests {
};

#[test]
fn test_dp_compile_reduce_without_group_by() {
fn test_dp_rewrite_reduce_without_group_by() {
let mut database = postgresql::test_database();
let relations = database.relations();

Expand Down Expand Up @@ -232,7 +232,7 @@ mod tests {
}

#[test]
fn test_dp_compile_reduce_group_by_possible_values() {
fn test_dp_rewrite_reduce_group_by_possible_values() {
let mut database = postgresql::test_database();
let relations = database.relations();

Expand Down Expand Up @@ -306,7 +306,7 @@ mod tests {
}

#[test]
fn test_dp_compile_reduce_group_by_tau_thresholding() {
fn test_dp_rewrite_reduce_group_by_tau_thresholding() {
let mut database = postgresql::test_database();
let relations = database.relations();

Expand Down Expand Up @@ -380,7 +380,7 @@ mod tests {
}

#[test]
fn test_dp_compile_reduce_group_by_possible_both() {
fn test_dp_rewrite_reduce_group_by_possible_both() {
let mut database = postgresql::test_database();
let relations = database.relations();

Expand Down
22 changes: 21 additions & 1 deletion src/relation/rewriting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
use super::{Join, Map, Reduce, Relation, Set, Table, Values, Variant as _};
use crate::{
builder::{Ready, With, WithIterator},
data_type::{self, DataTyped},
data_type::{self, DataType, DataTyped, Variant as _},
expr::{self, aggregate, Aggregate, Expr, Value},
io, namer, relation,
};
Expand Down Expand Up @@ -505,6 +505,26 @@ impl Relation {
.build()
}

/// Add gaussian noise of a given standard deviation to the given columns, while keeping the
/// column min and max.
///
/// Every field of the schema of `self` is kept under its own name. A column listed in
/// `name_sigmas` is replaced by `least(max, greatest(min, noised column))`, where `min` and
/// `max` are the bounds of the column data type once converted to a float data type. Any
/// other column is passed through unchanged.
///
/// # Panics
///
/// Panics if a column listed in `name_sigmas` cannot be converted to a float data type, or
/// if that float data type exposes no `max` or `min`.
pub fn add_clipped_gaussian_noise(self, name_sigmas: Vec<(&str, f64)>) -> Relation {
    let name_sigmas: HashMap<&str, f64> = name_sigmas.into_iter().collect();
    Relation::map()
        .with_iter(self.schema().iter().map(|f| {
            // A single lookup both tests membership and fetches the standard deviation.
            match name_sigmas.get(f.name()) {
                Some(&sigma) => {
                    let float_data_type: data_type::Float = f
                        .data_type()
                        .into_data_type(&DataType::float())
                        .expect("a noised column must be convertible to a float data type")
                        .try_into()
                        .expect("a data type converted to float must be a `Float`");
                    let max = *float_data_type
                        .max()
                        .expect("a noised column must have an upper bound");
                    let min = *float_data_type
                        .min()
                        .expect("a noised column must have a lower bound");
                    (
                        f.name(),
                        // Clip the noised value back into the [min, max] range of the column.
                        Expr::least(
                            Expr::val(max),
                            Expr::greatest(
                                Expr::val(min),
                                Expr::col(f.name()).add_gaussian_noise(sigma),
                            ),
                        ),
                    )
                }
                None => (f.name(), Expr::col(f.name())),
            }
        }))
        .input(self)
        .build()
}

/// Returns a `Relation::Map` that inputs `self` and filter by `predicate`
pub fn filter(self, predicate: Expr) -> Relation {
Relation::map()
Expand Down
2 changes: 1 addition & 1 deletion src/rewriting/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ mod tests {
};

#[test]
fn test_compile() {
fn test_rewrite() {
let mut database = postgresql::test_database();
let relations = database.relations();

Expand Down
11 changes: 9 additions & 2 deletions src/rewriting/rewriting_rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1382,12 +1382,19 @@ mod tests {
let relations = database.relations();
let query = parse(r#"
WITH order_avg_price (order_id, avg_price) AS (SELECT order_id, avg(price) AS avg_price FROM item_table GROUP BY order_id),
order_sum_abs_price (order_id, sum_abs_price) AS (SELECT order_id, 2*sum(abs(price)) AS sum_abs_price FROM item_table GROUP BY order_id),
normalized_prices AS (SELECT order_avg_price.order_id, (item_table.price-order_avg_price.avg_price)/(0.1+abs(order_sum_abs_price.sum_abs_price)) AS normalized_price
order_sum_abs_price (order_id, sum_abs_price) AS (SELECT order_id, sum(abs(price)) AS sum_abs_price FROM item_table GROUP BY order_id),
normalized_prices AS (SELECT order_avg_price.order_id, (item_table.price-order_avg_price.avg_price)/(0.1+order_sum_abs_price.sum_abs_price) AS normalized_price
FROM item_table JOIN order_avg_price ON item_table.order_id=order_avg_price.order_id JOIN order_sum_abs_price ON item_table.order_id=order_sum_abs_price.order_id)
SELECT order_id, sum(normalized_price) FROM normalized_prices GROUP BY order_id
"#,
).unwrap();
let query = parse(r#"
WITH order_avg_price (order_id, avg_price) AS (SELECT order_id, avg(price) AS avg_price FROM item_table GROUP BY order_id),
normalized_prices AS (SELECT order_avg_price.order_id, (item_table.price/(0.1+order_avg_price.avg_price)) AS normalized_price
FROM item_table JOIN order_avg_price ON item_table.order_id=order_avg_price.order_id)
SELECT order_id, sum(normalized_price) FROM normalized_prices GROUP BY order_id
"#,
).unwrap();
let synthetic_data = SyntheticData::new(Hierarchy::from([
(vec!["item_table"], Identifier::from("item_table")),
(vec!["order_table"], Identifier::from("order_table")),
Expand Down
2 changes: 1 addition & 1 deletion tests/integration.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! # Integration tests
//!
//! Various queries are tested against their compiled to Relation + decompiled counterpart.
//! Various queries are tested against their version rewritten to Relation + re-rewritten.
use colored::Colorize;
use itertools::Itertools;
Expand Down

0 comments on commit 4996a28

Please sign in to comment.