diff --git a/CHANGELOG.md b/CHANGELOG.md index 0467cc97..12f675cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] + +## [0.5.5] - 2023-12-09 ## Added +- implemented row privacy [#215](https://github.com/Qrlew/qrlew/issues/215) - Cast DP COUNT as integer [#217](https://github.com/Qrlew/qrlew/issues/217) - implemented distinct in the select clause [#216](https://github.com/Qrlew/qrlew/issues/216) diff --git a/src/privacy_unit_tracking/mod.rs b/src/privacy_unit_tracking/mod.rs index c9acb3a7..d58dbb5f 100644 --- a/src/privacy_unit_tracking/mod.rs +++ b/src/privacy_unit_tracking/mod.rs @@ -12,6 +12,7 @@ use crate::{ expr::{AggregateColumn, Expr}, hierarchy::Hierarchy, relation::{Join, Map, Reduce, Relation, Table, Values, Variant as _}, + namer, display::Dot }; pub use privacy_unit::{PrivacyUnit, PrivacyUnitPath}; use std::{error, fmt, ops::Deref, result, sync::Arc}; @@ -68,6 +69,7 @@ pub enum Strategy { pub struct PUPRelation(pub Relation); impl PUPRelation { + pub fn privacy_unit(&self) -> &str { PrivacyUnit::privacy_unit() } @@ -126,6 +128,26 @@ impl Deref for PUPRelation { } impl Relation { + /// Add the field for the row privacy + pub fn privacy_unit_row(self) -> Self { + let expr = Expr::random(namer::new_id(self.name())); + self.identity_with_field( + PrivacyUnit::privacy_unit_row(), + expr, + ) + } + /// Add the field containing the privacy unit + pub fn privacy_unit(self, referred_field: &str) -> Self { + let relation = if referred_field == PrivacyUnit::privacy_unit_row() { + self.privacy_unit_row() + } else { + self + }; + relation.identity_with_field( + PrivacyUnitPath::privacy_unit(), + Expr::col(referred_field), + ) + } /// Add a field designated with a foreign relation and a field pub fn with_referred_field( self, @@ -142,6 +164,15 @@ impl Relation { .map(|f| f.name().to_string()) .filter(|name| name != &referred_field_name) .collect(); + let referred_relation = if referred_field == PrivacyUnit::privacy_unit_row() { + Arc::new( + referred_relation.deref() + .clone() + .privacy_unit_row() + ) + } else { + referred_relation + }; let join: Relation = Relation::join() .inner() .on(Expr::eq( @@ -183,11 +214,7 @@ impl Relation { field_path: PrivacyUnitPath, ) -> Relation { if field_path.path().is_empty() { - // TODO Remove this? - self.identity_with_field( - PrivacyUnitPath::privacy_unit(), - Expr::col(field_path.referred_field()), - ) + self.privacy_unit(field_path.referred_field()) } else { field_path .into_iter() @@ -528,14 +555,16 @@ mod tests { fn test_field_path() { let mut database = postgresql::test_database(); let relations = database.relations(); + // Link orders to users let orders = relations.get(&["orders".to_string()]).unwrap().as_ref(); let relation = orders.clone().with_field_path( &relations, PrivacyUnitPath::from((vec![("user_id", "users", "id")], "id")), ); + relation.display_dot().unwrap(); assert!(relation.schema()[0].name() == PrivacyUnit::privacy_unit()); - // // Link items to orders + // Link items to orders let items = relations.get(&["items".to_string()]).unwrap().as_ref(); let relation = items.clone().with_field_path( &relations, @@ -546,7 +575,43 @@ mod tests { ); assert!(relation.schema()[0].name() == PrivacyUnit::privacy_unit()); // Produce the query - relation.display_dot(); + relation.display_dot().unwrap(); + let query: &str = &ast::Query::from(&relation).to_string(); + println!("{query}"); + println!( + "{}\n{}", + format!("{query}").yellow(), + database + .query(query) + .unwrap() + .iter() + .map(ToString::to_string) + .join("\n") + ); + let relation = relation.filter_fields(|n| n != "peid"); + assert!(relation.schema()[0].name() != "peid"); + + // with row privacy + // Link orders to users + let orders = relations.get(&["orders".to_string()]).unwrap().as_ref(); + let relation = orders.clone().with_field_path( + &relations, + PrivacyUnitPath::from((vec![("user_id", "users", "id")], PrivacyUnit::privacy_unit_row())), + ); + relation.display_dot().unwrap(); + assert!(relation.schema()[0].name() == PrivacyUnit::privacy_unit()); + // Link items to orders + let items = relations.get(&["items".to_string()]).unwrap().as_ref(); + let relation = items.clone().with_field_path( + &relations, + PrivacyUnitPath::from(( + vec![("order_id", "orders", "id"), ("user_id", "users", "id")], + PrivacyUnit::privacy_unit_row(), + )), + ); + relation.display_dot().unwrap(); + assert!(relation.schema()[0].name() == PrivacyUnit::privacy_unit()); + // Produce the query let query: &str = &ast::Query::from(&relation).to_string(); println!("{query}"); println!( @@ -559,8 +624,6 @@ mod tests { .map(ToString::to_string) .join("\n") ); - // let relation = relation.filter_fields(|n| n != "peid"); - // assert!(relation.schema()[0].name() != "peid"); } #[test] diff --git a/src/privacy_unit_tracking/privacy_unit.rs b/src/privacy_unit_tracking/privacy_unit.rs index a2e01bda..af2430cb 100644 --- a/src/privacy_unit_tracking/privacy_unit.rs +++ b/src/privacy_unit_tracking/privacy_unit.rs @@ -7,6 +7,7 @@ pub const PRIVACY_COLUMNS: usize = 2; pub const PRIVACY_UNIT: &str = "_PRIVACY_UNIT_"; pub const PRIVACY_UNIT_DEFAULT: &str = "_PRIVACY_UNIT_DEFAULT_"; pub const PRIVACY_UNIT_WEIGHT: &str = "_PRIVACY_UNIT_WEIGHT_"; +pub const PRIVACY_UNIT_ROW: &str = "_PRIVACY_UNIT_ROW_"; // A few utility objects @@ -269,6 +270,10 @@ impl PrivacyUnit { PRIVACY_COLUMNS } + pub fn privacy_unit_row() -> &'static str { + PRIVACY_UNIT_ROW + } + pub fn privacy_unit() -> &'static str { PrivacyUnitPath::privacy_unit() } @@ -411,6 +416,7 @@ mod tests { ), ("order_table", vec![("user_id", "user_table", "id")], "name"), ("user_table", vec![], "name"), + ("product_table", vec![], PRIVACY_UNIT_ROW), ]); println!("{}", privacy_unit); } diff --git a/src/rewriting/mod.rs b/src/rewriting/mod.rs index 0683a7bd..cd0225f1 100644 --- a/src/rewriting/mod.rs +++ b/src/rewriting/mod.rs @@ -194,6 +194,43 @@ mod tests { ); } + #[test] + fn test_rewrite_with_differential_privacy_with_row_privacy() { + let database = postgresql::test_database(); + let relations = database.relations(); + let query = parse("SELECT order_id, sum(price) FROM item_table GROUP BY order_id").unwrap(); + let synthetic_data = SyntheticData::new(Hierarchy::from([ + (vec!["item_table"], Identifier::from("item_table")), + (vec!["order_table"], Identifier::from("order_table")), + (vec!["user_table"], Identifier::from("user_table")), + ])); + let privacy_unit = PrivacyUnit::from(vec![ + ( + "item_table", + vec![ + ("order_id", "order_table", "id"), + ("user_id", "user_table", "id"), + ], + PrivacyUnit::privacy_unit_row(), + ), + ("order_table", vec![("user_id", "user_table", "id")], PrivacyUnit::privacy_unit_row()), + ("user_table", vec![], PrivacyUnit::privacy_unit_row()), + ]); + let budget = Budget::new(1., 1e-3); + let relation = Relation::try_from(query.with(&relations)).unwrap(); + let relation_with_private_query = relation + .rewrite_with_differential_privacy(&relations, synthetic_data, privacy_unit, budget) + .unwrap(); + relation_with_private_query + .relation() + .display_dot() + .unwrap(); + println!( + "PrivateQuery = {}", + relation_with_private_query.private_query() + ); + } + #[test] fn test_rewrite_as_privacy_unit_preserving() { let database = postgresql::test_database(); @@ -231,6 +268,43 @@ mod tests { ); } + #[test] + fn test_rewrite_as_privacy_unit_preserving_with_row_privacy() { + let database = postgresql::test_database(); + let relations = database.relations(); + let query = parse("SELECT * FROM order_table").unwrap(); + let synthetic_data = SyntheticData::new(Hierarchy::from([ + (vec!["item_table"], Identifier::from("item_table")), + (vec!["order_table"], Identifier::from("order_table")), + (vec!["user_table"], Identifier::from("user_table")), + ])); + let privacy_unit = PrivacyUnit::from(vec![ + ( + "item_table", + vec![ + ("order_id", "order_table", "id"), + ("user_id", "user_table", "id"), + ], + PrivacyUnit::privacy_unit_row(), + ), + ("order_table", vec![("user_id", "user_table", "id")], PrivacyUnit::privacy_unit_row()), + ("user_table", vec![], PrivacyUnit::privacy_unit_row()), + ]); + let budget = Budget::new(1., 1e-3); + let relation = Relation::try_from(query.with(&relations)).unwrap(); + let relation_with_private_query = relation + .rewrite_as_privacy_unit_preserving(&relations, synthetic_data, privacy_unit, budget) + .unwrap(); + relation_with_private_query + .relation() + .display_dot() + .unwrap(); + println!( + "PrivateQuery = {}", + relation_with_private_query.private_query() + ); + } + #[test] fn test_retail() { let retail_transactions: Relation = Relation::table() diff --git a/src/rewriting/rewriting_rule.rs b/src/rewriting/rewriting_rule.rs index c8cb702a..625d3a14 100644 --- a/src/rewriting/rewriting_rule.rs +++ b/src/rewriting/rewriting_rule.rs @@ -13,8 +13,8 @@ use crate::{ privacy_unit_tracking::{privacy_unit::PrivacyUnit, PrivacyUnitTracking}, relation::{Join, Map, Reduce, Relation, Set, Table, Values, Variant as _}, rewriting::relation_with_attributes::RelationWithAttributes, - synthetic_data::{self, SyntheticData}, - visitor::{Acceptor, Dependencies, Visited, Visitor}, + synthetic_data::SyntheticData, + visitor::{Acceptor, Visited, Visitor}, }; /// A simple Property object to tag Relations properties