Skip to content

Commit

Permalink
Merge pull request #277 from Qrlew/add_rewriting_rules
Browse files Browse the repository at this point in the history
Choose weight column for the Privacy Unit
  • Loading branch information
ngrislain authored May 27, 2024
2 parents c9c7104 + 22bd1a3 commit f4bfbc3
Show file tree
Hide file tree
Showing 8 changed files with 1,170 additions and 413 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
## [0.9.18] - 2024-05-23
### Added
- Use a column present in the schema as the privacy unit weight
- Added a flag to `PrivacyUnit` to deactivate the MD5 hashing of the privacy_unit field
- Added new rewriting rules: PUP -> PUP for the Reduce, and (Pub, PUP) -> PUP, (PUP, Pub) -> PUP for the Join

## [0.9.17] - 2024-03-28
### Fixed
Expand Down
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
authors = ["Nicolas Grislain <ng@sarus.tech>"]
name = "qrlew"
version = "0.9.17"
version = "0.9.18"
edition = "2021"
description = "Sarus Qrlew Engine"
documentation = "https://docs.rs/qrlew"
Expand Down Expand Up @@ -37,7 +37,7 @@ sqlx = { version = "0.6.3", features = ["mssql", "runtime-tokio-native-tls", "of
tokio = { version = "1", features = ["full"], optional = true }

# bigquery dependencies
gcp-bigquery-client = { version = "0.18", optional = true }
gcp-bigquery-client = { version = "=0.18.0", optional = true }
wiremock = { version = "0.5.19", optional = true }
tempfile = { version = "3.6.0", optional = true }
yup-oauth2 = { version = "=8.3.2", optional = true } # 8.3.3 makes the compiling of gcp-bigquery-client fail
Expand Down
2 changes: 1 addition & 1 deletion src/dialect_translation/postgresql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pub struct PostgreSqlTranslator;

impl RelationToQueryTranslator for PostgreSqlTranslator {
fn first(&self, expr: &expr::Expr) -> ast::Expr {
ast::Expr::from(expr)
self.expr(expr)
}

fn mean(&self, expr: &expr::Expr) -> ast::Expr {
Expand Down
211 changes: 157 additions & 54 deletions src/privacy_unit_tracking/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ use crate::{
builder::{Ready, With, WithIterator},
expr::{AggregateColumn, Expr},
hierarchy::Hierarchy,
namer,
relation::{Join, Map, Reduce, Relation, Table, Values, Variant as _},
namer, display::Dot
};
pub use privacy_unit::{PrivacyUnit, PrivacyUnitPath};
use std::{error, fmt, ops::Deref, result, sync::Arc};
use std::{collections::HashMap, error, fmt, ops::Deref, result, sync::Arc};

#[derive(Debug, Clone)]
pub enum Error {
Expand Down Expand Up @@ -69,7 +69,6 @@ pub enum Strategy {
pub struct PupRelation(pub Relation);

impl PupRelation {

/// Returns the string given by `PrivacyUnit::privacy_unit()`.
/// NOTE(review): presumably the name of the privacy unit column — confirm against `PrivacyUnit`.
pub fn privacy_unit(&self) -> &str {
PrivacyUnit::privacy_unit()
}
Expand Down Expand Up @@ -131,10 +130,7 @@ impl Relation {
/// Add the field for the row privacy
pub fn privacy_unit_row(self) -> Self {
let expr = Expr::random(namer::new_id(self.name()));
self.identity_with_field(
PrivacyUnit::privacy_unit_row(),
expr,
)
self.identity_with_field(PrivacyUnit::privacy_unit_row(), expr)
}
/// Add the field containing the privacy unit
pub fn privacy_unit(self, referred_field: &str) -> Self {
Expand All @@ -143,36 +139,54 @@ impl Relation {
} else {
self
};
relation.identity_with_field(
PrivacyUnitPath::privacy_unit(),
Expr::col(referred_field),
)
relation.identity_with_field(PrivacyUnitPath::privacy_unit(), Expr::col(referred_field))
}
/// Make sure the relation carries a privacy unit weight field.
///
/// * `Some(field)`: if the schema has no field named `PrivacyUnit::privacy_unit_weight()` yet,
///   one is added as a column reference to `field`; if it is already there, `self` is
///   returned untouched.
/// * `None`: a weight field holding the constant `1` is added through `with_field`,
///   without looking at whether the schema already has one.
pub fn with_privacy_unit_weight(self, referred_weight_field: Option<String>) -> Self {
    let weight_column = PrivacyUnit::privacy_unit_weight();
    // Looked up before branching so the schema borrow ends before `self` is moved.
    let already_weighted = self.schema().field(weight_column).is_ok();
    match referred_weight_field {
        Some(_) if already_weighted => self,
        Some(source_column) => self.with_field(weight_column, Expr::col(source_column)),
        None => self.with_field(weight_column, Expr::val(1)),
    }
}
/// Add a field designated with a foreign relation and a field
pub fn with_referred_field(
/// Add fields designated with a foreign relation and a field
pub fn with_referred_fields(
self,
referring_id: String,
referred_relation: Arc<Relation>,
referred_id: String,
referred_field: String,
referred_field_name: String,
referred_fields: Vec<String>,
referred_fields_names: Vec<String>,
) -> Relation {
let left_size = referred_relation.schema().len();
let names: Vec<String> = self
.schema()
.iter()
.map(|f| f.name().to_string())
.filter(|name| name != &referred_field_name)
.filter(|name| !referred_fields_names.contains(name))
.collect();
let referred_relation =
if referred_fields.contains(&PrivacyUnit::privacy_unit_row().to_string()) {
Arc::new(referred_relation.deref().clone().privacy_unit_row())
} else {
referred_relation
};

let lookup_fields_to_names: HashMap<String, String> = referred_fields
.into_iter()
.zip(referred_fields_names)
.map(|(field, name)| (field, name))
.collect();
let referred_relation = if referred_field == PrivacyUnit::privacy_unit_row() {
Arc::new(
referred_relation.deref()
.clone()
.privacy_unit_row()
)
} else {
referred_relation
};
let join: Relation = Relation::join()
.inner(Expr::eq(
Expr::qcol(Join::right_name(), &referring_id),
Expand All @@ -194,9 +208,10 @@ impl Relation {
.skip(left_size)
.collect();
Relation::map()
.with_iter(left.into_iter().find_map(|(o, i)| {
(referred_field == i.name())
.then_some((referred_field_name.clone(), Expr::col(o.name())))
.with_iter(left.into_iter().filter_map(|(o, i)| {
lookup_fields_to_names
.get(i.name())
.and_then(|name| Some((name.clone(), Expr::col(o.name()))))
}))
.with_iter(right.into_iter().filter_map(|(o, i)| {
names
Expand All @@ -212,23 +227,26 @@ impl Relation {
relations: &Hierarchy<Arc<Relation>>,
field_path: PrivacyUnitPath,
) -> Relation {
let referred_weight_field = field_path.referred_weight_field().clone();
if field_path.path().is_empty() {
self.privacy_unit(field_path.referred_field())
.with_privacy_unit_weight(referred_weight_field)
} else {
field_path
.into_iter()
.fold(self, |relation, referred_field| {
relation.with_referred_field(
referred_field.referring_id,
.fold(self, |relation, referred_fields| {
relation.with_referred_fields(
referred_fields.referring_id,
relations
.get(&[referred_field.referred_relation.to_string()])
.get(&[referred_fields.referred_relation.to_string()])
.unwrap()
.clone(),
referred_field.referred_id,
referred_field.referred_field,
referred_field.referred_field_name,
referred_fields.referred_id,
referred_fields.referred_fields,
referred_fields.referred_fields_names,
)
})
.with_privacy_unit_weight(referred_weight_field)
}
}
}
Expand Down Expand Up @@ -260,18 +278,16 @@ impl<'a> PrivacyUnitTracking<'a> {
.iter()
.find(|(name, _field_path)| table.name() == self.relations[name.as_str()].name())
.ok_or(Error::no_private_table(table.path()))?;
PupRelation::try_from(
Relation::from(table.clone())
.with_field_path(self.relations, field_path.clone())
.map_fields(|name, expr| {
if name == PrivacyUnit::privacy_unit() {
Expr::md5(Expr::cast_as_text(expr))
} else {
expr
}
})
.insert_field(1, PrivacyUnit::privacy_unit_weight(), Expr::val(1)),
)
let relation = Relation::from(table.clone())
.with_field_path(self.relations, field_path.clone())
.map_fields(|name, expr| {
if name == PrivacyUnit::privacy_unit() && self.privacy_unit.hash_privacy_unit() {
Expr::md5(Expr::cast_as_text(expr))
} else {
expr
}
});
PupRelation::try_from(relation)
}

/// Map privacy tracking from another PUP relation
Expand Down Expand Up @@ -560,7 +576,7 @@ mod tests {
PrivacyUnitPath::from((vec![("user_id", "users", "id")], "id")),
);
relation.display_dot().unwrap();
assert!(relation.schema()[0].name() == PrivacyUnit::privacy_unit());
assert!(relation.schema()[1].name() == PrivacyUnit::privacy_unit());
// Link items to orders
let items = relations.get(&["items".to_string()]).unwrap().as_ref();
let relation = items.clone().with_field_path(
Expand All @@ -570,7 +586,7 @@ mod tests {
"name",
)),
);
assert!(relation.schema()[0].name() == PrivacyUnit::privacy_unit());
assert!(relation.schema()[1].name() == PrivacyUnit::privacy_unit());
// Produce the query
relation.display_dot().unwrap();
let query: &str = &ast::Query::from(&relation).to_string();
Expand All @@ -586,17 +602,20 @@ mod tests {
.join("\n")
);
let relation = relation.filter_fields(|n| n != "peid");
assert!(relation.schema()[0].name() != "peid");
assert!(relation.schema()[1].name() != "peid");

// with row privacy
// Link orders to users
let orders = relations.get(&["orders".to_string()]).unwrap().as_ref();
let relation = orders.clone().with_field_path(
&relations,
PrivacyUnitPath::from((vec![("user_id", "users", "id")], PrivacyUnit::privacy_unit_row())),
PrivacyUnitPath::from((
vec![("user_id", "users", "id")],
PrivacyUnit::privacy_unit_row(),
)),
);
relation.display_dot().unwrap();
assert!(relation.schema()[0].name() == PrivacyUnit::privacy_unit());
assert!(relation.schema()[1].name() == PrivacyUnit::privacy_unit());
// Link items to orders
let items = relations.get(&["items".to_string()]).unwrap().as_ref();
let relation = items.clone().with_field_path(
Expand All @@ -607,7 +626,7 @@ mod tests {
)),
);
relation.display_dot().unwrap();
assert!(relation.schema()[0].name() == PrivacyUnit::privacy_unit());
assert!(relation.schema()[1].name() == PrivacyUnit::privacy_unit());
// Produce the query
let query: &str = &ast::Query::from(&relation).to_string();
println!("{query}");
Expand Down Expand Up @@ -648,7 +667,7 @@ mod tests {
table.display_dot().unwrap();
println!("Schema privacy_tracked = {}", table.schema());
println!("Query privacy tracked = {}", ast::Query::from(&*table));
assert_eq!(table.schema()[0].name(), PrivacyUnit::privacy_unit())
assert_eq!(table.schema()[1].name(), PrivacyUnit::privacy_unit())
}

#[test]
Expand Down Expand Up @@ -725,6 +744,90 @@ mod tests {
.join("\n");
}

/// Runs privacy unit tracking over two tables that already expose
/// `sarus_privacy_unit` and `sarus_weight` columns, first with a privacy unit
/// definition naming only the privacy unit column, then with one that also
/// names the weight column. Both definitions pass `false` as their flag.
#[test]
fn test_join_privacy_tracking_without_hashing_pu() {
    let schema1 = Schema::empty()
        .with((
            "sarus_privacy_unit".to_string(),
            DataType::optional(DataType::id()),
        ))
        .with((
            "sarus_weight".to_string(),
            DataType::float_interval(0.0, 20.0),
        ))
        .with(("id", DataType::id()))
        .with(("a", DataType::float()));
    let table1: Table = Relation::table()
        .schema(schema1)
        .name("table1")
        .size(10)
        .build();

    let schema2 = Schema::empty()
        .with((
            "sarus_privacy_unit".to_string(),
            DataType::optional(DataType::id()),
        ))
        .with((
            "sarus_weight".to_string(),
            DataType::float_interval(0.0, 20.0),
        ))
        .with(("b", DataType::integer()));
    let table2: Table = Relation::table()
        .schema(schema2)
        .name("table2")
        .size(20)
        .build();

    let tables = vec![table1, table2];
    // Tables can be accessed from their name or path
    let name_entries = tables
        .iter()
        .map(|t| (Identifier::from(t.name()), Arc::new(Relation::from(t.clone()))));
    let path_entries = tables
        .iter()
        .map(|t| (t.path().clone(), Arc::new(Relation::from(t.clone()))));
    let relations: Hierarchy<Arc<Relation>> = name_entries.chain(path_entries).collect();

    // Track every table under the given privacy unit definition and render the result.
    let track_every_table = |privacy_unit: PrivacyUnit| {
        let privacy_unit_tracking =
            PrivacyUnitTracking::new(&relations, privacy_unit, Strategy::Hard);
        for table in tables.clone() {
            let pup_table = privacy_unit_tracking
                .table(&table.clone().try_into().unwrap())
                .unwrap();
            pup_table.deref().display_dot().unwrap();
        }
    };

    // Privacy unit column only.
    track_every_table(PrivacyUnit::from((
        vec![
            ("table1", vec![], "sarus_privacy_unit"),
            ("table2", vec![("b", "table1", "id")], "sarus_privacy_unit"),
        ],
        false,
    )));

    // Privacy unit column together with an explicit weight column.
    track_every_table(PrivacyUnit::from((
        vec![
            ("table1", vec![], "sarus_privacy_unit", "sarus_weight"),
            (
                "table2",
                vec![("b", "table1", "id")],
                "sarus_privacy_unit",
                "sarus_weight",
            ),
        ],
        false,
    )));
}

#[test]
fn test_auto_join_privacy_tracking() {
let mut database = postgresql::test_database();
Expand Down
Loading

0 comments on commit f4bfbc3

Please sign in to comment.