From a3ea372ebbb777c0d98b0484c876bb78f44a782c Mon Sep 17 00:00:00 2001 From: victoria de sainte agathe Date: Fri, 15 Sep 2023 09:19:17 +0200 Subject: [PATCH] fmt --- src/differential_privacy/mod.rs | 116 ++++++++++++------ .../protect_grouping_keys.rs | 2 +- src/expr/identifier.rs | 4 +- src/expr/mod.rs | 6 +- src/expr/split.rs | 85 ++++++++----- src/hierarchy.rs | 5 +- src/protection/mod.rs | 2 +- src/relation/builder.rs | 10 +- src/relation/mod.rs | 35 ++++-- src/sampling_adjustment/mod.rs | 6 +- 10 files changed, 181 insertions(+), 90 deletions(-) diff --git a/src/differential_privacy/mod.rs b/src/differential_privacy/mod.rs index 5efb3198..071cd8f1 100644 --- a/src/differential_privacy/mod.rs +++ b/src/differential_privacy/mod.rs @@ -7,15 +7,14 @@ pub mod mechanisms; pub mod protect_grouping_keys; use crate::{ - Ready, + builder::With, data_type::DataTyped, - expr::{self, aggregate, Expr, AggregateColumn}, - hierarchy::Hierarchy, - relation::{field::Field, transforms, Map, Reduce, Relation, Variant as _}, - DataType, display::Dot, + expr::{self, aggregate, AggregateColumn, Expr}, + hierarchy::Hierarchy, protection::PEPRelation, - builder::With, + relation::{field::Field, transforms, Map, Reduce, Relation, Variant as _}, + DataType, Ready, }; use std::collections::{HashMap, HashSet}; use std::ops::Deref; @@ -75,7 +74,6 @@ impl Deref for DPRelation { } } - impl Field { pub fn clipping_value(self, multiplicity: i64) -> f64 { match self.data_type() { @@ -107,16 +105,27 @@ impl PEPRelation { // } /// Compile a protected Relation into DP - pub fn dp_compile(self, epsilon: f64, delta: f64) -> Result {// Return a DP relation + pub fn dp_compile(self, epsilon: f64, delta: f64) -> Result { + // Return a DP relation let protected_entity_id = self.protected_entity_id().to_string(); let protected_entity_weight = self.protected_entity_weight().to_string(); match Relation::from(self) { Relation::Map(map) => { let dp_input = PEPRelation(map.input().clone()).dp_compile(epsilon, delta)?; - Ok(DPRelation(Map::builder().with(map).input(Relation::from(dp_input)).build())) - }, - Relation::Reduce(reduce) => reduce.dp_compile_sums(&protected_entity_id, &protected_entity_weight, epsilon, delta), - relation => Err(Error::invalid_relation(relation)) + Ok(DPRelation( + Map::builder() + .with(map) + .input(Relation::from(dp_input)) + .build(), + )) + } + Relation::Reduce(reduce) => reduce.dp_compile_sums( + &protected_entity_id, + &protected_entity_weight, + epsilon, + delta, + ), + relation => Err(Error::invalid_relation(relation)), } } } @@ -125,7 +134,13 @@ impl PEPRelation { */ impl Reduce { /// DP compile the sums - fn dp_compile_sums(self, protected_entity_id: &str, protected_entity_weight: &str, epsilon: f64, delta: f64) -> Result { + fn dp_compile_sums( + self, + protected_entity_id: &str, + protected_entity_weight: &str, + epsilon: f64, + delta: f64, + ) -> Result { // Collect groups let mut input_entities: Option<&str> = None; let mut input_groups: HashSet<&str> = self.group_by_names().into_iter().collect(); @@ -140,21 +155,28 @@ impl Reduce { // remove pe group input_groups.remove(&input_name); input_entities = Some(input_name); - } else if aggregate.aggregate() == &aggregate::Aggregate::Sum && name != protected_entity_weight {// add aggregate + } else if aggregate.aggregate() == &aggregate::Aggregate::Sum + && name != protected_entity_weight + { + // add aggregate let input_data_type = self.input().schema()[input_name].data_type(); let absolute_bound = input_data_type.absolute_upper_bound().unwrap_or(1.0); input_values_bound.push((input_name, absolute_bound)); } - }; + } // Check that groups are public - if !input_groups.iter().all(|e| match self.input().schema()[*e].data_type() {// TODO improve this - DataType::Boolean(b) if b.all_values() => true, - DataType::Integer(i) if i.all_values() => true, - DataType::Enum(e) => true, - DataType::Float(f) if f.all_values() => true, - DataType::Text(t) if t.all_values() => true, - _ => false, - }) { + if !input_groups + .iter() + .all(|e| match self.input().schema()[*e].data_type() { + // TODO improve this + DataType::Boolean(b) if b.all_values() => true, + DataType::Integer(i) if i.all_values() => true, + DataType::Enum(e) => true, + DataType::Float(f) if f.all_values() => true, + DataType::Text(t) if t.all_values() => true, + _ => false, + }) + { //return Err(Error::invalid_relation(self)); println!("GROUPS SHOULD BE PUBLIC") }; @@ -165,40 +187,62 @@ impl Reduce { input_values_bound.iter().cloned().collect(), ); let noise_multiplier = 1.; // TODO set this properly - let dp_clipped_relation = clipped_relation.add_gaussian_noise(input_values_bound.into_iter().map(|(name, bound)| (name,noise_multiplier*bound)).collect()); - let renamed_dp_clipped_relation = dp_clipped_relation.rename_fields(|n, e| names.get(n).unwrap_or(&n).to_string()); + let dp_clipped_relation = clipped_relation.add_gaussian_noise( + input_values_bound + .into_iter() + .map(|(name, bound)| (name, noise_multiplier * bound)) + .collect(), + ); + let renamed_dp_clipped_relation = + dp_clipped_relation.rename_fields(|n, e| names.get(n).unwrap_or(&n).to_string()); Ok(DPRelation(renamed_dp_clipped_relation)) } /// Rewrite aggregations as sums and compile sums - pub fn dp_compile(self, protected_entity_id: &str, protected_entity_weight: &str, epsilon: f64, delta: f64) -> Result { + pub fn dp_compile( + self, + protected_entity_id: &str, + protected_entity_weight: &str, + epsilon: f64, + delta: f64, + ) -> Result { let mut output = Map::builder(); let mut sums = Reduce::builder(); // Add aggregate colums for (name, aggregate) in self.named_aggregates().into_iter() { match aggregate.aggregate() { aggregate::Aggregate::First => { - sums = sums.with((aggregate.column_name()?, AggregateColumn::col(aggregate.column_name()?))); - }, + sums = sums.with(( + aggregate.column_name()?, + AggregateColumn::col(aggregate.column_name()?), + )); + } aggregate::Aggregate::Mean => { let sum_col = &format!("_SUM_{}", aggregate.column_name()?); let count_col = &format!("_COUNT_{}", aggregate.column_name()?); sums = sums .with((count_col, Expr::sum(Expr::val(1.)))) .with((sum_col, Expr::sum(Expr::col(aggregate.column_name()?)))); - output = output - .with((name, Expr::divide(Expr::col(sum_col), Expr::greatest(Expr::val(1.), Expr::col(count_col))))) - }, + output = output.with(( + name, + Expr::divide( + Expr::col(sum_col), + Expr::greatest(Expr::val(1.), Expr::col(count_col)), + ), + )) + } aggregate::Aggregate::Count => { let count_col = &format!("_COUNT_{}", aggregate.column_name()?); sums = sums.with((count_col, Expr::sum(Expr::val(1.)))); output = output.with((name, Expr::col(count_col))); - }, - aggregate::Aggregate::Sum if aggregate.column_name()? != protected_entity_weight => { + } + aggregate::Aggregate::Sum + if aggregate.column_name()? != protected_entity_weight => + { let sum_col = &format!("_SUM_{}", aggregate.column_name()?); sums = sums.with((sum_col, Expr::sum(Expr::col(aggregate.column_name()?)))); output = output.with((name, Expr::col(sum_col))); - }, + } aggregate::Aggregate::Std => todo!(), aggregate::Aggregate::Var => todo!(), _ => (), @@ -206,7 +250,9 @@ impl Reduce { } sums = sums.group_by_iter(self.group_by().iter().cloned()); let sums: Reduce = sums.input(self.input().clone()).build(); - let dp_sums: Relation = sums.dp_compile_sums(protected_entity_id, protected_entity_weight, epsilon, delta)?.into(); + let dp_sums: Relation = sums + .dp_compile_sums(protected_entity_id, protected_entity_weight, epsilon, delta)? + .into(); Ok(DPRelation(output.input(dp_sums).build())) } } diff --git a/src/differential_privacy/protect_grouping_keys.rs b/src/differential_privacy/protect_grouping_keys.rs index e948ab35..d07a0b50 100644 --- a/src/differential_privacy/protect_grouping_keys.rs +++ b/src/differential_privacy/protect_grouping_keys.rs @@ -207,7 +207,7 @@ impl Relation { #[cfg(test)] mod tests { use super::*; - use crate::{display::Dot, relation::Schema, expr::AggregateColumn}; + use crate::{display::Dot, expr::AggregateColumn, relation::Schema}; use std::rc::Rc; #[test] diff --git a/src/expr/identifier.rs b/src/expr/identifier.rs index 0489b7b3..4160ad33 100644 --- a/src/expr/identifier.rs +++ b/src/expr/identifier.rs @@ -18,7 +18,7 @@ impl Identifier { pub fn head(&self) -> Result<&str> { self.0.get(0).map_or_else( || Err(Error::invalid_expression("Identifier too short")), - |h| Ok(h.as_str()) + |h| Ok(h.as_str()), ) } @@ -36,7 +36,7 @@ impl Identifier { pub fn last(&self) -> Result<&str> { self.0.last().map_or_else( || Err(Error::invalid_expression("Identifier too short")), - |h| Ok(h.as_str()) + |h| Ok(h.as_str()), ) } diff --git a/src/expr/mod.rs b/src/expr/mod.rs index 93729475..bdd2f51f 100644 --- a/src/expr/mod.rs +++ b/src/expr/mod.rs @@ -22,7 +22,7 @@ use std::{ collections::BTreeMap, convert::identity, error, fmt, hash, - ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Neg, Not, Rem, Sub, Deref}, + ops::{Add, BitAnd, BitOr, BitXor, Deref, Div, Mul, Neg, Not, Rem, Sub}, rc::Rc, result, }; @@ -732,7 +732,7 @@ impl AggregateColumn { AggregateColumn { aggregate, column: column.clone(), - expr: Expr::Aggregate(Aggregate::new(aggregate, Rc::new(Expr::Column(column)))) + expr: Expr::Aggregate(Aggregate::new(aggregate, Rc::new(Expr::Column(column)))), } } /// Access aggregate @@ -782,7 +782,7 @@ impl TryFrom for AggregateColumn { } else { Err(Error::invalid_conversion(argument, "Column")) } - }, + } _ => Err(Error::invalid_conversion(value, "AggregateColumn")), } } diff --git a/src/expr/split.rs b/src/expr/split.rs index c44a6641..87d1e876 100644 --- a/src/expr/split.rs +++ b/src/expr/split.rs @@ -1,8 +1,8 @@ //! The splits with some improvements //! Each split has named Expr and anonymous exprs use super::{ - aggregate, function, visitor::Acceptor, Aggregate, Column, Expr, Function, Identifier, Value, - Visitor, AggregateColumn, + aggregate, function, visitor::Acceptor, Aggregate, AggregateColumn, Column, Expr, Function, + Identifier, Value, Visitor, }; use crate::{ namer::{self, FIELD}, @@ -32,7 +32,7 @@ impl Split { } pub fn reduce>(name: S, aggregate: AggregateColumn) -> Reduce { - Reduce::new(vec![(name.into(), aggregate)], vec![], None) + Reduce::new(vec![(name.into(), aggregate)], vec![], None) } pub fn group_by(expr: Expr) -> Reduce { @@ -170,19 +170,17 @@ impl Map { order_by, reduce, } = self; - let (named_aliases, aliased_expr): (Vec<(String, AggregateColumn)>, Vec<(String, Expr)>) = named_exprs - .into_iter() - .map(|(name, expr)| { - let alias = namer::name_from_content(FIELD, &expr); - ( + let (named_aliases, aliased_expr): (Vec<(String, AggregateColumn)>, Vec<(String, Expr)>) = + named_exprs + .into_iter() + .map(|(name, expr)| { + let alias = namer::name_from_content(FIELD, &expr); ( - name, - AggregateColumn::new(aggregate, alias.clone().into()), - ), - (alias, expr), - ) - }) - .unzip(); + (name, AggregateColumn::new(aggregate, alias.clone().into())), + (alias, expr), + ) + }) + .unzip(); Reduce::new( named_aliases, vec![], @@ -279,7 +277,7 @@ impl fmt::Display for Map { /// Concatenate two Reduce split into one impl And for Map { type Product = Self; - + fn and(self, other: Self) -> Self::Product { match (self.reduce, other.reduce) { (None, None) => Map::new( @@ -426,7 +424,11 @@ pub struct Reduce { } impl Reduce { - pub fn new(named_aggregates: Vec<(String, AggregateColumn)>, group_by: Vec, map: Option) -> Self { + pub fn new( + named_aggregates: Vec<(String, AggregateColumn)>, + group_by: Vec, + map: Option, + ) -> Self { Reduce { named_aggregates: named_aggregates.into_iter().unique().collect(), group_by: group_by.into_iter().unique().collect(), @@ -452,13 +454,14 @@ impl Reduce { group_by, map, } = self; - let (named_aliases, aliased_expr): (Vec<(String, Expr)>, Vec<(String, AggregateColumn)>) = named_aggregates - .into_iter() - .map(|(name, aggregate)| { - let alias = namer::name_from_content(FIELD, &aggregate); - ((name, Expr::col(alias.clone())), (alias.clone(), aggregate)) - }) - .unzip(); + let (named_aliases, aliased_expr): (Vec<(String, Expr)>, Vec<(String, AggregateColumn)>) = + named_aggregates + .into_iter() + .map(|(name, aggregate)| { + let alias = namer::name_from_content(FIELD, &aggregate); + ((name, Expr::col(alias.clone())), (alias.clone(), aggregate)) + }) + .unzip(); // If the reduce is empty, remove it if aliased_expr.is_empty() && group_by.is_empty() { Map::new(named_aliases, None, vec![], None) @@ -492,7 +495,11 @@ impl Reduce { pub fn map_last_map Map>(self, f: F) -> Self { match self.map { - Some(map) => Reduce::new(self.named_aggregates, self.group_by, Some(map.map_last_map(f))), + Some(map) => Reduce::new( + self.named_aggregates, + self.group_by, + Some(map.map_last_map(f)), + ), None => self, } } @@ -505,7 +512,11 @@ impl Reduce { self.group_by, Some(map.map_last_reduce(f)), ), - None => f(Reduce::new(self.named_aggregates, self.group_by, Some(*map))), + None => f(Reduce::new( + self.named_aggregates, + self.group_by, + Some(*map), + )), }, None => f(self), } @@ -576,7 +587,10 @@ impl And for Reduce { (map, group_by) }); Reduce::new( - named_aggregates.into_iter().chain(other.named_aggregates).collect(), + named_aggregates + .into_iter() + .chain(other.named_aggregates) + .collect(), group_by.into_iter().chain(other.group_by).collect(), Some(map), ) @@ -600,7 +614,10 @@ impl And for Reduce { (map, group_by) }); Reduce::new( - self.named_aggregates.into_iter().chain(named_aggregates).collect(), + self.named_aggregates + .into_iter() + .chain(named_aggregates) + .collect(), self.group_by.into_iter().chain(group_by).collect(), Some(map), ) @@ -655,7 +672,7 @@ impl And for Reduce { // Express matched sub-expressions as aggregates let matched: Vec<_> = matched .into_iter() - .map(|(n, e)| (n, AggregateColumn::try_from(e).unwrap()))// We know the expression is an Aggregate Column + .map(|(n, e)| (n, AggregateColumn::try_from(e).unwrap())) // We know the expression is an Aggregate Column .collect(); // Add matched sub-expressions ( @@ -797,7 +814,10 @@ mod tests { #[test] fn test_reduce() { let reduce = Reduce::new( - vec![("a".into(), expr!(count(x)).try_into().unwrap()), ("b".into(), expr!(sum(y)).try_into().unwrap())], + vec![ + ("a".into(), expr!(count(x)).try_into().unwrap()), + ("b".into(), expr!(sum(y)).try_into().unwrap()), + ], vec![], None, ); @@ -880,7 +900,10 @@ mod tests { #[test] fn test_reduce_and_where() { let reduce = Reduce::new( - vec![("a".into(), expr!(count(x)).try_into().unwrap()), ("b".into(), expr!(sum(y)).try_into().unwrap())], + vec![ + ("a".into(), expr!(count(x)).try_into().unwrap()), + ("b".into(), expr!(sum(y)).try_into().unwrap()), + ], vec![], None, ); diff --git a/src/hierarchy.rs b/src/hierarchy.rs index f66e4111..8662120d 100644 --- a/src/hierarchy.rs +++ b/src/hierarchy.rs @@ -248,7 +248,10 @@ impl<'a, P: Path, T: Clone, I: IntoIterator> With for Hierarch } /// Index -impl Index

for Hierarchy where T: fmt::Display { +impl Index

for Hierarchy +where + T: fmt::Display, +{ type Output = T; fn index(&self, index: P) -> &Self::Output { diff --git a/src/protection/mod.rs b/src/protection/mod.rs index f7022dce..3ffae495 100644 --- a/src/protection/mod.rs +++ b/src/protection/mod.rs @@ -6,7 +6,7 @@ use crate::{ builder::{self, Ready, With}, display::Dot, - expr::{identifier::Identifier, Expr, aggregate::Aggregate, AggregateColumn}, + expr::{aggregate::Aggregate, identifier::Identifier, AggregateColumn, Expr}, hierarchy::{Hierarchy, Path}, relation::{Join, Map, Reduce, Relation, Set, Table, Values, Variant as _, Visitor}, visitor::Acceptor, diff --git a/src/relation/builder.rs b/src/relation/builder.rs index ef43f6eb..9de4d108 100644 --- a/src/relation/builder.rs +++ b/src/relation/builder.rs @@ -10,7 +10,7 @@ use crate::{ ast, builder::{Ready, With, WithIterator}, data_type::{Integer, Value}, - expr::{self, Expr, Identifier, Split, aggregate, AggregateColumn}, + expr::{self, aggregate, AggregateColumn, Expr, Identifier, Split}, namer::{self, FIELD, JOIN, MAP, REDUCE, SET}, And, }; @@ -233,7 +233,11 @@ impl MapBuilder { } /// Initialize a builder with filtered existing map - pub fn rename_with String>(self, map: Map, f: F) -> MapBuilder { + pub fn rename_with String>( + self, + map: Map, + f: F, + ) -> MapBuilder { let Map { name, projection, @@ -1042,7 +1046,7 @@ impl Ready for ValuesBuilder { #[cfg(test)] mod tests { use super::*; - use crate::{data_type::DataTyped, display::Dot, DataType, expr::aggregate::Aggregate}; + use crate::{data_type::DataTyped, display::Dot, expr::aggregate::Aggregate, DataType}; #[test] fn test_map_building() { diff --git a/src/relation/mod.rs b/src/relation/mod.rs index b7705a46..e467de57 100644 --- a/src/relation/mod.rs +++ b/src/relation/mod.rs @@ -11,7 +11,12 @@ pub mod schema; pub mod sql; pub mod transforms; -use std::{cmp, error, fmt, hash, ops::{Index, Deref}, rc::Rc, result}; +use std::{ + cmp, error, fmt, hash, + ops::{Deref, Index}, + rc::Rc, + result, +}; use colored::Colorize; use itertools::Itertools; @@ -22,7 +27,7 @@ use crate::{ self, function::Function, intervals::Bound, DataType, DataTyped, Integer, Struct, Value, Variant as _, }, - expr::{self, Expr, Identifier, Split, Aggregate, aggregate, Column, AggregateColumn}, + expr::{self, aggregate, Aggregate, AggregateColumn, Column, Expr, Identifier, Split}, hierarchy::Hierarchy, namer, visitor::{self, Acceptor, Dependencies, Visited}, @@ -479,7 +484,10 @@ impl Reduce { } /// Compute the schema and exprs of the reduce - fn schema_aggregate(named_aggregate_columns: Vec<(String, AggregateColumn)>, input: &Relation) -> (Schema, Vec) { + fn schema_aggregate( + named_aggregate_columns: Vec<(String, AggregateColumn)>, + input: &Relation, + ) -> (Schema, Vec) { // The input schema HAS to be a Struct let input_data_type: Struct = input.data_type().try_into().unwrap(); let input_columns_data_type: DataType = @@ -490,7 +498,9 @@ impl Reduce { ( Field::new( name, - aggregate_column.super_image(&input_columns_data_type).unwrap(), + aggregate_column + .super_image(&input_columns_data_type) + .unwrap(), None, ), aggregate_column, @@ -538,7 +548,11 @@ impl Reduce { } /// Get names and expressions pub fn named_aggregates(&self) -> Vec<(&str, &AggregateColumn)> { - self.schema.iter().map(|f| f.name()).zip(self.aggregate.iter()).collect() + self.schema + .iter() + .map(|f| f.name()) + .zip(self.aggregate.iter()) + .collect() } /// Return a new builder pub fn builder() -> ReduceBuilder { @@ -546,10 +560,13 @@ impl Reduce { } /// Get group_by_names pub fn group_by_names(&self) -> Vec<&str> { - self.group_by.iter().filter_map(|e| match e { - Expr::Column(col) => col.last().ok(), - _ => None, - }).collect() + self.group_by + .iter() + .filter_map(|e| match e { + Expr::Column(col) => col.last().ok(), + _ => None, + }) + .collect() } } diff --git a/src/sampling_adjustment/mod.rs b/src/sampling_adjustment/mod.rs index a211bcad..e4190a80 100644 --- a/src/sampling_adjustment/mod.rs +++ b/src/sampling_adjustment/mod.rs @@ -5,7 +5,7 @@ use crate::{ builder::{Ready, With}, - expr::{aggregate, identifier::Identifier, Expr, AggregateColumn}, + expr::{aggregate, identifier::Identifier, AggregateColumn, Expr}, hierarchy::Hierarchy, relation::{Join, Map, Reduce, Relation, Set, Table, Values, Variant as _, Visitor}, visitor::Acceptor, @@ -218,9 +218,7 @@ impl<'a, F: Fn(&Table) -> RelationWithWeight> Visitor<'a, RelationWithWeight> name, Expr::divide(Expr::col(name), Expr::col(CORRECTION_FACTOR)), ), - aggregate::Aggregate::First | aggregate::Aggregate::Last => { - (name, Expr::col(name)) - } + aggregate::Aggregate::First | aggregate::Aggregate::Last => (name, Expr::col(name)), // todo for aggregation function that we don't know how to correct yet such as MIN and MAX. _ => todo!(), })