Skip to content

Commit

Permalink
merge with main
Browse files Browse the repository at this point in the history
  • Loading branch information
victoria de sainte agathe committed Dec 8, 2023
2 parents fa37290 + 5afd113 commit 1169cdf
Show file tree
Hide file tree
Showing 10 changed files with 261 additions and 68 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

## [0.5.5] - 2023-12-09
### Added
- implemented row privacy [#215](https://github.com/Qrlew/qrlew/issues/215)
- Cast DP COUNT as integer [#217](https://github.com/Qrlew/qrlew/issues/217)
- implemented distinct in the select clause [#216](https://github.com/Qrlew/qrlew/issues/216)

## [0.5.4] - 2023-12-05
- implemented `STD` and `VAR` aggregations in the dp rewriting [#205](https://github.com/Qrlew/qrlew/issues/205)
Expand Down
94 changes: 56 additions & 38 deletions src/data_type/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1156,51 +1156,69 @@ Conversion function
pub fn cast(into: DataType) -> impl Function {
match into {
DataType::Text(t) if t == data_type::Text::full() => {
Pointwise::univariate(
DataType::Any,
DataType::text(),
|v| v.to_string().into()
)
Polymorphic::default()
.with(
Pointwise::univariate(
DataType::Any,
DataType::text(),
|v| v.to_string().into()
)
)
}
DataType::Float(f) if f == data_type::Float::full() => {
Pointwise::univariate(
DataType::text(),
DataType::float(),
|v| v.to_string().parse::<f64>().unwrap().into()
)
Polymorphic::from((
PartitionnedMonotonic::univariate(
data_type::Integer::default(),
|v| v as f64
),
Pointwise::univariate(
DataType::text(),
DataType::float(),
|v| v.to_string().parse::<f64>().unwrap().into()
)
))
}
DataType::Integer(i) if i == data_type::Integer::full() => {
Pointwise::univariate(
DataType::text(),
DataType::integer(),
|v| v.to_string().parse::<i64>().unwrap().into()
)
Polymorphic::from((
PartitionnedMonotonic::univariate(
data_type::Float::default(),
|v| v.round() as i64
),
Pointwise::univariate(
DataType::text(),
DataType::integer(),
|v| v.to_string().parse::<i64>().unwrap().into()
)
))
}
DataType::Boolean(b) if b == data_type::Boolean::full() => {
Pointwise::univariate(
DataType::text(),
DataType::boolean(),
|v| {
let true_list = vec![
"t".to_string(), "tr".to_string(), "tru".to_string(), "true".to_string(),
"y".to_string(), "ye".to_string(), "yes".to_string(),
"on".to_string(),
"1".to_string()
];
let false_list = vec![
"f".to_string(), "fa".to_string(), "fal".to_string(), "fals".to_string(), "false".to_string(),
"n".to_string(), "no".to_string(),
"off".to_string(),
"0".to_string()
];
if true_list.contains(&v.to_string().to_lowercase()) {
true.into()
} else if false_list.contains(&v.to_string().to_lowercase()) {
false.into()
} else {
panic!()
Polymorphic::default()
.with(
Pointwise::univariate(
DataType::text(),
DataType::boolean(),
|v| {
let true_list = vec![
"t".to_string(), "tr".to_string(), "tru".to_string(), "true".to_string(),
"y".to_string(), "ye".to_string(), "yes".to_string(),
"on".to_string(),
"1".to_string()
];
let false_list = vec![
"f".to_string(), "fa".to_string(), "fal".to_string(), "fals".to_string(), "false".to_string(),
"n".to_string(), "no".to_string(),
"off".to_string(),
"0".to_string()
];
if true_list.contains(&v.to_string().to_lowercase()) {
true.into()
} else if false_list.contains(&v.to_string().to_lowercase()) {
false.into()
} else {
panic!()
}
}
}
)
)
}
_ => todo!(),
Expand Down
12 changes: 6 additions & 6 deletions src/differential_privacy/aggregates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ impl PUPRelation {
aggregate::Aggregate::Count => {
input_b = input_b.with((one_col.as_str(), Expr::val(1.)));
sums.push((count_col.clone(), one_col));
output_b = output_b.with((name, Expr::col(count_col)));
output_b = output_b.with((name, Expr::cast_as_integer(Expr::col(count_col))));
}
aggregate::Aggregate::Sum => {
input_b = input_b.with((col_name.as_str(), Expr::col(col_name.as_str())));
Expand Down Expand Up @@ -1059,14 +1059,14 @@ mod tests {
.with(("std_distinct_b", AggregateColumn::std_distinct("b")))
.build();
let dp_relation = reduce.differentially_private_aggregates(epsilon.clone(), delta.clone()).unwrap();
//dp_relation.relation().display_dot().unwrap();
dp_relation.relation().display_dot().unwrap();
assert_eq!(
dp_relation.relation().data_type(),
DataType::structured([
("sum_a", DataType::float_interval(-2000., 2000.)),
("sum_distinct_a", DataType::float_interval(-2000., 2000.)),
("count_b", DataType::float_interval(0., 1000.)),
("count_distinct_b", DataType::float_interval(0., 1000.)),
("count_b", DataType::integer_interval(0, 1000)),
("count_distinct_b", DataType::integer_interval(0, 1000)),
("avg_distinct_b", DataType::float_interval(0., 10000.)),
("var_distinct_b", DataType::float_interval(0., 100000.)),
("std_distinct_b", DataType::float_interval(0., 316.22776601683796)),
Expand All @@ -1093,8 +1093,8 @@ mod tests {
DataType::structured([
("sum_a", DataType::float_interval(-2000., 2000.)),
("sum_distinct_a", DataType::float_interval(-2000., 2000.)),
("count_b", DataType::float_interval(0., 1000.)),
("count_distinct_b", DataType::float_interval(0., 1000.)),
("count_b", DataType::integer_interval(0, 1000)),
("count_distinct_b", DataType::integer_interval(0, 1000)),
("my_c", DataType::float_interval(10., 20.)),
("avg_distinct_b", DataType::float_interval(0., 10000.)),
("var_distinct_b", DataType::float_interval(0., 100000.)),
Expand Down
76 changes: 76 additions & 0 deletions src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2989,6 +2989,82 @@ mod tests {
);
}

/// Exercises the numeric casts in both directions on a single column `col1`:
/// float -> integer via `Expr::cast_as_integer`, then integer -> float via
/// `Expr::cast_as_float`. For each input set the computed super image is
/// printed and compared with the expected data type.
#[test]
fn test_cast_float_integer() {
    // ---- float => integer -------------------------------------------------
    println!("float => integer");
    let to_integer = Expr::cast_as_integer(Expr::col(String::from("col1")));
    println!("expression = {}", to_integer);
    println!("expression domain = {}", to_integer.domain());
    println!("expression co domain = {}", to_integer.co_domain());
    println!("expression data type = {}", to_integer.data_type());

    // A finite set of floats: the expected values correspond to
    // round-to-nearest (1.1 -> 1, 1.9 -> 2, 5.49 -> 5).
    let input = DataType::structured([("col1", DataType::float_values([1.1, 1.9, 5.49]))]);
    let image = to_integer.super_image(&input).unwrap();
    println!("expression super image = {}", image);
    assert_eq!(image, DataType::integer_values([1, 2, 5]));

    // A float interval is expected to map to the integer interval between
    // the images of its bounds.
    let input = DataType::structured([("col1", DataType::float_interval(1.1, 5.49))]);
    let image = to_integer.super_image(&input).unwrap();
    println!("expression super image = {}", image);
    assert_eq!(image, DataType::integer_interval(1, 5));

    // A narrow interval whose bounds share the same image collapses to a
    // single integer value.
    let input = DataType::structured([("col1", DataType::float_interval(1.1, 1.49))]);
    let image = to_integer.super_image(&input).unwrap();
    println!("expression super image = {}", image);
    assert_eq!(image, DataType::integer_value(1));

    // ---- integer => float -------------------------------------------------
    println!("integer => float");
    let to_float = Expr::cast_as_float(Expr::col(String::from("col1")));
    println!("expression = {}", to_float);
    println!("expression domain = {}", to_float.domain());
    println!("expression co domain = {}", to_float.co_domain());
    println!("expression data type = {}", to_float.data_type());

    // A finite set of integers maps to the same values as floats.
    let input = DataType::structured([("col1", DataType::integer_values([1, 4, 7]))]);
    let image = to_float.super_image(&input).unwrap();
    println!("expression super image = {}", image);
    assert_eq!(image, DataType::float_values([1., 4., 7.]));

    // An integer interval maps to the float interval with the same bounds.
    let input = DataType::structured([("col1", DataType::integer_interval(1, 7))]);
    let image = to_float.super_image(&input).unwrap();
    println!("expression super image = {}", image);
    assert_eq!(image, DataType::float_interval(1., 7.));
}

#[test]
fn test_cast_float_text() {
println!("float => text");
Expand Down
20 changes: 4 additions & 16 deletions src/privacy_unit_tracking/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ impl Deref for PUPRelation {

impl Relation {
/// Add the field for the row privacy
pub fn add_row_privacy(self) -> Self {
let expr = Expr::random_id((1e6 as i64) * self.size().max().unwrap());
pub fn privacy_unit_row(self) -> Self {
let expr = Expr::random(namer::new_id(self.name()));
self.identity_with_field(
PrivacyUnit::per_row_privacy(),
expr,
Expand All @@ -139,7 +139,7 @@ impl Relation {
/// Add the field containing the privacy unit
pub fn add_privacy_unit(self, referred_field: &str) -> Self {
let relation = if referred_field == PrivacyUnit::per_row_privacy() {
self.add_row_privacy()
self.privacy_unit_row()
} else {
self
};
Expand Down Expand Up @@ -168,7 +168,7 @@ impl Relation {
Arc::new(
referred_relation.deref()
.clone()
.add_row_privacy()
.privacy_unit_row()
)
} else {
referred_relation
Expand Down Expand Up @@ -234,18 +234,6 @@ impl Relation {
}
}

impl Expr {
    /// Builds the expression `cast_as_integer(random(id) * size)`.
    ///
    /// The identifier passed to `Expr::random` is freshly generated by
    /// `namer::new_id` from the per-row privacy column name.
    /// NOTE(review): presumably `Expr::random` yields values in [0, 1), so the
    /// result would range over roughly `0..size` — confirm against `Expr::random`.
    fn random_id(size: i64) -> Expr {
        let id = namer::new_id(PrivacyUnit::per_row_privacy());
        let scaled = Expr::multiply(Expr::random(id), Expr::val(size));
        Expr::cast_as_integer(scaled)
    }
}

/// Implements the privacy tracking of various relations
pub struct PrivacyUnitTracking<'a> {
relations: &'a Hierarchy<Arc<Relation>>,
Expand Down
6 changes: 3 additions & 3 deletions src/privacy_unit_tracking/privacy_unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pub const PRIVACY_COLUMNS: usize = 2;
pub const PRIVACY_UNIT: &str = "_PRIVACY_UNIT_";
pub const PRIVACY_UNIT_DEFAULT: &str = "_PRIVACY_UNIT_DEFAULT_";
pub const PRIVACY_UNIT_WEIGHT: &str = "_PRIVACY_UNIT_WEIGHT_";
pub const PER_ROW_PRIVACY: &str = "_RANDOM_";
pub const PRIVACY_UNIT_ROW: &str = "_PRIVACY_UNIT_ROW_";

// A few utility objects

Expand Down Expand Up @@ -271,7 +271,7 @@ impl PrivacyUnit {
}

pub fn per_row_privacy() -> &'static str {
PER_ROW_PRIVACY
PRIVACY_UNIT_ROW
}

pub fn privacy_unit() -> &'static str {
Expand Down Expand Up @@ -416,7 +416,7 @@ mod tests {
),
("order_table", vec![("user_id", "user_table", "id")], "name"),
("user_table", vec![], "name"),
("product_table", vec![], PER_ROW_PRIVACY),
("product_table", vec![], PRIVACY_UNIT_ROW),
]);
println!("{}", privacy_unit);
}
Expand Down
5 changes: 4 additions & 1 deletion src/relation/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,10 @@ impl Reduce {
aggregate_column
.super_image(&input_columns_data_type)
.unwrap(),
if has_one_group && aggregate_column.aggregate() == &Aggregate::First {
if aggregate_column.aggregate() == &Aggregate::First && (
has_one_group ||
input.schema().field(aggregate_column.column_name().unwrap()).unwrap().constraint() == Some(Constraint::Unique)
){
Some(Constraint::Unique)
} else {
None
Expand Down
Loading

0 comments on commit 1169cdf

Please sign in to comment.