Skip to content

Commit

Permalink
merge with main
Browse files Browse the repository at this point in the history
  • Loading branch information
victoria de sainte agathe committed Dec 8, 2023
2 parents ad4af36 + bf3ea48 commit 6b6eb30
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 10 deletions.
9 changes: 6 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
## Added
- implemented row protection [#215](https://github.com/Qrlew/qrlew/issues/215)

## [0.5.4] - 2023-12-05
- implemented `STD` and `VAR` aggregations in the dp rewriting [#205](https://github.com/Qrlew/qrlew/issues/205)
- `Expr::filter_by_function`: if the filtered datatype cannot be determined, keep the original data [#209](https://github.com/Qrlew/qrlew/issues/209)
## Added
- implemented Public -> Synthetic rewriting rule for table [#209](https://github.com/Qrlew/qrlew/issues/209)

## [0.5.3] - 2023-12-02
## Changed
- some cleaning in the translation of Expr -> ast::Expr [#204](https://github.com/Qrlew/qrlew/issues/204)
- `Expr::filter_by_function`: if the filtered datatype cannot be determined, keep the original data [#209](https://github.com/Qrlew/qrlew/issues/209)
## Added
- implemented Public -> Synthetic rewriting rule for table [#209](https://github.com/Qrlew/qrlew/issues/209)
- implemented `BETWEEN`, `IS TRUE`, `IS FALSE`, `IS NULL`, `CHOOSE`, `LIKE` and `ILIKE` [#203](https://github.com/Qrlew/qrlew/issues/203)
- implemented `DAYNAME`, `FROM_UNIXTIME`, `UNIX_TIMESTAMP`, `DATETIME_DIFF`, `QUARTER` and `DATE_FORMAT` [#202](https://github.com/Qrlew/qrlew/issues/202)
- implemented `CURRENT_DATE`, `CURRENT_TIME`, `CURRENT_TIMESTAMP` and `EXTRACT(datepart FROM col)` [#200](https://github.com/Qrlew/qrlew/issues/200)
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
authors = ["Nicolas Grislain <ng@sarus.tech>"]
name = "qrlew"
version = "0.5.3"
version = "0.5.4"
edition = "2021"
description = "Sarus Qrlew Engine"
documentation = "https://docs.rs/qrlew"
Expand Down
82 changes: 76 additions & 6 deletions src/differential_privacy/aggregates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ impl PUPRelation {
let one_col = format!("_ONE_{}", col_name);
let sum_col = format!("_SUM_{}", col_name);
let count_col = format!("_COUNT_{}", col_name);
let square_col = format!("_SQUARE_{}", col_name);
let sum_square_col = format!("_SUM_{}", square_col);
match aggregate.aggregate() {
aggregate::Aggregate::First => {
assert!(group_by_names.contains(&col_name.as_str()));
Expand Down Expand Up @@ -179,8 +181,56 @@ impl PUPRelation {
sums.push((sum_col.clone(), col_name));
output_b = output_b.with((name, Expr::col(sum_col)));
}
aggregate::Aggregate::Std => todo!(),
aggregate::Aggregate::Var => todo!(),
aggregate::Aggregate::Std => {
    // Standard deviation is rebuilt from three sums registered in `sums`:
    //   count = SUM(1), sum = SUM(x), sum_square = SUM(x^2)
    // as sqrt(max(0, sum_square / count - (sum / count)^2)).
    input_b = input_b
        .with((col_name.as_str(), Expr::col(col_name.as_str())))
        .with((square_col.as_str(), Expr::pow(Expr::col(col_name.as_str()), Expr::val(2))))
        .with((one_col.as_str(), Expr::val(1.)));
    sums.push((count_col.clone(), one_col));
    sums.push((sum_col.clone(), col_name));
    sums.push((sum_square_col.clone(), square_col));
    output_b = output_b.with((
        name,
        // The outer `greatest(0, ..)` keeps the radicand non-negative before `sqrt`.
        Expr::sqrt(Expr::greatest(
            Expr::val(0.),
            Expr::minus(
                // E[X^2]; the denominator is clamped to at least 1.
                Expr::divide(
                    Expr::col(sum_square_col),
                    Expr::greatest(Expr::val(1.), Expr::col(count_col.clone())),
                ),
                // (E[X])^2: the mean must be squared, otherwise the result
                // is E[X^2] - E[X], which is not a variance.
                Expr::pow(
                    Expr::divide(
                        Expr::col(sum_col),
                        Expr::greatest(Expr::val(1.), Expr::col(count_col)),
                    ),
                    Expr::val(2),
                ),
            ),
        )),
    ))
}
aggregate::Aggregate::Var => {
    // Variance is rebuilt from three sums registered in `sums`:
    //   count = SUM(1), sum = SUM(x), sum_square = SUM(x^2)
    // as max(0, sum_square / count - (sum / count)^2).
    input_b = input_b
        .with((col_name.as_str(), Expr::col(col_name.as_str())))
        .with((square_col.as_str(), Expr::pow(Expr::col(col_name.as_str()), Expr::val(2))))
        .with((one_col.as_str(), Expr::val(1.)));
    sums.push((count_col.clone(), one_col));
    sums.push((sum_col.clone(), col_name));
    sums.push((sum_square_col.clone(), square_col));
    output_b = output_b.with((
        name,
        // The outer `greatest(0, ..)` keeps the reported variance non-negative.
        Expr::greatest(
            Expr::val(0.),
            Expr::minus(
                // E[X^2]; the denominator is clamped to at least 1.
                Expr::divide(
                    Expr::col(sum_square_col),
                    Expr::greatest(Expr::val(1.), Expr::col(count_col.clone())),
                ),
                // (E[X])^2: the mean must be squared, otherwise the result
                // is E[X^2] - E[X], which is not a variance.
                Expr::pow(
                    Expr::divide(
                        Expr::col(sum_col),
                        Expr::greatest(Expr::val(1.), Expr::col(count_col)),
                    ),
                    Expr::val(2),
                ),
            ),
        ),
    ))
}
_ => (),
}
(input_b, sums, output_b)
Expand Down Expand Up @@ -531,6 +581,8 @@ mod tests {
("count_price".to_string(), AggregateColumn::count("price")),
("sum_price".to_string(), AggregateColumn::sum("price")),
("avg_price".to_string(), AggregateColumn::mean("price")),
("var_price".to_string(), AggregateColumn::var("price")),
("std_price".to_string(), AggregateColumn::std("price")),
],
vec![],
pup_table.deref().clone().into(),
Expand All @@ -542,17 +594,19 @@ mod tests {
.differentially_private_aggregates(epsilon, delta)
.unwrap();
dp_relation.display_dot().unwrap();
assert_eq!(dp_relation.schema().len(), 3);
assert_eq!(dp_relation.schema().len(), 5);
println!("data_type = {}", dp_relation.data_type());
assert!(dp_relation
.data_type()
.is_subset_of(&DataType::structured(vec![
("count_price", DataType::float()),
("sum_price", DataType::float()),
("avg_price", DataType::float()),
("var_price", DataType::float_min(0.)),
("std_price", DataType::float_min(0.)),
])));

let query: &str = &ast::Query::from(&relation).to_string();
println!("{query}");
println!("\n{query}");
_ = database.query(query).unwrap();
}

Expand Down Expand Up @@ -590,6 +644,8 @@ mod tests {
("count_price".to_string(), AggregateColumn::count("price")),
("sum_price".to_string(), AggregateColumn::sum("price")),
("avg_price".to_string(), AggregateColumn::mean("price")),
("var_price".to_string(), AggregateColumn::var("price")),
("std_price".to_string(), AggregateColumn::std("price")),
],
vec![expr!(item)],
pup_table.deref().clone().into(),
Expand All @@ -601,13 +657,15 @@ mod tests {
.differentially_private_aggregates(epsilon, delta)
.unwrap();
dp_relation.display_dot().unwrap();
assert_eq!(dp_relation.schema().len(), 3);
assert_eq!(dp_relation.schema().len(), 5);
assert!(dp_relation
.data_type()
.is_subset_of(&DataType::structured(vec![
("count_price", DataType::float()),
("sum_price", DataType::float()),
("avg_price", DataType::float()),
("var_price", DataType::float_min(0.)),
("std_price", DataType::float_min(0.)),
])));

let query: &str = &ast::Query::from(&relation).to_string();
Expand Down Expand Up @@ -996,6 +1054,9 @@ mod tests {
.with(("sum_distinct_a", AggregateColumn::sum_distinct("a")))
.with(("count_b", AggregateColumn::count("b")))
.with(("count_distinct_b", AggregateColumn::count_distinct("b")))
.with(("avg_distinct_b", AggregateColumn::mean_distinct("b")))
.with(("var_distinct_b", AggregateColumn::var_distinct("b")))
.with(("std_distinct_b", AggregateColumn::std_distinct("b")))
.build();
let dp_relation = reduce.differentially_private_aggregates(epsilon.clone(), delta.clone()).unwrap();
//dp_relation.relation().display_dot().unwrap();
Expand All @@ -1006,6 +1067,9 @@ mod tests {
("sum_distinct_a", DataType::float_interval(-2000., 2000.)),
("count_b", DataType::float_interval(0., 1000.)),
("count_distinct_b", DataType::float_interval(0., 1000.)),
("avg_distinct_b", DataType::float_interval(0., 10000.)),
("var_distinct_b", DataType::float_interval(0., 100000.)),
("std_distinct_b", DataType::float_interval(0., 316.22776601683796)),
])
);

Expand All @@ -1017,6 +1081,9 @@ mod tests {
.with(("count_b", AggregateColumn::count("b")))
.with(("count_distinct_b", AggregateColumn::count_distinct("b")))
.with(("my_c", AggregateColumn::first("c")))
.with(("avg_distinct_b", AggregateColumn::mean_distinct("b")))
.with(("var_distinct_b", AggregateColumn::var_distinct("b")))
.with(("std_distinct_b", AggregateColumn::std_distinct("b")))
.group_by(expr!(c))
.build();
let dp_relation = reduce.differentially_private_aggregates(epsilon.clone(), delta.clone()).unwrap();
Expand All @@ -1029,6 +1096,9 @@ mod tests {
("count_b", DataType::float_interval(0., 1000.)),
("count_distinct_b", DataType::float_interval(0., 1000.)),
("my_c", DataType::float_interval(10., 20.)),
("avg_distinct_b", DataType::float_interval(0., 10000.)),
("var_distinct_b", DataType::float_interval(0., 100000.)),
("std_distinct_b", DataType::float_interval(0., 316.22776601683796)),
])
);
}
Expand Down

0 comments on commit 6b6eb30

Please sign in to comment.