Skip to content

Commit

Permalink
Merge pull request #216 from Qrlew/impl_distinct_in_select
Browse files Browse the repository at this point in the history
Impl distinct in select
  • Loading branch information
ngrislain authored Dec 8, 2023
2 parents bf3ea48 + ca42888 commit 93da6f0
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 5 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Added
- implemented `DISTINCT` in the `SELECT` clause [#216](https://github.com/Qrlew/qrlew/issues/216)

## [0.5.4] - 2023-12-05
- implemented `STD` and `VAR` aggregations in the DP rewriting [#205](https://github.com/Qrlew/qrlew/issues/205)
- `Expr::filter_by_function`: if the filtered datatype cannot be determined, keep the original data [#209](https://github.com/Qrlew/qrlew/issues/209)
Expand Down
5 changes: 4 additions & 1 deletion src/relation/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,10 @@ impl Reduce {
aggregate_column
.super_image(&input_columns_data_type)
.unwrap(),
if has_one_group && aggregate_column.aggregate() == &Aggregate::First {
if aggregate_column.aggregate() == &Aggregate::First && (
has_one_group ||
input.schema().field(aggregate_column.column_name().unwrap()).unwrap().constraint() == Some(Constraint::Unique)
){
Some(Constraint::Unique)
} else {
None
Expand Down
65 changes: 65 additions & 0 deletions src/relation/rewriting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,20 @@ impl Relation {
}
}

/// Build the equivalent of SQL `SELECT DISTINCT` over this relation.
///
/// The result is a reduce that groups by every field of the input schema and
/// outputs each field, under its original name, as the `first` value of its group.
pub fn distinct(self) -> Relation {
    // Copy the field names out first: `self` is moved into the builder below.
    let names: Vec<String> = self
        .schema()
        .iter()
        .map(|field| field.name().to_string())
        .collect();
    // One `first(col)` aggregate per field, named after the field itself.
    let aggregates = names
        .iter()
        .map(|name| (name, Expr::first(Expr::col(name))));
    // Every field is also a grouping key.
    let grouping_keys = names.iter().map(|name| Expr::col(name));
    Relation::reduce()
        .input(self)
        .with_iter(aggregates)
        .group_by_iter(grouping_keys)
        .build()
}

/// Build a relation whose output fields are to the aggregations in `aggregates`
/// applied on the UNIQUE values of the column `column` and grouped by the columns in `group_by`.
/// If `grouping_by` is not empty, we order by the grouping expressions.
Expand Down Expand Up @@ -1924,4 +1938,55 @@ mod tests {
names_aggs
);
}

#[test]
fn test_distinct() {
    /// Assert that `distinct_relation` has the same schema as `original`, is a
    /// `Relation::Reduce`, and has one grouping expression per field of `original`.
    fn check_distinct(original: &Relation, distinct_relation: Relation) {
        assert_eq!(distinct_relation.schema(), original.schema());
        assert!(matches!(distinct_relation, Relation::Reduce(_)));
        if let Relation::Reduce(reduce) = distinct_relation {
            assert_eq!(reduce.group_by.len(), original.schema().len())
        }
    }

    let table: Relation = Relation::table()
        .name("table")
        .schema(
            Schema::builder()
                .with(("a", DataType::integer_range(1..=10)))
                .with(("b", DataType::integer_values([1, 2, 5, 6, 7, 8])))
                .with(("c", DataType::integer_range(5..=20)))
                .build(),
        )
        .build();

    // DISTINCT applied directly to a table.
    let distinct_relation = table.clone().distinct();
    check_distinct(&table, distinct_relation);

    // DISTINCT applied on top of a map.
    let map: Relation = Relation::map()
        .input(table.clone())
        .with(expr!(a * b))
        .with(("my_c", expr!(c)))
        .build();
    let distinct_relation = map.clone().distinct();
    check_distinct(&map, distinct_relation);

    // DISTINCT applied on top of a reduce.
    let reduce: Relation = Relation::reduce()
        .input(table.clone())
        .with(expr!(count(a)))
        .with(("twice_c", expr!(first(2*c))))
        .group_by(expr!(c))
        .build();
    let distinct_relation = reduce.clone().distinct();
    distinct_relation.display_dot();
    check_distinct(&reduce, distinct_relation);
}
}
43 changes: 39 additions & 4 deletions src/sql/relation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ impl<'a> VisitedQueryRelations<'a> {
group_by: &'a ast::GroupByExpr,
from: Arc<Relation>,
having: &'a Option<ast::Expr>,
distinct: &'a Option<ast::Distinct>,
) -> Result<Arc<Relation>> {
// Collect all expressions with their aliases
let mut named_exprs: Vec<(String, Expr)> = vec![];
Expand Down Expand Up @@ -372,6 +373,12 @@ impl<'a> VisitedQueryRelations<'a> {
.input(relation)
.build();
}
// Handle the `DISTINCT` modifier of the select clause, if any.
// Only plain `DISTINCT` is supported: the `DISTINCT ON (...)` form is rejected
// with an error, otherwise the relation is wrapped with `Relation::distinct`.
if let Some(distinct) = distinct {
    if matches!(distinct, ast::Distinct::On(_)) {
        return Err(Error::other("DISTINCT ON is not supported"));
    }
    relation = relation.distinct();
}
Ok(Arc::new(relation))
}

Expand All @@ -393,9 +400,6 @@ impl<'a> VisitedQueryRelations<'a> {
named_window,
qualify,
} = select;
if distinct.is_some() {
return Err(Error::other("DISTINCT is not supported"));
}
if top.is_some() {
return Err(Error::other("TOP is not supported"));
}
Expand Down Expand Up @@ -428,6 +432,7 @@ impl<'a> VisitedQueryRelations<'a> {
group_by,
from,
having,
distinct
)?;
Ok(RelationWithColumns::new(relation, columns))
}
Expand Down Expand Up @@ -1111,13 +1116,13 @@ mod tests {
&Hierarchy::from([(["schema", "table_1"], Arc::new(table_1))]),
))
.unwrap();
relation.display_dot().unwrap();
println!("relation = {relation}");
assert_eq!(
relation.data_type(),
DataType::structured(vec![("my_sum", DataType::float_interval(0., 1000.))])
);

//relation.display_dot().unwrap();
let q = ast::Query::from(&relation);
println!("query = {q}");

Expand Down Expand Up @@ -1200,4 +1205,34 @@ mod tests {
DataType::structured(vec![("my_sum", DataType::float().try_empty().unwrap())])
);
}

#[test]
fn test_distinct_in_select() {
    let query = parse("SELECT DISTINCT a, b FROM table_1;").unwrap();

    // Source table: an integer column `a` and a float column `b`.
    let table_1 = Relation::table()
        .name("table_1")
        .schema(
            vec![
                ("a", DataType::integer_interval(0, 10)),
                ("b", DataType::float_interval(0., 10.)),
            ]
            .into_iter()
            .collect::<Schema>(),
        )
        .size(100)
        .build();

    // The table is registered under `schema.table_1`; the query refers to it as `table_1`.
    let relations = Hierarchy::from([(["schema", "table_1"], Arc::new(table_1))]);
    let relation = Relation::try_from(QueryWithRelations::new(&query, &relations)).unwrap();
    relation.display_dot().unwrap();
    println!("relation = {relation}");

    // The data type of the DISTINCT relation is expected to match the source columns.
    let expected = DataType::structured(vec![
        ("a", DataType::integer_interval(0, 10)),
        ("b", DataType::float_interval(0., 10.)),
    ]);
    assert_eq!(relation.data_type(), expected);
}
}
4 changes: 4 additions & 0 deletions tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ const QUERIES: &[&str] = &[
// Some string functions
"SELECT UPPER(z) FROM table_2 LIMIT 5",
"SELECT LOWER(z) FROM table_2 LIMIT 5",
// distinct
"SELECT DISTINCT COUNT(*) FROM table_1 GROUP BY d",
"SELECT DISTINCt c, d FROM table_1",
"SELECT c, COUNT(DISTINCT d) AS count_d, SUM(DISTINCT d) AS sum_d FROM table_1 GROUP BY c ORDER BY c"
];

#[cfg(feature = "sqlite")]
Expand Down

0 comments on commit 93da6f0

Please sign in to comment.