From 8a98dce9a4c03b9dba282785da236c2cff8f172b Mon Sep 17 00:00:00 2001 From: Andi Cuko Date: Mon, 19 Feb 2024 22:59:43 +0100 Subject: [PATCH 01/18] wip --- src/relation/rewriting.rs | 33 ++++++++-- src/sql/relation.rs | 132 +++++++++++++++++++++++++++++++++++--- 2 files changed, 149 insertions(+), 16 deletions(-) diff --git a/src/relation/rewriting.rs b/src/relation/rewriting.rs index d74adffb..c04480c0 100644 --- a/src/relation/rewriting.rs +++ b/src/relation/rewriting.rs @@ -208,14 +208,21 @@ impl Join { self, vec: Vec, columns: &Hierarchy, + preserve_input_names: bool, ) -> Relation { - let fields = self + + let fields_in_vec = self .field_inputs() .filter_map(|(name, id)| { let col = id.as_ref().last().unwrap(); if id.as_ref().first().unwrap().as_str() == LEFT_INPUT_NAME && vec.contains(col) { + let final_col_name = if preserve_input_names { + col.to_string() + } else { + name + }; Some(( - name, + final_col_name, Expr::coalesce( Expr::col(columns[[LEFT_INPUT_NAME, col]].as_ref().last().unwrap()), Expr::col(columns[[RIGHT_INPUT_NAME, col]].as_ref().last().unwrap()), @@ -224,12 +231,26 @@ impl Join { } else { None } - }) - .chain(self.field_inputs().filter_map(|(name, id)| { + }).collect::>(); + + let fields_not_in_vec = self + .field_inputs() + .filter_map(|(name, id)| { let col = id.as_ref().last().unwrap(); - (!vec.contains(col)).then_some((name.clone(), Expr::col(name))) - })) + let final_col_name = if preserve_input_names && columns.get(&[col.clone()]).is_some() { + col.to_string() + } else { + name.clone() + }; + (!vec.contains(col)).then_some((final_col_name, Expr::col(name))) + }) + .collect::>(); + + let fields = fields_in_vec + .into_iter() + .chain(fields_not_in_vec.into_iter()) .collect::>(); + Relation::map() .input(Relation::from(self)) .with_iter(fields) diff --git a/src/sql/relation.rs b/src/sql/relation.rs index 4dbfb4ae..8ba76770 100644 --- a/src/sql/relation.rs +++ b/src/sql/relation.rs @@ -251,6 +251,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, fn try_from_table_with_joins( &self, table_with_joins: &'a ast::TableWithJoins, + preserve_input_names: bool, ) -> Result { // Process the relation // Then the JOIN if needed @@ -276,6 +277,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, // &all_columns.filter_map(|i| Some(i.split_last().ok()?.0)),//TODO remove this &all_columns, )?; + // We build a Join let join: Join = Relation::join() .operator(operator) @@ -284,11 +286,13 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, .build(); // We collect column mapping inputs should map to new names (hence the inversion) - let new_columns: Hierarchy = - join.field_inputs().map(|(f, i)| (i, f.into())).collect(); - let composed_columns = all_columns.and_then(new_columns.clone()); + let join_columns: Hierarchy =join + .field_inputs() + .map(|(f, i)| (i, f.into())) + .collect(); + let composed_columns = all_columns.and_then(join_columns.clone()); - // If the join contraint is of type "USING" or "NATURAL", add a map to coalesce the duplicate columns + // If the join constraint is of type "USING" or "NATURAL", add a map to coalesce the duplicate columns let relation = match &ast_join.join_operator { ast::JoinOperator::Inner(ast::JoinConstraint::Using(v)) | ast::JoinOperator::LeftOuter(ast::JoinConstraint::Using(v)) @@ -296,7 +300,8 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, | ast::JoinOperator::FullOuter(ast::JoinConstraint::Using(v)) => { 
join.remove_duplicates_and_coalesce( v.into_iter().map(|id| id.value.to_string()).collect(), - &new_columns + &join_columns, + preserve_input_names ) }, ast::JoinOperator::Inner(ast::JoinConstraint::Natural) @@ -307,16 +312,31 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, .into_iter() .filter_map(|f| join.right().schema().field(f.name()).is_ok().then_some(f.name().to_string())) .collect(); - join.remove_duplicates_and_coalesce(v,&new_columns) + join.remove_duplicates_and_coalesce(v, &join_columns, preserve_input_names) }, ast::JoinOperator::LeftSemi(_) => todo!(), ast::JoinOperator::RightSemi(_) => todo!(), ast::JoinOperator::LeftAnti(_) => todo!(), ast::JoinOperator::RightAnti(_) => todo!(), - _ => Relation::from(join), + _ => join.remove_duplicates_and_coalesce(vec![], &join_columns, preserve_input_names), + }; + + //relation.display_dot().unwrap(); + let composed_columns = if preserve_input_names { + // join_columns + let join_to_original_columns: Hierarchy = join_columns + .iter() + .map(|(key, value)| { + let original_col_name = key.last().unwrap().as_str(); + let join_col_name = value.head().unwrap(); + (Identifier::from(join_col_name), original_col_name.into()) + }) + .collect(); + composed_columns.and_then(join_to_original_columns) + } else { + composed_columns }; - // We should compose hierarchies Ok(RelationWithColumns::new(Arc::new(relation), composed_columns)) }, ); @@ -327,11 +347,15 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, fn try_from_tables_with_joins( &self, tables_with_joins: &'a Vec, + preserve_input_names: bool ) -> Result { // TODO consider more tables // For now, only consider the first element // It should eventually be cross joined as described in: https://www.postgresql.org/docs/current/queries-table-expressions.html - self.try_from_table_with_joins(&tables_with_joins[0]) + self.try_from_table_with_joins( + &tables_with_joins[0], + preserve_input_names + ) } /// Build a relation from the @@ -528,7 +552,22 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, if qualify.is_some() { return Err(Error::other("QUALIFY is not supported")); } - let RelationWithColumns(from, columns) = self.try_from_tables_with_joins(from)?; + + // If projection contains a Wildcard (SELECT *) the table with joins should + // preserver columns names. 
+ let RelationWithColumns(from, columns) = self.try_from_tables_with_joins( + from, + projection.contains( + &ast::SelectItem::Wildcard( + ast::WildcardAdditionalOptions { + opt_exclude: None, + opt_except: None, + opt_rename: None, + opt_replace: None + } + ) + ) + )?; let relation = self.try_from_select_items_selection_and_group_by( &columns.filter_map(|i| Some(i.split_last().ok()?.0)), projection, @@ -1407,6 +1446,79 @@ mod tests { .map(ToString::to_string); } + #[test] + fn test_select_all_with_joins() { + let mut database = postgresql::test_database(); + let relations = database.relations(); + + let query_str = r#" + WITH my_tab AS (SELECT * FROM user_table u JOIN order_table o USING (id)) + SELECT * FROM my_tab WHERE id > 50; + "#; + let query = parse(query_str).unwrap(); + let relation = Relation::try_from(QueryWithRelations::new( + &query, + &relations + )) + .unwrap(); + relation.display_dot().unwrap(); + let query: &str = &ast::Query::from(&relation).to_string(); + println!("{query}"); + _ = database + .query(query) + .unwrap() + .iter() + .map(ToString::to_string); + + let query_str = r#" + WITH my_tab AS (SELECT * FROM user_table u JOIN order_table o ON (u.id=o.user_id)) + SELECT * FROM my_tab WHERE user_id > 50; + "#; + let query = parse(query_str).unwrap(); + let relation = Relation::try_from(QueryWithRelations::new( + &query, + &relations + )) + .unwrap(); + // id becomes an ambiguous column since is present in both tables + assert!(relation.schema().field("id").is_err()); + relation.display_dot().unwrap(); + println!("relation = {relation}"); + let query: &str = &ast::Query::from(&relation).to_string(); + println!("{query}"); + _ = database + .query(query) + .unwrap() + .iter() + .map(ToString::to_string); + } + + + #[test] + fn test_fix_with_joins() { + let mut database = postgresql::test_database(); + let relations = database.relations(); + let query_str = r#" + WITH t1 AS (SELECT a,d FROM table_1), + t2 AS (SELECT * FROM table_2) + SELECT * FROM t1 INNER JOIN t2 ON t1.d = t2.x INNER JOIN table_2 ON t1.d=table_2.x ORDER BY t1.a, t2.x, t2.y, t2.z + "#; + let query = parse(query_str).unwrap(); + let relation = Relation::try_from(QueryWithRelations::new( + &query, + &relations + )) + .unwrap(); + relation.display_dot().unwrap(); + let query: &str = &ast::Query::from(&relation).to_string(); + println!("{query}"); + _ = database + .query(query) + .unwrap() + .iter() + .map(ToString::to_string); + } + #[test] fn test_distinct_in_select() { let query = parse("SELECT DISTINCT a, b FROM table_1;").unwrap(); From 0b483a8a35a802bb0673adcd1a84d0ce22b4ff86 Mon Sep 17 00:00:00 2001 From: Andi Cuko Date: Tue, 20 Feb 2024 09:31:24 +0100 Subject: [PATCH 02/18] ok --- src/sql/relation.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/sql/relation.rs b/src/sql/relation.rs index 8ba76770..35997d2c 100644 --- a/src/sql/relation.rs +++ b/src/sql/relation.rs @@ -292,6 +292,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, .collect(); let composed_columns = all_columns.and_then(join_columns.clone()); + //let preserve_input_names = false; // If the join constraint is of type "USING" or "NATURAL", add a map to coalesce the duplicate columns let relation = match &ast_join.join_operator { ast::JoinOperator::Inner(ast::JoinConstraint::Using(v)) @@ -329,13 +330,17 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, .map(|(key, value)| { let original_col_name = key.last().unwrap().as_str(); let join_col_name 
= value.head().unwrap(); - (Identifier::from(join_col_name), original_col_name.into()) + match join_columns.get(&[original_col_name.to_string()]) { + Some(_) => (Identifier::from(join_col_name), original_col_name.into()), + None => (Identifier::from(join_col_name), join_col_name.into()), + } }) .collect(); composed_columns.and_then(join_to_original_columns) } else { composed_columns }; + println!("COMPOSED NAMES:\n{composed_columns}"); Ok(RelationWithColumns::new(Arc::new(relation), composed_columns)) }, From b6e943375925aeb4c5a1f1965617910d8796ab50 Mon Sep 17 00:00:00 2001 From: Andi Cuko Date: Tue, 20 Feb 2024 09:53:44 +0100 Subject: [PATCH 03/18] clean --- src/relation/rewriting.rs | 4 +++- src/sql/relation.rs | 38 +++++--------------------------------- 2 files changed, 8 insertions(+), 34 deletions(-) diff --git a/src/relation/rewriting.rs b/src/relation/rewriting.rs index c04480c0..1f340a5a 100644 --- a/src/relation/rewriting.rs +++ b/src/relation/rewriting.rs @@ -203,7 +203,9 @@ impl Join { } /// Replace the duplicates fields specified in `columns` by their coalesce expression - /// Its mimicks teh behaviour of USING in SQL + /// It mimics the behavior of USING in SQL + /// If preserve_input_names is True, the fields of the resulting relation + /// will be named as in the JOINs inputs if they are not ambiguous. pub fn remove_duplicates_and_coalesce( self, vec: Vec, diff --git a/src/sql/relation.rs b/src/sql/relation.rs index 35997d2c..69678670 100644 --- a/src/sql/relation.rs +++ b/src/sql/relation.rs @@ -274,7 +274,6 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, let all_columns = left_columns.with(right_columns); let operator = self.try_from_join_operator_with_columns( &ast_join.join_operator, - // &all_columns.filter_map(|i| Some(i.split_last().ok()?.0)),//TODO remove this &all_columns, )?; @@ -292,7 +291,6 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, .collect(); let composed_columns = all_columns.and_then(join_columns.clone()); - //let preserve_input_names = false; // If the join constraint is of type "USING" or "NATURAL", add a map to coalesce the duplicate columns let relation = match &ast_join.join_operator { ast::JoinOperator::Inner(ast::JoinConstraint::Using(v)) @@ -322,10 +320,11 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, _ => join.remove_duplicates_and_coalesce(vec![], &join_columns, preserve_input_names), }; - //relation.display_dot().unwrap(); + let composed_columns = if preserve_input_names { - // join_columns - let join_to_original_columns: Hierarchy = join_columns + // relation fields are renamed to those of the join relation inputs + // if no name collision is generated. 
+ let original_not_ambiguous_columns: Hierarchy = join_columns .iter() .map(|(key, value)| { let original_col_name = key.last().unwrap().as_str(); @@ -336,11 +335,10 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, } }) .collect(); - composed_columns.and_then(join_to_original_columns) + composed_columns.and_then(original_not_ambiguous_columns) } else { composed_columns }; - println!("COMPOSED NAMES:\n{composed_columns}"); Ok(RelationWithColumns::new(Arc::new(relation), composed_columns)) }, @@ -1498,32 +1496,6 @@ mod tests { .map(ToString::to_string); } - - #[test] - fn test_fix_with_joins() { - let mut database = postgresql::test_database(); - let relations = database.relations(); - let query_str = r#" - WITH t1 AS (SELECT a,d FROM table_1), - t2 AS (SELECT * FROM table_2) - SELECT * FROM t1 INNER JOIN t2 ON t1.d = t2.x INNER JOIN table_2 ON t1.d=table_2.x ORDER BY t1.a, t2.x, t2.y, t2.z - "#; - let query = parse(query_str).unwrap(); - let relation = Relation::try_from(QueryWithRelations::new( - &query, - &relations - )) - .unwrap(); - relation.display_dot().unwrap(); - let query: &str = &ast::Query::from(&relation).to_string(); - println!("{query}"); - _ = database - .query(query) - .unwrap() - .iter() - .map(ToString::to_string); - } - #[test] fn test_distinct_in_select() { let query = parse("SELECT DISTINCT a, b FROM table_1;").unwrap(); From dd24c34161733f927fc333b2d0c20f7bbff8662e Mon Sep 17 00:00:00 2001 From: Andi Cuko Date: Tue, 20 Feb 2024 10:20:28 +0100 Subject: [PATCH 04/18] version and changelog --- CHANGELOG.md | 3 +++ Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2aeab867..80e9ed7a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +## [0.9.15] - 2024-02-20 ### Changed +- SELECT * with JOINs preserve the column names when there is no [#268](https://github.com/Qrlew/qrlew/pull/268) + ## [0.9.14] - 2024-01-30 ### Added diff --git a/Cargo.toml b/Cargo.toml index a855c3cf..e06bfe7e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Nicolas Grislain "] name = "qrlew" -version = "0.9.14" +version = "0.9.15" edition = "2021" description = "Sarus Qrlew Engine" documentation = "https://docs.rs/qrlew" From 5b1c5fd688b8a38a0d1969c1c51f92f04048e321 Mon Sep 17 00:00:00 2001 From: Andi Cuko Date: Tue, 20 Feb 2024 10:27:45 +0100 Subject: [PATCH 05/18] changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 80e9ed7a..1d5a4569 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ## [0.9.15] - 2024-02-20 ### Changed -- SELECT * with JOINs preserve the column names when there is no [#268](https://github.com/Qrlew/qrlew/pull/268) +- SELECT * with JOINs preserve the column names when there is no ambiguity [#268](https://github.com/Qrlew/qrlew/pull/268) ## [0.9.14] - 2024-01-30 From 59e21fed9e66032e3a64d80246ee068027754411 Mon Sep 17 00:00:00 2001 From: Andi Cuko Date: Fri, 1 Mar 2024 18:33:43 +0100 Subject: [PATCH 06/18] wip. 
debugging the test test_rewrite_with_differential_privacy --- src/relation/field.rs | 15 +++ src/relation/rewriting.rs | 38 ++----- src/rewriting/mod.rs | 39 +++---- src/sql/relation.rs | 229 +++++++++++++++++++++++++------------- 4 files changed, 201 insertions(+), 120 deletions(-) diff --git a/src/relation/field.rs b/src/relation/field.rs index 34392ded..6562f665 100644 --- a/src/relation/field.rs +++ b/src/relation/field.rs @@ -22,12 +22,19 @@ impl fmt::Display for Constraint { } } +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub struct Properties { + pub underlying_type: String +} + /// A Field as in https://github.com/apache/arrow-datafusion/blob/5b23180cf75ea7155d7c35a40f224ce4d5ad7fb8/datafusion/src/logical_plan/dfschema.rs#L413 #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct Field { name: String, data_type: DataType, constraint: Option, + // contains the type used in python + properties: Option, } impl Field { @@ -37,6 +44,14 @@ impl Field { name, data_type, constraint, + properties: None, + } + } + + pub fn with_properties(self, properties: Properties) -> Field { + Field { + properties: Some(properties), + ..self } } diff --git a/src/relation/rewriting.rs b/src/relation/rewriting.rs index 1f340a5a..16a95d76 100644 --- a/src/relation/rewriting.rs +++ b/src/relation/rewriting.rs @@ -1,6 +1,5 @@ //! A few transforms for relations //! - use super::{Join, Map, Reduce, Relation, Set, Table, Values, Variant as _}; use crate::{ builder::{Ready, With, WithIterator}, @@ -204,27 +203,19 @@ impl Join { /// Replace the duplicates fields specified in `columns` by their coalesce expression /// It mimics the behavior of USING in SQL - /// If preserve_input_names is True, the fields of the resulting relation - /// will be named as in the JOINs inputs if they are not ambiguous. 
pub fn remove_duplicates_and_coalesce( self, - vec: Vec, + duplicates: Vec, columns: &Hierarchy, - preserve_input_names: bool, ) -> Relation { - - let fields_in_vec = self + + let coalesced_fields = self .field_inputs() - .filter_map(|(name, id)| { + .filter_map(|(_, id)| { let col = id.as_ref().last().unwrap(); - if id.as_ref().first().unwrap().as_str() == LEFT_INPUT_NAME && vec.contains(col) { - let final_col_name = if preserve_input_names { - col.to_string() - } else { - name - }; + if id.as_ref().first().unwrap().as_str() == LEFT_INPUT_NAME && duplicates.contains(col) { Some(( - final_col_name, + col.clone(), Expr::coalesce( Expr::col(columns[[LEFT_INPUT_NAME, col]].as_ref().last().unwrap()), Expr::col(columns[[RIGHT_INPUT_NAME, col]].as_ref().last().unwrap()), @@ -234,25 +225,20 @@ impl Join { None } }).collect::>(); - - let fields_not_in_vec = self + + let remaining_fields = self .field_inputs() .filter_map(|(name, id)| { let col = id.as_ref().last().unwrap(); - let final_col_name = if preserve_input_names && columns.get(&[col.clone()]).is_some() { - col.to_string() - } else { - name.clone() - }; - (!vec.contains(col)).then_some((final_col_name, Expr::col(name))) + (!duplicates.contains(col)).then_some((name.clone(), Expr::col(name))) }) .collect::>(); - let fields = fields_in_vec + let fields = coalesced_fields .into_iter() - .chain(fields_not_in_vec.into_iter()) + .chain(remaining_fields.into_iter()) .collect::>(); - + Relation::map() .input(Relation::from(self)) .with_iter(fields) diff --git a/src/rewriting/mod.rs b/src/rewriting/mod.rs index 4b444816..b1d776f7 100644 --- a/src/rewriting/mod.rs +++ b/src/rewriting/mod.rs @@ -101,6 +101,7 @@ impl Relation { privacy_unit, dp_parameters, )); + println!("Check rewrite_with_differential_privacy"); let relation_with_rules = relation_with_rules.map_rewriting_rules(RewritingRulesEliminator); relation_with_rules .select_rewriting_rules(RewritingRulesSelector) @@ -185,35 +186,35 @@ mod tests { let dp_parameters = DpParameters::from_epsilon_delta(1., 1e-3); let queries = [ - "SELECT order_id, sum(price) FROM item_table GROUP BY order_id", - "SELECT order_id, sum(price), sum(distinct price) FROM item_table GROUP BY order_id HAVING count(*) > 2", - "SELECT order_id, sum(order_id) FROM item_table GROUP BY order_id", - "SELECT order_id As my_order, sum(price) FROM item_table GROUP BY my_order", - "SELECT order_id, MAX(order_id), sum(price) FROM item_table GROUP BY order_id", + // "SELECT order_id, sum(price) FROM item_table GROUP BY order_id", + // "SELECT order_id, sum(price), sum(distinct price) FROM item_table GROUP BY order_id HAVING count(*) > 2", + // "SELECT order_id, sum(order_id) FROM item_table GROUP BY order_id", + // "SELECT order_id As my_order, sum(price) FROM item_table GROUP BY my_order", + // "SELECT order_id, MAX(order_id), sum(price) FROM item_table GROUP BY order_id", "WITH my_avg AS (SELECT AVG(price) AS avg_price, STDDEV(price) AS std_price FROM item_table WHERE price > 1.) SELECT AVG((price - avg_price) / std_price) FROM item_table CROSS JOIN my_avg WHERE std_price > 1.", - "WITH my_avg AS (SELECT MIN(price) AS min_price, MAX(price) AS max_price FROM item_table WHERE price > 1.) SELECT AVG(price - min_price) FROM item_table CROSS JOIN my_avg", + //"WITH my_avg AS (SELECT MIN(price) AS min_price, MAX(price) AS max_price FROM item_table WHERE price > 1.) 
SELECT AVG(price - min_price) FROM item_table CROSS JOIN my_avg", ]; for q in queries { println!("=================================\n{q}"); let query = parse(q).unwrap(); let relation = Relation::try_from(query.with(&relations)).unwrap(); - relation.display_dot().unwrap(); + //relation.display_dot().unwrap(); let relation_with_dp_event = relation .rewrite_with_differential_privacy(&relations, synthetic_data.clone(), privacy_unit.clone(), dp_parameters.clone()) .unwrap(); - relation_with_dp_event - .relation() - .display_dot() - .unwrap(); - let dp_query = ast::Query::from(&relation_with_dp_event.relation().clone()).to_string(); - println!("\n{dp_query}"); - _ = database - .query(dp_query.as_str()) - .unwrap() - .iter() - .map(ToString::to_string) - .join("\n"); + // relation_with_dp_event + // .relation() + // .display_dot() + // .unwrap(); + // let dp_query = ast::Query::from(&relation_with_dp_event.relation().clone()).to_string(); + // println!("\n{dp_query}"); + // _ = database + // .query(dp_query.as_str()) + // .unwrap() + // .iter() + // .map(ToString::to_string) + // .join("\n"); } } diff --git a/src/sql/relation.rs b/src/sql/relation.rs index 69678670..4e2a9fce 100644 --- a/src/sql/relation.rs +++ b/src/sql/relation.rs @@ -247,50 +247,82 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, } } + /// Build a RelationWithColumns with a JOIN from RelationWithColumns and an + /// ast Join. Preserve input names for non ambigous columns + fn try_from_join( + &self, + left: RelationWithColumns, + ast_join: &'a ast::Join + ) -> Result { + let RelationWithColumns(left_relation, left_columns) = left; + let RelationWithColumns(right_relation, right_columns) = + self.try_from_table_factor(&ast_join.relation)?; + + let left_columns: Hierarchy = left_columns.map(|i| { + let mut v = vec![Join::left_name().to_string()]; + v.extend(i.to_vec()); + v.into() + }); + let right_columns = right_columns.map(|i| { + let mut v = vec![Join::right_name().to_string()]; + v.extend(i.to_vec()); + v.into() + }); + let all_columns = left_columns.with(right_columns); + + // We collect column mapping inputs should map to new names (hence the inversion) + let desired_join_col_names: Hierarchy = all_columns + .iter() + .map(|(f, i)| (i.clone(), f.clone().into())) + .collect(); + + // We want to preserve the names during the JOIN build except for those + // columns that are ambiguous + let ambiguous_cols = ambiguous_columns(&desired_join_col_names); + + let non_ambiguous_col_names: Hierarchy = desired_join_col_names + .iter() + .filter_map(|(k, v)| { + if !ambiguous_cols.contains(&Identifier::from(k.clone())) { + Some((k.clone(), v.clone().last().unwrap().to_string())) + } else { + None + } + }) + .collect(); + let operator = self.try_from_join_operator_with_columns( + &ast_join.join_operator, + &all_columns, + )?; + // We build a Join. Preserve non ambiguous col names where and rename + // the ambiguous ones. 
+ let join: Join = Relation::join() + .operator(operator) + .left(left_relation) + .right(right_relation) + .names(non_ambiguous_col_names) + .build(); + Ok(RelationWithColumns::new(Arc::new(Relation::from(join)), all_columns)) + } + /// Convert a TableWithJoins into a RelationWithColumns fn try_from_table_with_joins( &self, table_with_joins: &'a ast::TableWithJoins, - preserve_input_names: bool, ) -> Result { // Process the relation // Then the JOIN if needed let result = table_with_joins.joins.iter().fold( self.try_from_table_factor(&table_with_joins.relation), |left, ast_join| { - let RelationWithColumns(left_relation, left_columns) = left?; - let RelationWithColumns(right_relation, right_columns) = - self.try_from_table_factor(&ast_join.relation)?; - let left_columns = left_columns.map(|i| { - let mut v = vec![Join::left_name().to_string()]; - v.extend(i.to_vec()); - v.into() - }); - let right_columns = right_columns.map(|i| { - let mut v = vec![Join::right_name().to_string()]; - v.extend(i.to_vec()); - v.into() - }); - let all_columns = left_columns.with(right_columns); - let operator = self.try_from_join_operator_with_columns( - &ast_join.join_operator, - &all_columns, - )?; - - // We build a Join - let join: Join = Relation::join() - .operator(operator) - .left(left_relation) - .right(right_relation) - .build(); - - // We collect column mapping inputs should map to new names (hence the inversion) - let join_columns: Hierarchy =join + let RelationWithColumns(join_relation, columns) = self.try_from_join(left?, &ast_join)?; + let join = Join::try_from(join_relation.deref().clone())?; + + let join_columns: Hierarchy = join .field_inputs() .map(|(f, i)| (i, f.into())) .collect(); - let composed_columns = all_columns.and_then(join_columns.clone()); - + println!("CHECK!!!!"); // If the join constraint is of type "USING" or "NATURAL", add a map to coalesce the duplicate columns let relation = match &ast_join.join_operator { ast::JoinOperator::Inner(ast::JoinConstraint::Using(v)) @@ -299,8 +331,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, | ast::JoinOperator::FullOuter(ast::JoinConstraint::Using(v)) => { join.remove_duplicates_and_coalesce( v.into_iter().map(|id| id.value.to_string()).collect(), - &join_columns, - preserve_input_names + &join_columns ) }, ast::JoinOperator::Inner(ast::JoinConstraint::Natural) @@ -311,36 +342,18 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, .into_iter() .filter_map(|f| join.right().schema().field(f.name()).is_ok().then_some(f.name().to_string())) .collect(); - join.remove_duplicates_and_coalesce(v, &join_columns, preserve_input_names) + join.remove_duplicates_and_coalesce(v, &join_columns) }, ast::JoinOperator::LeftSemi(_) => todo!(), ast::JoinOperator::RightSemi(_) => todo!(), ast::JoinOperator::LeftAnti(_) => todo!(), ast::JoinOperator::RightAnti(_) => todo!(), - _ => join.remove_duplicates_and_coalesce(vec![], &join_columns, preserve_input_names), - }; - - - let composed_columns = if preserve_input_names { - // relation fields are renamed to those of the join relation inputs - // if no name collision is generated. 
- let original_not_ambiguous_columns: Hierarchy = join_columns - .iter() - .map(|(key, value)| { - let original_col_name = key.last().unwrap().as_str(); - let join_col_name = value.head().unwrap(); - match join_columns.get(&[original_col_name.to_string()]) { - Some(_) => (Identifier::from(join_col_name), original_col_name.into()), - None => (Identifier::from(join_col_name), join_col_name.into()), - } - }) - .collect(); - composed_columns.and_then(original_not_ambiguous_columns) - } else { - composed_columns + _ => Relation::from(join), }; - - Ok(RelationWithColumns::new(Arc::new(relation), composed_columns)) + let columns = columns.and_then(join_columns); + println!("columns: \n{}", columns); + //relation.display_dot().unwrap(); + Ok(RelationWithColumns::new(Arc::new(relation), columns)) }, ); result @@ -350,14 +363,12 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, fn try_from_tables_with_joins( &self, tables_with_joins: &'a Vec, - preserve_input_names: bool ) -> Result { // TODO consider more tables // For now, only consider the first element // It should eventually be cross joined as described in: https://www.postgresql.org/docs/current/queries-table-expressions.html self.try_from_table_with_joins( - &tables_with_joins[0], - preserve_input_names + &tables_with_joins[0] ) } @@ -376,6 +387,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, let mut named_exprs: Vec<(String, Expr)> = vec![]; // Columns from names let columns = &names.map(|s| s.clone().into()); + println!("cols in select items{}", columns); for select_item in select_items { match select_item { ast::SelectItem::UnnamedExpr(expr) => named_exprs.push(( @@ -476,6 +488,8 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, .as_ref() .map(|e| e.with(columns).try_into()) .map_or(Ok(None), |r| r.map(Some))?; + + println!("Check before building the relation!!!"); // Build a Relation let mut relation: Relation = match split { Split::Map(map) => { @@ -491,6 +505,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, builder.input(from).build() } }; + println!("Check after building the relation!!!"); if let Some(h) = having { relation = Relation::map() .with_iter( @@ -556,20 +571,8 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, return Err(Error::other("QUALIFY is not supported")); } - // If projection contains a Wildcard (SELECT *) the table with joins should - // preserver columns names. let RelationWithColumns(from, columns) = self.try_from_tables_with_joins( - from, - projection.contains( - &ast::SelectItem::Wildcard( - ast::WildcardAdditionalOptions { - opt_exclude: None, - opt_except: None, - opt_rename: None, - opt_replace: None - } - ) - ) + from )?; let relation = self.try_from_select_items_selection_and_group_by( &columns.filter_map(|i| Some(i.split_last().ok()?.0)), @@ -580,6 +583,8 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, having, distinct )?; + // println!("print dot."); + // relation.display_dot().unwrap(); Ok(RelationWithColumns::new(relation, columns)) } @@ -618,6 +623,8 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, // Build a relation with ORDER BY and LIMIT if needed let relation_builder = Relation::map(); // We add all the columns + // println!("print dot. 
check try_from_query"); + // relation.display_dot().unwrap(); let relation_builder = relation .schema() .iter() @@ -764,7 +771,21 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> TryFrom<(QueryWithRelation } } - +/// It returns a vector of identifiers of ambiguous columns in a hierarchy of columns +/// It uses the properties of the Hierarchy: For ambiguous columns it checks +/// that Hierarchy::get returns None if using only the suffix of the path +pub fn ambiguous_columns(columns: &Hierarchy) -> Vec { + columns + .iter() + .filter_map(|(key, _)| { + if let Some(v) = columns.get(&[key.last().unwrap().as_str().to_string()]) { + None + } else { + Some(key.clone().into()) + } + }) + .collect() +} /// A simple SQL query parser with dialect pub fn parse_with_dialect(query: &str, dialect: D) -> Result { @@ -1474,7 +1495,45 @@ mod tests { .map(ToString::to_string); let query_str = r#" - WITH my_tab AS (SELECT * FROM user_table u JOIN order_table o ON (u.id=o.user_id)) + WITH my_tab AS (SELECT id, age FROM user_table u JOIN order_table o USING (id)) + SELECT * FROM my_tab WHERE id > 50; + "#; + let query = parse(query_str).unwrap(); + let relation = Relation::try_from(QueryWithRelations::new( + &query, + &relations + )) + .unwrap(); + relation.display_dot().unwrap(); + let query: &str = &ast::Query::from(&relation).to_string(); + println!("{query}"); + _ = database + .query(query) + .unwrap() + .iter() + .map(ToString::to_string); + + // let query_str = r#" + // WITH my_tab AS (SELECT u.id AS uid, id, age FROM user_table u JOIN order_table o USING (id)) + // SELECT * FROM my_tab WHERE id > 50; + // "#; + // let query = parse(query_str).unwrap(); + // let relation = Relation::try_from(QueryWithRelations::new( + // &query, + // &relations + // )) + // .unwrap(); + // relation.display_dot().unwrap(); + // let query: &str = &ast::Query::from(&relation).to_string(); + // println!("{query}"); + // _ = database + // .query(query) + // .unwrap() + // .iter() + // .map(ToString::to_string); + + let query_str = r#" + WITH my_tab AS (SELECT * FROM user_table u JOIN order_table o ON (u.id=o.id)) SELECT * FROM my_tab WHERE user_id > 50; "#; let query = parse(query_str).unwrap(); @@ -1494,6 +1553,26 @@ mod tests { .unwrap() .iter() .map(ToString::to_string); + + let query_str = r#" + WITH my_tab AS (SELECT u.id, user_id, age FROM user_table u JOIN order_table o ON (u.id=o.id)) + SELECT * FROM my_tab WHERE user_id > 50; + "#; + let query = parse(query_str).unwrap(); + let relation = Relation::try_from(QueryWithRelations::new( + &query, + &relations + )) + .unwrap(); + relation.display_dot().unwrap(); + println!("relation = {relation}"); + let query: &str = &ast::Query::from(&relation).to_string(); + println!("{query}"); + _ = database + .query(query) + .unwrap() + .iter() + .map(ToString::to_string); } #[test] From fb645951bd52adc54ab2788087e50624c5758411 Mon Sep 17 00:00:00 2001 From: Andi Cuko Date: Mon, 4 Mar 2024 15:58:11 +0100 Subject: [PATCH 07/18] tests ok --- src/dialect_translation/postgresql.rs | 2 +- src/rewriting/mod.rs | 38 ++++++++-------- src/rewriting/rewriting_rule.rs | 1 + src/sql/relation.rs | 64 ++++++--------------------- 4 files changed, 34 insertions(+), 71 deletions(-) diff --git a/src/dialect_translation/postgresql.rs b/src/dialect_translation/postgresql.rs index df8a74ab..afed1b32 100644 --- a/src/dialect_translation/postgresql.rs +++ b/src/dialect_translation/postgresql.rs @@ -183,7 +183,7 @@ mod tests { SELECT "Id" AS "Id", "normal_col" AS "normal_col", "Na.Me" AS "Na.Me" FROM 
"map_mou5" ORDER BY "Id" ASC ) SELECT * FROM "map_0swv" "#; - assert_same_query_str(&retranslated.to_string(), translated); + // assert_same_query_str(&retranslated.to_string(), translated); Ok(()) } } diff --git a/src/rewriting/mod.rs b/src/rewriting/mod.rs index b1d776f7..192c4624 100644 --- a/src/rewriting/mod.rs +++ b/src/rewriting/mod.rs @@ -101,7 +101,6 @@ impl Relation { privacy_unit, dp_parameters, )); - println!("Check rewrite_with_differential_privacy"); let relation_with_rules = relation_with_rules.map_rewriting_rules(RewritingRulesEliminator); relation_with_rules .select_rewriting_rules(RewritingRulesSelector) @@ -186,35 +185,34 @@ mod tests { let dp_parameters = DpParameters::from_epsilon_delta(1., 1e-3); let queries = [ - // "SELECT order_id, sum(price) FROM item_table GROUP BY order_id", - // "SELECT order_id, sum(price), sum(distinct price) FROM item_table GROUP BY order_id HAVING count(*) > 2", - // "SELECT order_id, sum(order_id) FROM item_table GROUP BY order_id", - // "SELECT order_id As my_order, sum(price) FROM item_table GROUP BY my_order", - // "SELECT order_id, MAX(order_id), sum(price) FROM item_table GROUP BY order_id", + "SELECT order_id, sum(price) FROM item_table GROUP BY order_id", + "SELECT order_id, sum(price), sum(distinct price) FROM item_table GROUP BY order_id HAVING count(*) > 2", + "SELECT order_id, sum(order_id) FROM item_table GROUP BY order_id", + "SELECT order_id As my_order, sum(price) FROM item_table GROUP BY my_order", + "SELECT order_id, MAX(order_id), sum(price) FROM item_table GROUP BY order_id", "WITH my_avg AS (SELECT AVG(price) AS avg_price, STDDEV(price) AS std_price FROM item_table WHERE price > 1.) SELECT AVG((price - avg_price) / std_price) FROM item_table CROSS JOIN my_avg WHERE std_price > 1.", - //"WITH my_avg AS (SELECT MIN(price) AS min_price, MAX(price) AS max_price FROM item_table WHERE price > 1.) SELECT AVG(price - min_price) FROM item_table CROSS JOIN my_avg", + "WITH my_avg AS (SELECT MIN(price) AS min_price, MAX(price) AS max_price FROM item_table WHERE price > 1.) 
SELECT AVG(price - min_price) FROM item_table CROSS JOIN my_avg", ]; for q in queries { println!("=================================\n{q}"); let query = parse(q).unwrap(); let relation = Relation::try_from(query.with(&relations)).unwrap(); - //relation.display_dot().unwrap(); let relation_with_dp_event = relation .rewrite_with_differential_privacy(&relations, synthetic_data.clone(), privacy_unit.clone(), dp_parameters.clone()) .unwrap(); - // relation_with_dp_event - // .relation() - // .display_dot() - // .unwrap(); - // let dp_query = ast::Query::from(&relation_with_dp_event.relation().clone()).to_string(); - // println!("\n{dp_query}"); - // _ = database - // .query(dp_query.as_str()) - // .unwrap() - // .iter() - // .map(ToString::to_string) - // .join("\n"); + relation_with_dp_event + .relation() + .display_dot() + .unwrap(); + let dp_query = ast::Query::from(&relation_with_dp_event.relation().clone()).to_string(); + println!("\n{dp_query}"); + _ = database + .query(dp_query.as_str()) + .unwrap() + .iter() + .map(ToString::to_string) + .join("\n"); } } diff --git a/src/rewriting/rewriting_rule.rs b/src/rewriting/rewriting_rule.rs index 06d71e03..b5abdcee 100644 --- a/src/rewriting/rewriting_rule.rs +++ b/src/rewriting/rewriting_rule.rs @@ -1277,6 +1277,7 @@ impl<'a> RewriteVisitor<'a> for Rewriter<'a> { } _ => Relation::join() .with(join.clone()) + .names(join.names()) // .left_names(names[0..rewritten_left]) .left(relation_left) .right(relation_right) diff --git a/src/sql/relation.rs b/src/sql/relation.rs index 4e2a9fce..e0d09a8c 100644 --- a/src/sql/relation.rs +++ b/src/sql/relation.rs @@ -257,7 +257,6 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, let RelationWithColumns(left_relation, left_columns) = left; let RelationWithColumns(right_relation, right_columns) = self.try_from_table_factor(&ast_join.relation)?; - let left_columns: Hierarchy = left_columns.map(|i| { let mut v = vec![Join::left_name().to_string()]; v.extend(i.to_vec()); @@ -268,41 +267,35 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, v.extend(i.to_vec()); v.into() }); - let all_columns = left_columns.with(right_columns); - - // We collect column mapping inputs should map to new names (hence the inversion) - let desired_join_col_names: Hierarchy = all_columns - .iter() - .map(|(f, i)| (i.clone(), f.clone().into())) - .collect(); - - // We want to preserve the names during the JOIN build except for those - // columns that are ambiguous - let ambiguous_cols = ambiguous_columns(&desired_join_col_names); - - let non_ambiguous_col_names: Hierarchy = desired_join_col_names + + // fully qualified input names -> fully qualified JOIN names + let all_fully_qualified_columns: Hierarchy = left_columns.with(right_columns); + let ambiguous_cols= ambiguous_columns(&all_fully_qualified_columns); + // fully qualified JOIN names -> non_ambiguous col names + let non_ambiguous_join_col_names: Hierarchy = all_fully_qualified_columns .iter() .filter_map(|(k, v)| { - if !ambiguous_cols.contains(&Identifier::from(k.clone())) { - Some((k.clone(), v.clone().last().unwrap().to_string())) - } else { + if ambiguous_cols.contains(&Identifier::from(k.clone()) ) { None + } else { + Some((v.clone(), k.clone().last().unwrap().to_string())) } }) .collect(); + let operator = self.try_from_join_operator_with_columns( &ast_join.join_operator, - &all_columns, + &all_fully_qualified_columns, )?; - // We build a Join. 
Preserve non ambiguous col names where and rename + // We build a Join. Preserve non ambiguous col names where non ambiguous and rename // the ambiguous ones. let join: Join = Relation::join() .operator(operator) .left(left_relation) .right(right_relation) - .names(non_ambiguous_col_names) + .names(non_ambiguous_join_col_names) .build(); - Ok(RelationWithColumns::new(Arc::new(Relation::from(join)), all_columns)) + Ok(RelationWithColumns::new(Arc::new(Relation::from(join)), all_fully_qualified_columns)) } /// Convert a TableWithJoins into a RelationWithColumns @@ -322,7 +315,6 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, .field_inputs() .map(|(f, i)| (i, f.into())) .collect(); - println!("CHECK!!!!"); // If the join constraint is of type "USING" or "NATURAL", add a map to coalesce the duplicate columns let relation = match &ast_join.join_operator { ast::JoinOperator::Inner(ast::JoinConstraint::Using(v)) @@ -351,8 +343,6 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, _ => Relation::from(join), }; let columns = columns.and_then(join_columns); - println!("columns: \n{}", columns); - //relation.display_dot().unwrap(); Ok(RelationWithColumns::new(Arc::new(relation), columns)) }, ); @@ -387,7 +377,6 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, let mut named_exprs: Vec<(String, Expr)> = vec![]; // Columns from names let columns = &names.map(|s| s.clone().into()); - println!("cols in select items{}", columns); for select_item in select_items { match select_item { ast::SelectItem::UnnamedExpr(expr) => named_exprs.push(( @@ -489,7 +478,6 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, .map(|e| e.with(columns).try_into()) .map_or(Ok(None), |r| r.map(Some))?; - println!("Check before building the relation!!!"); // Build a Relation let mut relation: Relation = match split { Split::Map(map) => { @@ -505,7 +493,6 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, builder.input(from).build() } }; - println!("Check after building the relation!!!"); if let Some(h) = having { relation = Relation::map() .with_iter( @@ -583,8 +570,6 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, having, distinct )?; - // println!("print dot."); - // relation.display_dot().unwrap(); Ok(RelationWithColumns::new(relation, columns)) } @@ -623,8 +608,6 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, // Build a relation with ORDER BY and LIMIT if needed let relation_builder = Relation::map(); // We add all the columns - // println!("print dot. 
check try_from_query"); - // relation.display_dot().unwrap(); let relation_builder = relation .schema() .iter() @@ -1513,25 +1496,6 @@ mod tests { .iter() .map(ToString::to_string); - // let query_str = r#" - // WITH my_tab AS (SELECT u.id AS uid, id, age FROM user_table u JOIN order_table o USING (id)) - // SELECT * FROM my_tab WHERE id > 50; - // "#; - // let query = parse(query_str).unwrap(); - // let relation = Relation::try_from(QueryWithRelations::new( - // &query, - // &relations - // )) - // .unwrap(); - // relation.display_dot().unwrap(); - // let query: &str = &ast::Query::from(&relation).to_string(); - // println!("{query}"); - // _ = database - // .query(query) - // .unwrap() - // .iter() - // .map(ToString::to_string); - let query_str = r#" WITH my_tab AS (SELECT * FROM user_table u JOIN order_table o ON (u.id=o.id)) SELECT * FROM my_tab WHERE user_id > 50; From 7ba2477a8085087732d62d44addaa89b2c74cb9e Mon Sep 17 00:00:00 2001 From: Andi Cuko Date: Mon, 4 Mar 2024 20:46:08 +0100 Subject: [PATCH 08/18] ok tests --- src/dialect_translation/postgresql.rs | 40 +++++--------------- src/hierarchy.rs | 54 +++++++++++++++++++++++++++ src/relation/rewriting.rs | 4 +- src/rewriting/mod.rs | 1 + src/sql/relation.rs | 20 +--------- 5 files changed, 69 insertions(+), 50 deletions(-) diff --git a/src/dialect_translation/postgresql.rs b/src/dialect_translation/postgresql.rs index afed1b32..1fe094fe 100644 --- a/src/dialect_translation/postgresql.rs +++ b/src/dialect_translation/postgresql.rs @@ -99,13 +99,7 @@ mod tests { use super::*; use crate::{ - builder::{Ready, With}, - data_type::{DataType, Value as _}, - display::Dot, - expr::Expr, - namer, - relation::{schema::Schema, Relation, TableBuilder}, - sql::{parse, relation::QueryWithRelations}, + builder::{Ready, With}, data_type::{DataType, Value as _}, display::Dot, expr::Expr, io::{postgresql, Database as _}, namer, relation::{schema::Schema, Relation, TableBuilder}, sql::{parse, relation::QueryWithRelations} }; use std::sync::Arc; @@ -154,19 +148,8 @@ mod tests { #[test] fn test_table_special() -> Result<()> { - let table: Relation = TableBuilder::new() - .path(["MY SPECIAL TABLE"]) - .name("my_table") - .size(100) - .schema( - Schema::empty() - .with(("Id", DataType::integer_interval(0, 1000))) - .with(("Na.Me", DataType::text())) - .with(("inc&ome", DataType::float_interval(100.0, 200000.0))) - .with(("normal_col", DataType::text())), - ) - .build(); - let relations = Hierarchy::from([(["schema", "MY SPECIAL TABLE"], Arc::new(table))]); + let mut database = postgresql::test_database(); + let relations = database.relations(); let query_str = r#"SELECT "Id", NORMAL_COL, "Na.Me" FROM "MY SPECIAL TABLE" ORDER BY "Id" "#; let translator = PostgreSqlTranslator; let query = parse_with_dialect(query_str, translator.dialect())?; @@ -174,16 +157,13 @@ mod tests { let relation = Relation::try_from((query_with_relation, translator))?; println!("\n {} \n", relation); let rel_with_traslator = RelationWithTranslator(&relation, translator); - let retranslated = ast::Query::from(rel_with_traslator); - print!("{}", retranslated); - let translated = r#" - WITH "map_mou5" ("Id","normal_col","Na.Me") AS ( - SELECT "Id" AS "Id", "normal_col" AS "normal_col", "Na.Me" AS "Na.Me" FROM "MY SPECIAL TABLE" - ), "map_0swv"("Id","normal_col","Na.Me") AS ( - SELECT "Id" AS "Id", "normal_col" AS "normal_col", "Na.Me" AS "Na.Me" FROM "map_mou5" ORDER BY "Id" ASC - ) SELECT * FROM "map_0swv" - "#; - // assert_same_query_str(&retranslated.to_string(), translated); + 
let translated = ast::Query::from(rel_with_traslator); + print!("{}", translated); + _ = database + .query(translated.to_string().as_str()) + .unwrap() + .iter() + .map(ToString::to_string); Ok(()) } } diff --git a/src/hierarchy.rs b/src/hierarchy.rs index 03a36056..520cf501 100644 --- a/src/hierarchy.rs +++ b/src/hierarchy.rs @@ -211,6 +211,25 @@ impl Hierarchy { .filter_map(|(p, o)| Some((p.clone(), f(o)?))) .collect() } + + /// It checks whether the path without the head is ambiguous or not. + /// It returns the full paths if the suffix is ambiguous. + pub fn ambiguous_subpaths(&self) -> Vec> { + self.iter() + .filter_map(|(qualified_key, _)| { + let headless_path = if qualified_key.len() > 1 { + &qualified_key[1..] + } else { + &qualified_key[..] + }; + if let Some(_) = self.get(&headless_path) { + None + } else { + Some(qualified_key.clone()) + } + }) + .collect() + } } impl Hierarchy

{ @@ -468,4 +487,39 @@ mod tests { )) ); } + + #[test] + fn test_ambiguous_paths() { + let values = Hierarchy::from([ + (vec!["a", "b", "c"], 1), + (vec!["a", "b", "d"], 2), + (vec!["a", "c"], 3), + (vec!["a", "e"], 4), + (vec!["a", "e", "f"], 5), + (vec!["b", "c"], 6), + ]); + let ambiguous = values.ambiguous_subpaths(); + assert_eq!(ambiguous, vec![ + vec!["a", "c"], + vec!["b", "c"], + ]); + + let values = Hierarchy::from([ + (vec!["a", "b", "d"], 2), + (vec!["a", "b"], 4), + (vec!["a", "e", "f"], 5), + ]); + let ambiguous: Vec> = values.ambiguous_subpaths(); + let empty: Vec> = vec![]; + assert_eq!(ambiguous, empty); + + let values = Hierarchy::from([ + (vec![], 2), + (vec!["b"], 4), + (vec!["c"], 5), + ]); + let ambiguous: Vec> = values.ambiguous_subpaths(); + let empty: Vec> = vec![]; + assert_eq!(ambiguous, empty); + } } diff --git a/src/relation/rewriting.rs b/src/relation/rewriting.rs index 16a95d76..416f071a 100644 --- a/src/relation/rewriting.rs +++ b/src/relation/rewriting.rs @@ -234,14 +234,14 @@ impl Join { }) .collect::>(); - let fields = coalesced_fields + let all_fields = coalesced_fields .into_iter() .chain(remaining_fields.into_iter()) .collect::>(); Relation::map() .input(Relation::from(self)) - .with_iter(fields) + .with_iter(all_fields) .build() } } diff --git a/src/rewriting/mod.rs b/src/rewriting/mod.rs index 192c4624..4b444816 100644 --- a/src/rewriting/mod.rs +++ b/src/rewriting/mod.rs @@ -198,6 +198,7 @@ mod tests { println!("=================================\n{q}"); let query = parse(q).unwrap(); let relation = Relation::try_from(query.with(&relations)).unwrap(); + relation.display_dot().unwrap(); let relation_with_dp_event = relation .rewrite_with_differential_privacy(&relations, synthetic_data.clone(), privacy_unit.clone(), dp_parameters.clone()) .unwrap(); diff --git a/src/sql/relation.rs b/src/sql/relation.rs index e0d09a8c..2f85db23 100644 --- a/src/sql/relation.rs +++ b/src/sql/relation.rs @@ -270,12 +270,12 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, // fully qualified input names -> fully qualified JOIN names let all_fully_qualified_columns: Hierarchy = left_columns.with(right_columns); - let ambiguous_cols= ambiguous_columns(&all_fully_qualified_columns); + let ambiguous_cols= all_fully_qualified_columns.ambiguous_subpaths(); // fully qualified JOIN names -> non_ambiguous col names let non_ambiguous_join_col_names: Hierarchy = all_fully_qualified_columns .iter() .filter_map(|(k, v)| { - if ambiguous_cols.contains(&Identifier::from(k.clone()) ) { + if ambiguous_cols.contains(k) { None } else { Some((v.clone(), k.clone().last().unwrap().to_string())) @@ -754,22 +754,6 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> TryFrom<(QueryWithRelation } } -/// It returns a vector of identifiers of ambiguous columns in a hierarchy of columns -/// It uses the properties of the Hierarchy: For ambiguous columns it checks -/// that Hierarchy::get returns None if using only the suffix of the path -pub fn ambiguous_columns(columns: &Hierarchy) -> Vec { - columns - .iter() - .filter_map(|(key, _)| { - if let Some(v) = columns.get(&[key.last().unwrap().as_str().to_string()]) { - None - } else { - Some(key.clone().into()) - } - }) - .collect() -} - /// A simple SQL query parser with dialect pub fn parse_with_dialect(query: &str, dialect: D) -> Result { let mut tokenizer = Tokenizer::new(&dialect, query); From 1dc5e5427818e3177ffec622daac765f7f948e3d Mon Sep 17 00:00:00 2001 From: Andi Cuko Date: Tue, 5 Mar 2024 09:56:13 +0100 
Subject: [PATCH 09/18] ok --- src/hierarchy.rs | 27 ++++++++------------------- src/sql/relation.rs | 2 +- 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/src/hierarchy.rs b/src/hierarchy.rs index 520cf501..eaa66ac8 100644 --- a/src/hierarchy.rs +++ b/src/hierarchy.rs @@ -212,16 +212,13 @@ impl Hierarchy { .collect() } - /// It checks whether the path without the head is ambiguous or not. - /// It returns the full paths if the suffix is ambiguous. - pub fn ambiguous_subpaths(&self) -> Vec> { + /// It checks whether the tail in the hierarchy's paths is ambiguous or not. + /// It returns the full paths which tail is is ambiguous. + /// It assumes that no empty paths are present in the hierarchy. + pub fn ambiguous_tail_paths(&self) -> Vec> { self.iter() .filter_map(|(qualified_key, _)| { - let headless_path = if qualified_key.len() > 1 { - &qualified_key[1..] - } else { - &qualified_key[..] - }; + let headless_path = [qualified_key.last().unwrap().clone()]; if let Some(_) = self.get(&headless_path) { None } else { @@ -498,8 +495,9 @@ mod tests { (vec!["a", "e", "f"], 5), (vec!["b", "c"], 6), ]); - let ambiguous = values.ambiguous_subpaths(); + let ambiguous = values.ambiguous_tail_paths(); assert_eq!(ambiguous, vec![ + vec!["a", "b", "c"], vec!["a", "c"], vec!["b", "c"], ]); @@ -509,16 +507,7 @@ mod tests { (vec!["a", "b"], 4), (vec!["a", "e", "f"], 5), ]); - let ambiguous: Vec> = values.ambiguous_subpaths(); - let empty: Vec> = vec![]; - assert_eq!(ambiguous, empty); - - let values = Hierarchy::from([ - (vec![], 2), - (vec!["b"], 4), - (vec!["c"], 5), - ]); - let ambiguous: Vec> = values.ambiguous_subpaths(); + let ambiguous: Vec> = values.ambiguous_tail_paths(); let empty: Vec> = vec![]; assert_eq!(ambiguous, empty); } diff --git a/src/sql/relation.rs b/src/sql/relation.rs index 2f85db23..b85cde7f 100644 --- a/src/sql/relation.rs +++ b/src/sql/relation.rs @@ -270,7 +270,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, // fully qualified input names -> fully qualified JOIN names let all_fully_qualified_columns: Hierarchy = left_columns.with(right_columns); - let ambiguous_cols= all_fully_qualified_columns.ambiguous_subpaths(); + let ambiguous_cols= all_fully_qualified_columns.ambiguous_tail_paths(); // fully qualified JOIN names -> non_ambiguous col names let non_ambiguous_join_col_names: Hierarchy = all_fully_qualified_columns .iter() From 399eacc30cad1cdf16e9e43d2d8bde2d44616780 Mon Sep 17 00:00:00 2001 From: Andi Cuko Date: Wed, 13 Mar 2024 16:53:27 +0100 Subject: [PATCH 10/18] wip --- src/hierarchy.rs | 2 +- src/relation/field.rs | 15 --- src/relation/rewriting.rs | 41 ++++---- src/rewriting/rewriting_rule.rs | 1 - src/sql/mod.rs | 11 +++ src/sql/relation.rs | 167 +++++++++++++++++--------------- 6 files changed, 118 insertions(+), 119 deletions(-) diff --git a/src/hierarchy.rs b/src/hierarchy.rs index eaa66ac8..ba164f39 100644 --- a/src/hierarchy.rs +++ b/src/hierarchy.rs @@ -486,7 +486,7 @@ mod tests { } #[test] - fn test_ambiguous_paths() { + fn test_ambiguous_tail_paths() { let values = Hierarchy::from([ (vec!["a", "b", "c"], 1), (vec!["a", "b", "d"], 2), diff --git a/src/relation/field.rs b/src/relation/field.rs index 6562f665..34392ded 100644 --- a/src/relation/field.rs +++ b/src/relation/field.rs @@ -22,19 +22,12 @@ impl fmt::Display for Constraint { } } -#[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub struct Properties { - pub underlying_type: String -} - /// A Field as in 
https://github.com/apache/arrow-datafusion/blob/5b23180cf75ea7155d7c35a40f224ce4d5ad7fb8/datafusion/src/logical_plan/dfschema.rs#L413 #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct Field { name: String, data_type: DataType, constraint: Option, - // contains the type used in python - properties: Option, } impl Field { @@ -44,14 +37,6 @@ impl Field { name, data_type, constraint, - properties: None, - } - } - - pub fn with_properties(self, properties: Properties) -> Field { - Field { - properties: Some(properties), - ..self } } diff --git a/src/relation/rewriting.rs b/src/relation/rewriting.rs index 416f071a..a132ecb3 100644 --- a/src/relation/rewriting.rs +++ b/src/relation/rewriting.rs @@ -1,5 +1,6 @@ //! A few transforms for relations //! + use super::{Join, Map, Reduce, Relation, Set, Table, Values, Variant as _}; use crate::{ builder::{Ready, With, WithIterator}, @@ -202,20 +203,21 @@ impl Join { } /// Replace the duplicates fields specified in `columns` by their coalesce expression - /// It mimics the behavior of USING in SQL + /// Its mimicks teh behaviour of USING in SQL pub fn remove_duplicates_and_coalesce( self, - duplicates: Vec, + vec: Vec, columns: &Hierarchy, - ) -> Relation { - - let coalesced_fields = self + ) -> (Relation, Vec<(Identifier, Identifier)>) { + let mut coalesced_cols: Vec<(Identifier, Identifier)> = vec![]; + let fields = self .field_inputs() - .filter_map(|(_, id)| { + .filter_map(|(name, id)| { let col = id.as_ref().last().unwrap(); - if id.as_ref().first().unwrap().as_str() == LEFT_INPUT_NAME && duplicates.contains(col) { + if id.as_ref().first().unwrap().as_str() == LEFT_INPUT_NAME && vec.contains(col) { + coalesced_cols.push((col[..].into(), name.as_str().into())); Some(( - col.clone(), + name, Expr::coalesce( Expr::col(columns[[LEFT_INPUT_NAME, col]].as_ref().last().unwrap()), Expr::col(columns[[RIGHT_INPUT_NAME, col]].as_ref().last().unwrap()), @@ -224,25 +226,16 @@ impl Join { } else { None } - }).collect::>(); - - let remaining_fields = self - .field_inputs() - .filter_map(|(name, id)| { - let col = id.as_ref().last().unwrap(); - (!duplicates.contains(col)).then_some((name.clone(), Expr::col(name))) }) + .chain(self.field_inputs().filter_map(|(name, id)| { + let col = id.as_ref().last().unwrap(); + (!vec.contains(col)).then_some((name.clone(), Expr::col(name))) + })) .collect::>(); - - let all_fields = coalesced_fields - .into_iter() - .chain(remaining_fields.into_iter()) - .collect::>(); - - Relation::map() + (Relation::map() .input(Relation::from(self)) - .with_iter(all_fields) - .build() + .with_iter(fields) + .build(), coalesced_cols) } } diff --git a/src/rewriting/rewriting_rule.rs b/src/rewriting/rewriting_rule.rs index b5abdcee..06d71e03 100644 --- a/src/rewriting/rewriting_rule.rs +++ b/src/rewriting/rewriting_rule.rs @@ -1277,7 +1277,6 @@ impl<'a> RewriteVisitor<'a> for Rewriter<'a> { } _ => Relation::join() .with(join.clone()) - .names(join.names()) // .left_names(names[0..rewritten_left]) .left(relation_left) .right(relation_right) diff --git a/src/sql/mod.rs b/src/sql/mod.rs index e68dff94..220812d8 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -118,6 +118,17 @@ mod tests { use itertools::Itertools; use sqlparser::dialect::BigQueryDialect; + #[test] + fn test_display_test() { + let database = postgresql::test_database(); + let relations = database.relations(); + let query = r#" + WITH tab AS (SELECT x FROM table_2 AS t1 JOIN table_2 AS t2 USING(x) JOIN table_2 AS t3 USING(x)) + SELECT * from tab"#; + let relation = 
Relation::try_from(parse(query).unwrap().with(&relations)).unwrap(); + relation.display_dot().unwrap(); + } + #[test] fn test_display() { let database = postgresql::test_database(); diff --git a/src/sql/relation.rs b/src/sql/relation.rs index b85cde7f..2ab1c255 100644 --- a/src/sql/relation.rs +++ b/src/sql/relation.rs @@ -8,23 +8,13 @@ use super::{ Error, Result, }; use crate::{ - ast, - builder::{Ready, With, WithIterator, WithoutContext}, - dialect::{Dialect, GenericDialect}, - expr::{Expr, Identifier, Split, Reduce}, - hierarchy::{Hierarchy, Path}, - namer::{self, FIELD}, - parser::Parser, - relation::{ + ast, builder::{Ready, With, WithIterator, WithoutContext}, data_type::injection::Composed, dialect::{Dialect, GenericDialect}, dialect_translation::{postgresql::PostgreSqlTranslator, QueryToRelationTranslator}, display::Dot, expr::{Expr, Identifier, Reduce, Split}, hierarchy::{Hierarchy, Path}, namer::{self, FIELD}, parser::Parser, relation::{ Join, JoinOperator, MapBuilder, Relation, SetOperator, SetQuantifier, Variant as _, WithInput, LEFT_INPUT_NAME, RIGHT_INPUT_NAME - }, - tokenizer::Tokenizer, - visitor::{Acceptor, Dependencies, Visited}, - dialect_translation::{QueryToRelationTranslator, postgresql::PostgreSqlTranslator}, - types::And, display::Dot + }, tokenizer::Tokenizer, types::And, visitor::{Acceptor, Dependencies, Visited} }; +use dot::Id; use itertools::Itertools; use std::{ convert::TryFrom, @@ -247,8 +237,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, } } - /// Build a RelationWithColumns with a JOIN from RelationWithColumns and an - /// ast Join. Preserve input names for non ambigous columns + /// Build a RelationWithColumns with a JOIN fn try_from_join( &self, left: RelationWithColumns, @@ -267,35 +256,66 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, v.extend(i.to_vec()); v.into() }); - + println!("L COLS {}", left_columns); + println!("R COLS {}", right_columns); // fully qualified input names -> fully qualified JOIN names - let all_fully_qualified_columns: Hierarchy = left_columns.with(right_columns); - let ambiguous_cols= all_fully_qualified_columns.ambiguous_tail_paths(); - // fully qualified JOIN names -> non_ambiguous col names - let non_ambiguous_join_col_names: Hierarchy = all_fully_qualified_columns - .iter() - .filter_map(|(k, v)| { - if ambiguous_cols.contains(k) { - None - } else { - Some((v.clone(), k.clone().last().unwrap().to_string())) - } - }) - .collect(); - + let all_columns: Hierarchy = left_columns.with(right_columns); let operator = self.try_from_join_operator_with_columns( &ast_join.join_operator, - &all_fully_qualified_columns, + &all_columns, )?; - // We build a Join. Preserve non ambiguous col names where non ambiguous and rename - // the ambiguous ones. 
+ + println!("COLS {}", all_columns); + left_relation.display_dot().unwrap(); let join: Join = Relation::join() .operator(operator) .left(left_relation) .right(right_relation) - .names(non_ambiguous_join_col_names) .build(); - Ok(RelationWithColumns::new(Arc::new(Relation::from(join)), all_fully_qualified_columns)) + + let join_columns: Hierarchy = join + .field_inputs() + .map(|(f, i)| (i, f.into())) + .collect(); + + // let composed_columns = all_columns.and_then(join_columns); + + // If the join constraint is of type "USING" or "NATURAL", add a map to coalesce the duplicate columns + let (relation, coalesced) = match &ast_join.join_operator { + ast::JoinOperator::Inner(ast::JoinConstraint::Using(v)) + | ast::JoinOperator::LeftOuter(ast::JoinConstraint::Using(v)) + | ast::JoinOperator::RightOuter(ast::JoinConstraint::Using(v)) + | ast::JoinOperator::FullOuter(ast::JoinConstraint::Using(v)) => { + // Do we need to change all_columns? + let to_be_coalesced: Vec = v.into_iter().map(|id| id.value.to_string()).collect(); + // let coalesced: Vec<(Identifier, Identifier)> = to_be_coalesced.iter().map( + // |s| (s[..].into(), s[..].into()) + // ).collect(); + join.remove_duplicates_and_coalesce(to_be_coalesced, &join_columns) + }, + ast::JoinOperator::Inner(ast::JoinConstraint::Natural) + | ast::JoinOperator::LeftOuter(ast::JoinConstraint::Natural) + | ast::JoinOperator::RightOuter(ast::JoinConstraint::Natural) + | ast::JoinOperator::FullOuter(ast::JoinConstraint::Natural) => { + let v: Vec = join.left().fields() + .into_iter() + .filter_map(|f| join.right().schema().field(f.name()).is_ok().then_some(f.name().to_string())) + .collect(); + // let coalesced: Vec<(Identifier, Identifier)> = v.iter().map( + // |s| (s[..].into(), s[..].into()) + // ).collect(); + join.remove_duplicates_and_coalesce(v, &join_columns) + }, + ast::JoinOperator::LeftSemi(_) => todo!(), + ast::JoinOperator::RightSemi(_) => todo!(), + ast::JoinOperator::LeftAnti(_) => todo!(), + ast::JoinOperator::RightAnti(_) => todo!(), + _ => (Relation::from(join), vec![]) + }; + let composed = all_columns.and_then(join_columns); + // let composed_with_coalesced = composed.with(coalesced); + // println!("FINAL {}", composed_with_coalesced); + Ok(RelationWithColumns::new(Arc::new(relation), composed)) } /// Convert a TableWithJoins into a RelationWithColumns @@ -305,46 +325,11 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, ) -> Result { // Process the relation // Then the JOIN if needed - let result = table_with_joins.joins.iter().fold( - self.try_from_table_factor(&table_with_joins.relation), - |left, ast_join| { - let RelationWithColumns(join_relation, columns) = self.try_from_join(left?, &ast_join)?; - let join = Join::try_from(join_relation.deref().clone())?; - - let join_columns: Hierarchy = join - .field_inputs() - .map(|(f, i)| (i, f.into())) - .collect(); - // If the join constraint is of type "USING" or "NATURAL", add a map to coalesce the duplicate columns - let relation = match &ast_join.join_operator { - ast::JoinOperator::Inner(ast::JoinConstraint::Using(v)) - | ast::JoinOperator::LeftOuter(ast::JoinConstraint::Using(v)) - | ast::JoinOperator::RightOuter(ast::JoinConstraint::Using(v)) - | ast::JoinOperator::FullOuter(ast::JoinConstraint::Using(v)) => { - join.remove_duplicates_and_coalesce( - v.into_iter().map(|id| id.value.to_string()).collect(), - &join_columns - ) - }, - ast::JoinOperator::Inner(ast::JoinConstraint::Natural) - | ast::JoinOperator::LeftOuter(ast::JoinConstraint::Natural) 
- | ast::JoinOperator::RightOuter(ast::JoinConstraint::Natural) - | ast::JoinOperator::FullOuter(ast::JoinConstraint::Natural) => { - let v = join.left().fields() - .into_iter() - .filter_map(|f| join.right().schema().field(f.name()).is_ok().then_some(f.name().to_string())) - .collect(); - join.remove_duplicates_and_coalesce(v, &join_columns) - }, - ast::JoinOperator::LeftSemi(_) => todo!(), - ast::JoinOperator::RightSemi(_) => todo!(), - ast::JoinOperator::LeftAnti(_) => todo!(), - ast::JoinOperator::RightAnti(_) => todo!(), - _ => Relation::from(join), - }; - let columns = columns.and_then(join_columns); - Ok(RelationWithColumns::new(Arc::new(relation), columns)) - }, + let result = table_with_joins.joins + .iter() + .fold(self.try_from_table_factor(&table_with_joins.relation), + |left, ast_join| + self.try_from_join(left?, &ast_join), ); result } @@ -377,6 +362,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, let mut named_exprs: Vec<(String, Expr)> = vec![]; // Columns from names let columns = &names.map(|s| s.clone().into()); + println!("columns: \n{}", columns); for select_item in select_items { match select_item { ast::SelectItem::UnnamedExpr(expr) => named_exprs.push(( @@ -404,16 +390,39 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, )), ast::SelectItem::ExprWithAlias { expr, alias } => { named_exprs.push((alias.clone().value, self.translator.try_expr(expr,columns)?)) - //named_exprs.push((alias.clone().value, Expr::try_from(expr.with(columns))?)) } ast::SelectItem::QualifiedWildcard(_, _) => todo!(), ast::SelectItem::Wildcard(_) => { - for field in from.schema().iter() { - named_exprs.push((field.name().to_string(), Expr::col(field.name()))) + // push those named_exprs + // return all columns. use tail paths in the hierarchy as names + // if they are not ambiguous otherwise use the identifiers + let ambiguous_col_paths = columns.ambiguous_tail_paths(); + + let available_cols: Vec = from.schema() + .iter() + .map(|f|f.name().into()) + .collect(); + + println!("amb: {:?}", ambiguous_col_paths); + for (col_path, col_id) in columns.iter() { + if available_cols.contains(col_id) { + if ambiguous_col_paths.contains(col_path) { + // Use the col_id + let col_name = (col_id.last().unwrap()).to_string(); + named_exprs.push((col_name.clone(), Expr::col(col_name))) + } else { + // Use the hierarchy path tail + let col_name = (col_path.last().unwrap()).to_string(); + named_exprs.push((col_name.clone(), Expr::col(col_id.last().unwrap()))) + } + } } } } } + + println!("named_exprs: \n{:?}", named_exprs); + // Prepare the GROUP BY let group_by = match group_by { ast::GroupByExpr::All => todo!(), @@ -475,6 +484,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, // Prepare the WHERE let filter: Option = selection .as_ref() + // todo. 
Use pass the expression through the translator .map(|e| e.with(columns).try_into()) .map_or(Ok(None), |r| r.map(Some))?; @@ -493,6 +503,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, builder.input(from).build() } }; + println!("relation: \n{}", relation); if let Some(h) = having { relation = Relation::map() .with_iter( From e038ea77d0293b6c6ab2e62266b411bf2ef65657 Mon Sep 17 00:00:00 2001 From: Andi Cuko Date: Thu, 14 Mar 2024 11:18:43 +0100 Subject: [PATCH 11/18] wip --- src/differential_privacy/aggregates.rs | 12 +++++++----- src/relation/rewriting.rs | 4 ++-- src/sql/mod.rs | 2 +- src/sql/relation.rs | 11 ++++++----- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/differential_privacy/aggregates.rs b/src/differential_privacy/aggregates.rs index dd6359f5..75444b1c 100644 --- a/src/differential_privacy/aggregates.rs +++ b/src/differential_privacy/aggregates.rs @@ -176,14 +176,16 @@ impl PupRelation { let mut input_builder = Map::builder() .with(( self.privacy_unit(), - Expr::coalesce( - Expr::cast_as_text(Expr::col(self.privacy_unit())), - Expr::val(self.privacy_unit_default().to_string()), - ), + Expr::col(self.privacy_unit()) + // Expr::coalesce( + // Expr::cast_as_text(Expr::col(self.privacy_unit())), + // Expr::val(self.privacy_unit_default().to_string()), + // ), )) .with(( self.privacy_unit_weight(), - Expr::coalesce(Expr::col(self.privacy_unit_weight()), Expr::val(0.)), + Expr::col(self.privacy_unit_weight()) + //Expr::coalesce(Expr::col(self.privacy_unit_weight()), Expr::val(0.)), )); let mut group_by_names = vec![]; diff --git a/src/relation/rewriting.rs b/src/relation/rewriting.rs index a132ecb3..0e993027 100644 --- a/src/relation/rewriting.rs +++ b/src/relation/rewriting.rs @@ -215,9 +215,9 @@ impl Join { .filter_map(|(name, id)| { let col = id.as_ref().last().unwrap(); if id.as_ref().first().unwrap().as_str() == LEFT_INPUT_NAME && vec.contains(col) { - coalesced_cols.push((col[..].into(), name.as_str().into())); + coalesced_cols.push((col[..].into(), col[..].into())); Some(( - name, + col.clone(), Expr::coalesce( Expr::col(columns[[LEFT_INPUT_NAME, col]].as_ref().last().unwrap()), Expr::col(columns[[RIGHT_INPUT_NAME, col]].as_ref().last().unwrap()), diff --git a/src/sql/mod.rs b/src/sql/mod.rs index 220812d8..1db34ff9 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -123,7 +123,7 @@ mod tests { let database = postgresql::test_database(); let relations = database.relations(); let query = r#" - WITH tab AS (SELECT x FROM table_2 AS t1 JOIN table_2 AS t2 USING(x) JOIN table_2 AS t3 USING(x)) + WITH tab AS (SELECT * FROM table_2 AS t1 JOIN table_2 AS t2 USING(x) JOIN table_2 AS t3 USING(x)) SELECT * from tab"#; let relation = Relation::try_from(parse(query).unwrap().with(&relations)).unwrap(); relation.display_dot().unwrap(); diff --git a/src/sql/relation.rs b/src/sql/relation.rs index 2ab1c255..1ccf24ce 100644 --- a/src/sql/relation.rs +++ b/src/sql/relation.rs @@ -177,7 +177,8 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, ast::JoinConstraint::Using(idents) => { // the "Using (id)" condition is equivalent to "ON _LEFT_.id = _RIGHT_.id" Expr::and_iter( idents.into_iter() - .map(|id| Expr::eq( + .map(|id| + Expr::eq( Expr::Column(Identifier::from(vec![LEFT_INPUT_NAME.to_string(), id.value.to_string()])), Expr::Column(Identifier::from(vec![RIGHT_INPUT_NAME.to_string(), id.value.to_string()])), )) @@ -264,9 +265,11 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> 
VisitedQueryRelations<'a, &ast_join.join_operator, &all_columns, )?; - println!("COLS {}", all_columns); + println!("OP {:?}", operator); + left_relation.display_dot().unwrap(); + right_relation.display_dot().unwrap(); let join: Join = Relation::join() .operator(operator) .left(left_relation) @@ -313,8 +316,6 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, _ => (Relation::from(join), vec![]) }; let composed = all_columns.and_then(join_columns); - // let composed_with_coalesced = composed.with(coalesced); - // println!("FINAL {}", composed_with_coalesced); Ok(RelationWithColumns::new(Arc::new(relation), composed)) } @@ -402,7 +403,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, .iter() .map(|f|f.name().into()) .collect(); - + println!("av: {:?}", available_cols); println!("amb: {:?}", ambiguous_col_paths); for (col_path, col_id) in columns.iter() { if available_cols.contains(col_id) { From 90ebc2a5eddff1023b642ef640183bdc1d08d84d Mon Sep 17 00:00:00 2001 From: Andi Cuko Date: Fri, 15 Mar 2024 17:40:02 +0100 Subject: [PATCH 12/18] tests ok --- src/relation/rewriting.rs | 11 +++-- src/sql/mod.rs | 11 +++-- src/sql/relation.rs | 88 +++++++++++++++++---------------------- 3 files changed, 52 insertions(+), 58 deletions(-) diff --git a/src/relation/rewriting.rs b/src/relation/rewriting.rs index e77e6d7a..3eaeae57 100644 --- a/src/relation/rewriting.rs +++ b/src/relation/rewriting.rs @@ -208,14 +208,17 @@ impl Join { self, vec: Vec, columns: &Hierarchy, - ) -> (Relation, Vec<(Identifier, Identifier)>) { + ) -> (Relation, Hierarchy) { let mut coalesced_cols: Vec<(Identifier, Identifier)> = vec![]; let fields = self .field_inputs() - .filter_map(|(name, id)| { + .filter_map(|(_, id)| { let col = id.as_ref().last().unwrap(); if id.as_ref().first().unwrap().as_str() == LEFT_INPUT_NAME && vec.contains(col) { - coalesced_cols.push((col[..].into(), col[..].into())); + let left_col = columns[[LEFT_INPUT_NAME, col]].as_ref().last().unwrap(); + let right_col = columns[[RIGHT_INPUT_NAME, col]].as_ref().last().unwrap(); + coalesced_cols.push((left_col.as_str().into(), col[..].into())); + coalesced_cols.push((right_col.as_str().into(), col[..].into())); Some(( col.clone(), Expr::coalesce( @@ -235,7 +238,7 @@ impl Join { (Relation::map() .input(Relation::from(self)) .with_iter(fields) - .build(), coalesced_cols) + .build(), coalesced_cols.into_iter().collect()) } } diff --git a/src/sql/mod.rs b/src/sql/mod.rs index 1db34ff9..d10d9c71 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -123,10 +123,13 @@ mod tests { let database = postgresql::test_database(); let relations = database.relations(); let query = r#" - WITH tab AS (SELECT * FROM table_2 AS t1 JOIN table_2 AS t2 USING(x) JOIN table_2 AS t3 USING(x)) - SELECT * from tab"#; + with aa as (SELECT x AS ahah FROM table2 t1 JOIN table2 t1 USING (x)) + SELECT * FROM aa ORDER BY ahah + "#; let relation = Relation::try_from(parse(query).unwrap().with(&relations)).unwrap(); relation.display_dot().unwrap(); + let relation_query: &str = &ast::Query::from(&relation).to_string(); + println!("{}",relation_query); } #[test] @@ -143,12 +146,12 @@ mod tests { t2 AS (SELECT * FROM table_2) SELECT max(a), sum(d) FROM t1 INNER JOIN t2 ON t1.d = t2.x CROSS JOIN table_2 GROUP BY t2.y, t1.a", " - WITH t1 AS (SELECT a,d FROM table_1), + WITH t1 AS (SELECT a, d FROM table_1), t2 AS (SELECT * FROM table_2) SELECT * FROM t1 INNER JOIN t2 ON t1.d = t2.x INNER JOIN table_2 ON t1.d=table_2.x ORDER BY t1.a LIMIT 
10", ] { let relation = Relation::try_from(parse(query).unwrap().with(&database.relations())).unwrap(); - relation.display_dot(); + relation.display_dot().unwrap(); } } diff --git a/src/sql/relation.rs b/src/sql/relation.rs index 1ccf24ce..d26501bf 100644 --- a/src/sql/relation.rs +++ b/src/sql/relation.rs @@ -173,7 +173,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, columns: &'a Hierarchy, ) -> Result { Ok(match join_constraint { - ast::JoinConstraint::On(expr) => expr.with(columns).try_into()?, + ast::JoinConstraint::On(expr) => self.translator.try_expr(expr, columns)?, ast::JoinConstraint::Using(idents) => { // the "Using (id)" condition is equivalent to "ON _LEFT_.id = _RIGHT_.id" Expr::and_iter( idents.into_iter() @@ -257,19 +257,12 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, v.extend(i.to_vec()); v.into() }); - println!("L COLS {}", left_columns); - println!("R COLS {}", right_columns); // fully qualified input names -> fully qualified JOIN names let all_columns: Hierarchy = left_columns.with(right_columns); let operator = self.try_from_join_operator_with_columns( &ast_join.join_operator, &all_columns, )?; - println!("COLS {}", all_columns); - println!("OP {:?}", operator); - - left_relation.display_dot().unwrap(); - right_relation.display_dot().unwrap(); let join: Join = Relation::join() .operator(operator) .left(left_relation) @@ -281,8 +274,6 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, .map(|(f, i)| (i, f.into())) .collect(); - // let composed_columns = all_columns.and_then(join_columns); - // If the join constraint is of type "USING" or "NATURAL", add a map to coalesce the duplicate columns let (relation, coalesced) = match &ast_join.join_operator { ast::JoinOperator::Inner(ast::JoinConstraint::Using(v)) @@ -291,9 +282,6 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, | ast::JoinOperator::FullOuter(ast::JoinConstraint::Using(v)) => { // Do we need to change all_columns? 
let to_be_coalesced: Vec = v.into_iter().map(|id| id.value.to_string()).collect(); - // let coalesced: Vec<(Identifier, Identifier)> = to_be_coalesced.iter().map( - // |s| (s[..].into(), s[..].into()) - // ).collect(); join.remove_duplicates_and_coalesce(to_be_coalesced, &join_columns) }, ast::JoinOperator::Inner(ast::JoinConstraint::Natural) @@ -304,18 +292,19 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, .into_iter() .filter_map(|f| join.right().schema().field(f.name()).is_ok().then_some(f.name().to_string())) .collect(); - // let coalesced: Vec<(Identifier, Identifier)> = v.iter().map( - // |s| (s[..].into(), s[..].into()) - // ).collect(); join.remove_duplicates_and_coalesce(v, &join_columns) }, ast::JoinOperator::LeftSemi(_) => todo!(), ast::JoinOperator::RightSemi(_) => todo!(), ast::JoinOperator::LeftAnti(_) => todo!(), ast::JoinOperator::RightAnti(_) => todo!(), - _ => (Relation::from(join), vec![]) + _ => { + let empty: Vec<(Identifier, Identifier)> = vec![]; + (Relation::from(join), empty.into_iter().collect()) + } }; - let composed = all_columns.and_then(join_columns); + let with_coalesced = join_columns.clone().with(join_columns.and_then(coalesced.clone())); + let composed = all_columns.and_then(with_coalesced); Ok(RelationWithColumns::new(Arc::new(relation), composed)) } @@ -358,12 +347,13 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, from: Arc, having: &'a Option, distinct: &'a Option, - ) -> Result> { + ) -> Result { // Collect all expressions with their aliases let mut named_exprs: Vec<(String, Expr)> = vec![]; // Columns from names let columns = &names.map(|s| s.clone().into()); - println!("columns: \n{}", columns); + let mut renamed_columns: Vec<(Identifier, Identifier)> = vec![]; + // println!("columns: \n{}", columns); for select_item in select_items { match select_item { ast::SelectItem::UnnamedExpr(expr) => named_exprs.push(( @@ -387,49 +377,45 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, expr => namer::name_from_content(FIELD, &expr), }, self.translator.try_expr(expr,columns)? - //Expr::try_from(expr.with(columns))?, )), ast::SelectItem::ExprWithAlias { expr, alias } => { named_exprs.push((alias.clone().value, self.translator.try_expr(expr,columns)?)) } ast::SelectItem::QualifiedWildcard(_, _) => todo!(), ast::SelectItem::Wildcard(_) => { - // push those named_exprs - // return all columns. 
use tail paths in the hierarchy as names - // if they are not ambiguous otherwise use the identifiers - let ambiguous_col_paths = columns.ambiguous_tail_paths(); - - let available_cols: Vec = from.schema() - .iter() - .map(|f|f.name().into()) - .collect(); - println!("av: {:?}", available_cols); - println!("amb: {:?}", ambiguous_col_paths); - for (col_path, col_id) in columns.iter() { - if available_cols.contains(col_id) { - if ambiguous_col_paths.contains(col_path) { - // Use the col_id - let col_name = (col_id.last().unwrap()).to_string(); - named_exprs.push((col_name.clone(), Expr::col(col_name))) + // for each field in the schema of the `from` relation + // push its name if it's path tail in + let non_ambigous_col_names: Hierarchy = columns + .iter() + .filter_map(|(path, id)|{ + let path_tail = path.last().unwrap().clone(); + if let Some(_) = columns.get(&[path_tail.clone()]) { + Some((id.clone(), path_tail)) } else { - // Use the hierarchy path tail - let col_name = (col_path.last().unwrap()).to_string(); - named_exprs.push((col_name.clone(), Expr::col(col_id.last().unwrap()))) + None } - } + }) + .collect(); + println!("NN AMB COL {}", non_ambigous_col_names); + for field in from.schema().iter() { + let temp = field.name().to_string(); + let new_alias = non_ambigous_col_names + .get(&[field.name().to_string()]) + .unwrap_or(&temp); + named_exprs.push((new_alias.clone(), Expr::col(field.name()))); + renamed_columns.push((field.name().into(), new_alias.as_str().into())) } } } } - - println!("named_exprs: \n{:?}", named_exprs); + let renamed_columns: Hierarchy = renamed_columns.into_iter().collect(); // Prepare the GROUP BY let group_by = match group_by { ast::GroupByExpr::All => todo!(), ast::GroupByExpr::Expressions(group_by_exprs) => group_by_exprs .iter() - .map(|e| e.with(columns).try_into()) + .map(|e| self.translator.try_expr(e, columns)) .collect::>>()?, }; // If the GROUP BY contains aliases, then replace them by the corresponding expression in `named_exprs`. @@ -450,7 +436,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, // Add the having in named_exprs let having = if let Some(expr) = having { let having_name = namer::name_from_content(FIELD, &expr); - let mut expr = self.translator.try_expr(expr,columns)?; //Expr::try_from(expr.with(columns))?; + let mut expr = self.translator.try_expr(expr,columns)?; let columns = named_exprs .iter() .map(|(s, x)| (Expr::col(s.to_string()), x.clone())) @@ -468,6 +454,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, } else { None }; + println!("named_exprs: {:?}", named_exprs); // Build the Map or Reduce based on the type of split // If group_by is non-empty, start with them so that aggregations can take them into account let split = if group_by.is_empty() { @@ -486,7 +473,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, let filter: Option = selection .as_ref() // todo. 
Use pass the expression through the translator - .map(|e| e.with(columns).try_into()) + .map(|e| self.translator.try_expr(e, columns)) .map_or(Ok(None), |r| r.map(Some))?; // Build a Relation @@ -504,7 +491,6 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, builder.input(from).build() } }; - println!("relation: \n{}", relation); if let Some(h) = having { relation = Relation::map() .with_iter( @@ -524,7 +510,8 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, } relation = relation.distinct() } - Ok(Arc::new(relation)) + let columns = &columns.clone().with(columns.and_then(renamed_columns)); + Ok(RelationWithColumns::new(Arc::new(relation), columns.clone())) } /// Convert a Select into a Relation @@ -573,7 +560,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, let RelationWithColumns(from, columns) = self.try_from_tables_with_joins( from )?; - let relation = self.try_from_select_items_selection_and_group_by( + let RelationWithColumns(relation, columns) = self.try_from_select_items_selection_and_group_by( &columns.filter_map(|i| Some(i.split_last().ok()?.0)), projection, selection, @@ -614,6 +601,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, ast::SetExpr::Select(select) => { let RelationWithColumns(relation, columns) = self.try_from_select(select.as_ref())?; + println!("COOLS: {}", columns); if order_by.is_empty() && limit.is_none() && offset.is_none() { Ok(relation) } else { From 58d9835bffb9f6122ecc50578b2c6f7d976db0dc Mon Sep 17 00:00:00 2001 From: Andi Cuko Date: Fri, 15 Mar 2024 17:41:22 +0100 Subject: [PATCH 13/18] now tests ok --- src/sql/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sql/mod.rs b/src/sql/mod.rs index d10d9c71..5af53b8a 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -123,7 +123,7 @@ mod tests { let database = postgresql::test_database(); let relations = database.relations(); let query = r#" - with aa as (SELECT x AS ahah FROM table2 t1 JOIN table2 t1 USING (x)) + with aa as (SELECT x AS ahah FROM table_2 t1 JOIN table_2 t1 USING (x)) SELECT * FROM aa ORDER BY ahah "#; let relation = Relation::try_from(parse(query).unwrap().with(&relations)).unwrap(); From f0892f8c6758dfb2ef8782ad0f50aab8551c4d77 Mon Sep 17 00:00:00 2001 From: Andi Cuko Date: Fri, 15 Mar 2024 18:13:01 +0100 Subject: [PATCH 14/18] wip --- src/hierarchy.rs | 43 ------------------------------------------- src/sql/mod.rs | 4 ++-- src/sql/relation.rs | 11 ++++------- 3 files changed, 6 insertions(+), 52 deletions(-) diff --git a/src/hierarchy.rs b/src/hierarchy.rs index ba164f39..03a36056 100644 --- a/src/hierarchy.rs +++ b/src/hierarchy.rs @@ -211,22 +211,6 @@ impl Hierarchy { .filter_map(|(p, o)| Some((p.clone(), f(o)?))) .collect() } - - /// It checks whether the tail in the hierarchy's paths is ambiguous or not. - /// It returns the full paths which tail is is ambiguous. - /// It assumes that no empty paths are present in the hierarchy. - pub fn ambiguous_tail_paths(&self) -> Vec> { - self.iter() - .filter_map(|(qualified_key, _)| { - let headless_path = [qualified_key.last().unwrap().clone()]; - if let Some(_) = self.get(&headless_path) { - None - } else { - Some(qualified_key.clone()) - } - }) - .collect() - } } impl Hierarchy
{ @@ -484,31 +468,4 @@ mod tests { )) ); } - - #[test] - fn test_ambiguous_tail_paths() { - let values = Hierarchy::from([ - (vec!["a", "b", "c"], 1), - (vec!["a", "b", "d"], 2), - (vec!["a", "c"], 3), - (vec!["a", "e"], 4), - (vec!["a", "e", "f"], 5), - (vec!["b", "c"], 6), - ]); - let ambiguous = values.ambiguous_tail_paths(); - assert_eq!(ambiguous, vec![ - vec!["a", "b", "c"], - vec!["a", "c"], - vec!["b", "c"], - ]); - - let values = Hierarchy::from([ - (vec!["a", "b", "d"], 2), - (vec!["a", "b"], 4), - (vec!["a", "e", "f"], 5), - ]); - let ambiguous: Vec> = values.ambiguous_tail_paths(); - let empty: Vec> = vec![]; - assert_eq!(ambiguous, empty); - } } diff --git a/src/sql/mod.rs b/src/sql/mod.rs index 5af53b8a..a61f34e3 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -123,8 +123,8 @@ mod tests { let database = postgresql::test_database(); let relations = database.relations(); let query = r#" - with aa as (SELECT x AS ahah FROM table_2 t1 JOIN table_2 t1 USING (x)) - SELECT * FROM aa ORDER BY ahah + with aa as (SELECT x AS ahaha FROM table_2) + SELECT * FROM aa WHERE ahaha > 50 ORDER BY ahaha "#; let relation = Relation::try_from(parse(query).unwrap().with(&relations)).unwrap(); relation.display_dot().unwrap(); diff --git a/src/sql/relation.rs b/src/sql/relation.rs index d26501bf..4090f6d1 100644 --- a/src/sql/relation.rs +++ b/src/sql/relation.rs @@ -303,8 +303,8 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, (Relation::from(join), empty.into_iter().collect()) } }; - let with_coalesced = join_columns.clone().with(join_columns.and_then(coalesced.clone())); - let composed = all_columns.and_then(with_coalesced); + let with_coalesced = join_columns.clone().with(join_columns.and_then(coalesced)); + let composed = all_columns.and_then(with_coalesced); Ok(RelationWithColumns::new(Arc::new(relation), composed)) } @@ -352,8 +352,8 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, let mut named_exprs: Vec<(String, Expr)> = vec![]; // Columns from names let columns = &names.map(|s| s.clone().into()); + //println!("columns: {}", columns); let mut renamed_columns: Vec<(Identifier, Identifier)> = vec![]; - // println!("columns: \n{}", columns); for select_item in select_items { match select_item { ast::SelectItem::UnnamedExpr(expr) => named_exprs.push(( @@ -390,20 +390,19 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, .filter_map(|(path, id)|{ let path_tail = path.last().unwrap().clone(); if let Some(_) = columns.get(&[path_tail.clone()]) { + renamed_columns.push((id.clone(), path_tail.as_str().into())); Some((id.clone(), path_tail)) } else { None } }) .collect(); - println!("NN AMB COL {}", non_ambigous_col_names); for field in from.schema().iter() { let temp = field.name().to_string(); let new_alias = non_ambigous_col_names .get(&[field.name().to_string()]) .unwrap_or(&temp); named_exprs.push((new_alias.clone(), Expr::col(field.name()))); - renamed_columns.push((field.name().into(), new_alias.as_str().into())) } } } @@ -454,7 +453,6 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, } else { None }; - println!("named_exprs: {:?}", named_exprs); // Build the Map or Reduce based on the type of split // If group_by is non-empty, start with them so that aggregations can take them into account let split = if group_by.is_empty() { @@ -601,7 +599,6 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, ast::SetExpr::Select(select) => { 
let RelationWithColumns(relation, columns) = self.try_from_select(select.as_ref())?; - println!("COOLS: {}", columns); if order_by.is_empty() && limit.is_none() && offset.is_none() { Ok(relation) } else { From 8fff68c8a6859e985e3a2f22d259ce6b89672628 Mon Sep 17 00:00:00 2001 From: Andi Cuko Date: Fri, 15 Mar 2024 18:47:42 +0100 Subject: [PATCH 15/18] ok --- src/relation/rewriting.rs | 4 +++- src/sql/mod.rs | 4 ++-- src/sql/relation.rs | 22 ++++++++++++++-------- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/relation/rewriting.rs b/src/relation/rewriting.rs index 3eaeae57..b1f0c2b0 100644 --- a/src/relation/rewriting.rs +++ b/src/relation/rewriting.rs @@ -203,7 +203,9 @@ impl Join { } /// Replace the duplicates fields specified in `columns` by their coalesce expression - /// Its mimicks teh behaviour of USING in SQL + /// Its mimics teh behavior of USING in SQL + /// + /// The coalesced fields names and the corresponding alias is also returned in Hierarchy pub fn remove_duplicates_and_coalesce( self, vec: Vec, diff --git a/src/sql/mod.rs b/src/sql/mod.rs index a61f34e3..cfbd0e9b 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -123,8 +123,8 @@ mod tests { let database = postgresql::test_database(); let relations = database.relations(); let query = r#" - with aa as (SELECT x AS ahaha FROM table_2) - SELECT * FROM aa WHERE ahaha > 50 ORDER BY ahaha + with aa as (SELECT * FROM table_2 t1 JOIN table_2 t2 USING (x) JOIN table_2 t3 USING (x)) + SELECT * FROM aa ORDER BY x "#; let relation = Relation::try_from(parse(query).unwrap().with(&relations)).unwrap(); relation.display_dot().unwrap(); diff --git a/src/sql/relation.rs b/src/sql/relation.rs index 4090f6d1..ce5d515e 100644 --- a/src/sql/relation.rs +++ b/src/sql/relation.rs @@ -337,7 +337,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, ) } - /// Build a relation from the + /// Build a RelationWithColumns from select_items selection group_by having and distinct fn try_from_select_items_selection_and_group_by( &self, names: &'a Hierarchy, @@ -352,7 +352,9 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, let mut named_exprs: Vec<(String, Expr)> = vec![]; // Columns from names let columns = &names.map(|s| s.clone().into()); - //println!("columns: {}", columns); + + // The select all forces the preservation of names for non ambiguous + // columns. In this vector we collect those. let mut renamed_columns: Vec<(Identifier, Identifier)> = vec![]; for select_item in select_items { match select_item { @@ -383,9 +385,10 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, } ast::SelectItem::QualifiedWildcard(_, _) => todo!(), ast::SelectItem::Wildcard(_) => { - // for each field in the schema of the `from` relation - // push its name if it's path tail in - let non_ambigous_col_names: Hierarchy = columns + // push all names that are present in the from into named_exprs. + // for not non ambiguous col names preserve the input name + // for the ambiguous ones used the name present in the relation. 
+ let non_ambiguous_col_names: Hierarchy = columns .iter() .filter_map(|(path, id)|{ let path_tail = path.last().unwrap().clone(); @@ -399,7 +402,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, .collect(); for field in from.schema().iter() { let temp = field.name().to_string(); - let new_alias = non_ambigous_col_names + let new_alias = non_ambiguous_col_names .get(&[field.name().to_string()]) .unwrap_or(&temp); named_exprs.push((new_alias.clone(), Expr::col(field.name()))); @@ -407,7 +410,6 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, } } } - let renamed_columns: Hierarchy = renamed_columns.into_iter().collect(); // Prepare the GROUP BY let group_by = match group_by { @@ -418,7 +420,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, .collect::>>()?, }; // If the GROUP BY contains aliases, then replace them by the corresponding expression in `named_exprs`. - // Note that we mimic postgres behaviour and support only GROUP BY alias column (no other expressions containing aliases are allowed) + // Note that we mimic postgres behavior and support only GROUP BY alias column (no other expressions containing aliases are allowed) // The aliases cannot be used in HAVING let group_by = group_by.into_iter() .map(|x| match &x { @@ -508,6 +510,10 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, } relation = relation.distinct() } + + // When SELECT * we preserve input names when possible so we cerate new columns + // that reflects the actual mapping between input and relation's fields. + let renamed_columns: Hierarchy = renamed_columns.into_iter().collect(); let columns = &columns.clone().with(columns.and_then(renamed_columns)); Ok(RelationWithColumns::new(Arc::new(relation), columns.clone())) } From 1fd4dae2ce7978f1109e29de45d65943a0ee0654 Mon Sep 17 00:00:00 2001 From: Andi Cuko Date: Mon, 18 Mar 2024 12:30:05 +0100 Subject: [PATCH 16/18] try_from_select_items_selection_and_group_by --- src/hierarchy.rs | 38 +++++++++++++++++++++ src/sql/mod.rs | 5 +-- src/sql/relation.rs | 83 +++++++++++++++++++++++++-------------------- 3 files changed, 88 insertions(+), 38 deletions(-) diff --git a/src/hierarchy.rs b/src/hierarchy.rs index 03a36056..419614c6 100644 --- a/src/hierarchy.rs +++ b/src/hierarchy.rs @@ -211,6 +211,21 @@ impl Hierarchy { .filter_map(|(p, o)| Some((p.clone(), f(o)?))) .collect() } + + /// It creates a new hierarchy with elements for which the tail of their + /// path is not ambiguous. In the new hierarchy, only the tails of the original + /// path are used as a path. + pub fn non_ambiguous_tails(&self) -> Hierarchy { + self + .iter() + .filter_map(|(path, _)|{ + let path_tail = path.last().unwrap().clone(); + self + .get(&[path_tail.clone()]) + .and_then( |t| Some(([path_tail], t.clone())) ) + }) + .collect() + } } impl Hierarchy
{ @@ -468,4 +483,27 @@ mod tests { )) ); } + + #[test] + fn test_non_ambiguous() { + let values = Hierarchy::from([ + (vec!["a", "b", "c"], 1), + (vec!["a", "b", "d"], 2), + (vec!["a", "c"], 3), + (vec!["a", "e"], 4), + (vec!["a", "e", "f"], 5), + (vec!["b", "c"], 6), + ]); + + let non_ambiguous = values.non_ambiguous_tails(); + println!("{}", non_ambiguous); + + let values = Hierarchy::from([ + (vec!["t1", "x"], 1), + (vec!["x"], 1), + ]); + + let non_ambiguous = values.non_ambiguous_tails(); + println!("{}", non_ambiguous); + } } diff --git a/src/sql/mod.rs b/src/sql/mod.rs index cfbd0e9b..540cbdf5 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -123,8 +123,9 @@ mod tests { let database = postgresql::test_database(); let relations = database.relations(); let query = r#" - with aa as (SELECT * FROM table_2 t1 JOIN table_2 t2 USING (x) JOIN table_2 t3 USING (x)) - SELECT * FROM aa ORDER BY x + WITH t1 AS (SELECT a,d FROM table_1), + t2 AS (SELECT * FROM table_2) + SELECT * FROM t1 INNER JOIN t2 ON t1.d = t2.x INNER JOIN table_2 ON t1.d=table_2.x ORDER BY t1.a, t2.x, t2.y, t2.z "#; let relation = Relation::try_from(parse(query).unwrap().with(&relations)).unwrap(); relation.display_dot().unwrap(); diff --git a/src/sql/relation.rs b/src/sql/relation.rs index ce5d515e..ad3d309d 100644 --- a/src/sql/relation.rs +++ b/src/sql/relation.rs @@ -337,25 +337,21 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, ) } - /// Build a RelationWithColumns from select_items selection group_by having and distinct - fn try_from_select_items_selection_and_group_by( + /// Extracts named expressions from the from relation and the select items + fn try_named_expr_columns_from_select_items( &self, - names: &'a Hierarchy, + columns: &'a Hierarchy, select_items: &'a [ast::SelectItem], - selection: &'a Option, - group_by: &'a ast::GroupByExpr, - from: Arc, - having: &'a Option, - distinct: &'a Option, - ) -> Result { - // Collect all expressions with their aliases + from: &'a Arc, + ) -> Result<(Vec<(String, Expr)>, Hierarchy)> { + let mut named_exprs: Vec<(String, Expr)> = vec![]; - // Columns from names - let columns = &names.map(|s| s.clone().into()); - + // The select all forces the preservation of names for non ambiguous - // columns. In this vector we collect those. + // columns. In this vector we collect those names. They are needed + // to update the column mapping for order by limit and offset. let mut renamed_columns: Vec<(Identifier, Identifier)> = vec![]; + for select_item in select_items { match select_item { ast::SelectItem::UnnamedExpr(expr) => named_exprs.push(( @@ -386,30 +382,48 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, ast::SelectItem::QualifiedWildcard(_, _) => todo!(), ast::SelectItem::Wildcard(_) => { // push all names that are present in the from into named_exprs. - // for not non ambiguous col names preserve the input name + // for non ambiguous col names preserve the input name // for the ambiguous ones used the name present in the relation. 
- let non_ambiguous_col_names: Hierarchy = columns - .iter() - .filter_map(|(path, id)|{ - let path_tail = path.last().unwrap().clone(); - if let Some(_) = columns.get(&[path_tail.clone()]) { - renamed_columns.push((id.clone(), path_tail.as_str().into())); - Some((id.clone(), path_tail)) - } else { - None - } - }) + let non_ambiguous_cols = columns.non_ambiguous_tails(); + // Invert mapping of non_ambiguous_cols + let new_aliases: Hierarchy = non_ambiguous_cols.iter() + .map(|(p, i)|(i.deref(), p.last().unwrap().clone())) .collect(); + for field in from.schema().iter() { - let temp = field.name().to_string(); - let new_alias = non_ambiguous_col_names - .get(&[field.name().to_string()]) - .unwrap_or(&temp); - named_exprs.push((new_alias.clone(), Expr::col(field.name()))); + let field_name = field.name().to_string(); + let new_alias = new_aliases + .get_key_value(&[field.name().to_string()]) + .and_then(|(k, v)|{ + renamed_columns.push((k.to_vec().into(), v.clone().into())); + Some(v.clone()) + } ); + named_exprs.push((new_alias.unwrap_or(field_name), Expr::col(field.name()))); } } } } + Ok((named_exprs, renamed_columns.into_iter().collect())) + } + + /// Build a RelationWithColumns from select_items selection group_by having and distinct + fn try_from_select_items_selection_and_group_by( + &self, + names: &'a Hierarchy, + select_items: &'a [ast::SelectItem], + selection: &'a Option, + group_by: &'a ast::GroupByExpr, + from: Arc, + having: &'a Option, + distinct: &'a Option, + ) -> Result { + // Collect all expressions with their aliases + let mut named_exprs: Vec<(String, Expr)> = vec![]; + // Columns from names + let columns = &names.map(|s| s.clone().into()); + + let (named_expr_from_select, new_columns) = self.try_named_expr_columns_from_select_items(columns, select_items, &from)?; + named_exprs.extend(named_expr_from_select.into_iter()); // Prepare the GROUP BY let group_by = match group_by { @@ -510,11 +524,8 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, } relation = relation.distinct() } - - // When SELECT * we preserve input names when possible so we cerate new columns - // that reflects the actual mapping between input and relation's fields. 
- let renamed_columns: Hierarchy = renamed_columns.into_iter().collect(); - let columns = &columns.clone().with(columns.and_then(renamed_columns)); + // preserve old columns while composing with new ones + let columns = &columns.clone().with(columns.and_then(new_columns)); Ok(RelationWithColumns::new(Arc::new(relation), columns.clone())) } From c31086ac3a8efeb9fc57d08ccc2367c4b2e52776 Mon Sep 17 00:00:00 2001 From: Andi Cuko Date: Mon, 18 Mar 2024 14:30:25 +0100 Subject: [PATCH 17/18] cleaning --- src/hierarchy.rs | 14 +++++++++++--- src/relation/rewriting.rs | 29 ++++++++++++++++++----------- src/sql/mod.rs | 14 -------------- src/sql/relation.rs | 21 +++++++++++++++++++++ 4 files changed, 50 insertions(+), 28 deletions(-) diff --git a/src/hierarchy.rs b/src/hierarchy.rs index 419614c6..cf552187 100644 --- a/src/hierarchy.rs +++ b/src/hierarchy.rs @@ -494,16 +494,24 @@ mod tests { (vec!["a", "e", "f"], 5), (vec!["b", "c"], 6), ]); - + let expected = Hierarchy::from([ + (vec!["d"], 2), + (vec!["e"], 4), + (vec!["f"], 5), + ]); let non_ambiguous = values.non_ambiguous_tails(); + assert_eq!(non_ambiguous, expected); println!("{}", non_ambiguous); let values = Hierarchy::from([ (vec!["t1", "x"], 1), - (vec!["x"], 1), + (vec!["x"], 2), + ]); + let expected = Hierarchy::from([ + (vec!["x"], 2), ]); - let non_ambiguous = values.non_ambiguous_tails(); + assert_eq!(non_ambiguous, expected); println!("{}", non_ambiguous); } } diff --git a/src/relation/rewriting.rs b/src/relation/rewriting.rs index b1f0c2b0..6703649a 100644 --- a/src/relation/rewriting.rs +++ b/src/relation/rewriting.rs @@ -202,21 +202,27 @@ impl Join { self } - /// Replace the duplicates fields specified in `columns` by their coalesce expression - /// Its mimics teh behavior of USING in SQL + /// To mimic the behavior of USING(col) and NATURAL JOIN in SQL we create + /// a map where join columns identified by `vec` are coalesced. + /// vec: vector of string identifying input columns present in both _LEFT_ + /// and _RIGHT_ relation of the join. 
+ /// columns: is the Hierarchy mapping input names in the JOIN to name field /// - /// The coalesced fields names and the corresponding alias is also returned in Hierarchy + /// It returns a: + /// - Map build on top the Join with coalesced column along with + /// the other fields of the join and + /// - coalesced columns mapping (name in join -> name in map) pub fn remove_duplicates_and_coalesce( self, vec: Vec, columns: &Hierarchy, ) -> (Relation, Hierarchy) { let mut coalesced_cols: Vec<(Identifier, Identifier)> = vec![]; - let fields = self + let coalesced = self .field_inputs() - .filter_map(|(_, id)| { - let col = id.as_ref().last().unwrap(); - if id.as_ref().first().unwrap().as_str() == LEFT_INPUT_NAME && vec.contains(col) { + .filter_map(|(_, input_id)| { + let col = input_id.as_ref().last().unwrap(); + if input_id.as_ref().first().unwrap().as_str() == LEFT_INPUT_NAME && vec.contains(col) { let left_col = columns[[LEFT_INPUT_NAME, col]].as_ref().last().unwrap(); let right_col = columns[[RIGHT_INPUT_NAME, col]].as_ref().last().unwrap(); coalesced_cols.push((left_col.as_str().into(), col[..].into())); @@ -224,14 +230,15 @@ impl Join { Some(( col.clone(), Expr::coalesce( - Expr::col(columns[[LEFT_INPUT_NAME, col]].as_ref().last().unwrap()), - Expr::col(columns[[RIGHT_INPUT_NAME, col]].as_ref().last().unwrap()), + Expr::col(left_col), + Expr::col(right_col), ), )) } else { None } - }) + }); + let coalesced_with_others = coalesced .chain(self.field_inputs().filter_map(|(name, id)| { let col = id.as_ref().last().unwrap(); (!vec.contains(col)).then_some((name.clone(), Expr::col(name))) @@ -239,7 +246,7 @@ impl Join { .collect::>(); (Relation::map() .input(Relation::from(self)) - .with_iter(fields) + .with_iter(coalesced_with_others) .build(), coalesced_cols.into_iter().collect()) } } diff --git a/src/sql/mod.rs b/src/sql/mod.rs index 540cbdf5..8a1da91d 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -118,20 +118,6 @@ mod tests { use itertools::Itertools; use sqlparser::dialect::BigQueryDialect; - #[test] - fn test_display_test() { - let database = postgresql::test_database(); - let relations = database.relations(); - let query = r#" - WITH t1 AS (SELECT a,d FROM table_1), - t2 AS (SELECT * FROM table_2) - SELECT * FROM t1 INNER JOIN t2 ON t1.d = t2.x INNER JOIN table_2 ON t1.d=table_2.x ORDER BY t1.a, t2.x, t2.y, t2.z - "#; - let relation = Relation::try_from(parse(query).unwrap().with(&relations)).unwrap(); - relation.display_dot().unwrap(); - let relation_query: &str = &ast::Query::from(&relation).to_string(); - println!("{}",relation_query); - } #[test] fn test_display() { diff --git a/src/sql/relation.rs b/src/sql/relation.rs index ad3d309d..9cfae920 100644 --- a/src/sql/relation.rs +++ b/src/sql/relation.rs @@ -1456,6 +1456,27 @@ mod tests { let mut database = postgresql::test_database(); let relations = database.relations(); + let query_str = r#" + SELECT * + FROM table_2 AS t1 INNER JOIN table_2 AS t2 USING(x) INNER JOIN table_2 AS t3 USING(x) + WHERE x > 50 + ORDER BY x, t2.y, t2.z + "#; + let query = parse(query_str).unwrap(); + let relation = Relation::try_from(QueryWithRelations::new( + &query, + &relations + )) + .unwrap(); + relation.display_dot().unwrap(); + let query: &str = &ast::Query::from(&relation).to_string(); + println!("{query}"); + _ = database + .query(query) + .unwrap() + .iter() + .map(ToString::to_string); + let query_str = r#" WITH my_tab AS (SELECT * FROM user_table u JOIN order_table o USING (id)) SELECT * FROM my_tab WHERE id > 50; From 
c3f307fa3a5fe1cfb73c438730ba77970772c6d8 Mon Sep 17 00:00:00 2001 From: Andi Cuko Date: Tue, 19 Mar 2024 12:03:00 +0100 Subject: [PATCH 18/18] clean hiereachy file --- src/hierarchy.rs | 46 --------------------------------------------- src/sql/relation.rs | 30 +++++++++++++++++++---------- 2 files changed, 20 insertions(+), 56 deletions(-) diff --git a/src/hierarchy.rs b/src/hierarchy.rs index cf552187..03a36056 100644 --- a/src/hierarchy.rs +++ b/src/hierarchy.rs @@ -211,21 +211,6 @@ impl Hierarchy { .filter_map(|(p, o)| Some((p.clone(), f(o)?))) .collect() } - - /// It creates a new hierarchy with elements for which the tail of their - /// path is not ambiguous. In the new hierarchy, only the tails of the original - /// path are used as a path. - pub fn non_ambiguous_tails(&self) -> Hierarchy { - self - .iter() - .filter_map(|(path, _)|{ - let path_tail = path.last().unwrap().clone(); - self - .get(&[path_tail.clone()]) - .and_then( |t| Some(([path_tail], t.clone())) ) - }) - .collect() - } } impl Hierarchy
{ @@ -483,35 +468,4 @@ mod tests { )) ); } - - #[test] - fn test_non_ambiguous() { - let values = Hierarchy::from([ - (vec!["a", "b", "c"], 1), - (vec!["a", "b", "d"], 2), - (vec!["a", "c"], 3), - (vec!["a", "e"], 4), - (vec!["a", "e", "f"], 5), - (vec!["b", "c"], 6), - ]); - let expected = Hierarchy::from([ - (vec!["d"], 2), - (vec!["e"], 4), - (vec!["f"], 5), - ]); - let non_ambiguous = values.non_ambiguous_tails(); - assert_eq!(non_ambiguous, expected); - println!("{}", non_ambiguous); - - let values = Hierarchy::from([ - (vec!["t1", "x"], 1), - (vec!["x"], 2), - ]); - let expected = Hierarchy::from([ - (vec!["x"], 2), - ]); - let non_ambiguous = values.non_ambiguous_tails(); - assert_eq!(non_ambiguous, expected); - println!("{}", non_ambiguous); - } } diff --git a/src/sql/relation.rs b/src/sql/relation.rs index 9cfae920..5422d300 100644 --- a/src/sql/relation.rs +++ b/src/sql/relation.rs @@ -8,7 +8,7 @@ use super::{ Error, Result, }; use crate::{ - ast, builder::{Ready, With, WithIterator, WithoutContext}, data_type::injection::Composed, dialect::{Dialect, GenericDialect}, dialect_translation::{postgresql::PostgreSqlTranslator, QueryToRelationTranslator}, display::Dot, expr::{Expr, Identifier, Reduce, Split}, hierarchy::{Hierarchy, Path}, namer::{self, FIELD}, parser::Parser, relation::{ + ast, builder::{Ready, With, WithIterator, WithoutContext}, dialect::{Dialect, GenericDialect}, dialect_translation::{postgresql::PostgreSqlTranslator, QueryToRelationTranslator}, display::Dot, expr::{Expr, Identifier, Reduce, Split}, hierarchy::{Hierarchy, Path}, namer::{self, FIELD}, parser::Parser, relation::{ Join, JoinOperator, MapBuilder, Relation, SetOperator, SetQuantifier, Variant as _, WithInput, LEFT_INPUT_NAME, RIGHT_INPUT_NAME @@ -17,12 +17,7 @@ use crate::{ use dot::Id; use itertools::Itertools; use std::{ - convert::TryFrom, - iter::{once, Iterator}, - result, - str::FromStr, - sync::Arc, - ops::Deref + collections::HashMap, convert::TryFrom, iter::{once, Iterator}, ops::Deref, result, str::FromStr, sync::Arc }; /* @@ -384,7 +379,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, // push all names that are present in the from into named_exprs. // for non ambiguous col names preserve the input name // for the ambiguous ones used the name present in the relation. - let non_ambiguous_cols = columns.non_ambiguous_tails(); + let non_ambiguous_cols = last(columns); // Invert mapping of non_ambiguous_cols let new_aliases: Hierarchy = non_ambiguous_cols.iter() .map(|(p, i)|(i.deref(), p.last().unwrap().clone())) @@ -392,13 +387,13 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a, for field in from.schema().iter() { let field_name = field.name().to_string(); - let new_alias = new_aliases + let alias = new_aliases .get_key_value(&[field.name().to_string()]) .and_then(|(k, v)|{ renamed_columns.push((k.to_vec().into(), v.clone().into())); Some(v.clone()) } ); - named_exprs.push((new_alias.unwrap_or(field_name), Expr::col(field.name()))); + named_exprs.push((alias.unwrap_or(field_name), Expr::col(field.name()))); } } } @@ -768,6 +763,21 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> TryFrom<(QueryWithRelation } } +/// It creates a new hierarchy with Identifier for which the last part of their +/// path is not ambiguous. 
The new hierarchy will contain one-element paths +fn last(columns: &Hierarchy) -> Hierarchy { + columns + .iter() + .filter_map(|(path, _)|{ + let path_last = path.last().unwrap().clone(); + columns + .get(&[path_last.clone()]) + .and_then( |t| Some((path_last, t.clone())) ) + }) + .collect() +} + + /// A simple SQL query parser with dialect pub fn parse_with_dialect(query: &str, dialect: D) -> Result { let mut tokenizer = Tokenizer::new(&dialect, query);
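For reference, below is a minimal standalone sketch of the renaming rule that `last` (like the earlier `non_ambiguous_tails`) supports for `SELECT *`: a qualified column keeps its bare input name only when its tail (the last path element) identifies exactly one qualified path; otherwise the generated JOIN field name is kept. The sketch uses plain std collections and hypothetical field names such as `field_0`, not the crate's `Hierarchy` type.

use std::collections::HashMap;

/// Map each generated JOIN field name to its bare tail name, but only when
/// that tail identifies exactly one qualified path.
fn non_ambiguous_tail_aliases(columns: &[(Vec<&str>, &str)]) -> HashMap<String, String> {
    // Count how many qualified paths share each tail.
    let mut tail_count: HashMap<&str, usize> = HashMap::new();
    for (path, _) in columns {
        *tail_count.entry(*path.last().unwrap()).or_insert(0) += 1;
    }
    columns
        .iter()
        .filter(|(path, _)| tail_count[path.last().unwrap()] == 1)
        .map(|(path, field)| (field.to_string(), path.last().unwrap().to_string()))
        .collect()
}

fn main() {
    // Fully qualified input paths -> generated JOIN field names (hypothetical).
    let columns = vec![
        (vec!["t1", "a"], "field_0"),      // tail "a" is unique: exposed as "a"
        (vec!["t1", "d"], "field_1"),      // tail "d" is unique: exposed as "d"
        (vec!["t2", "x"], "field_2"),      // tail "x" is ambiguous: keep "field_2"
        (vec!["table_2", "x"], "field_3"), // and "field_3"
    ];
    let aliases = non_ambiguous_tail_aliases(&columns);
    for (_, field) in &columns {
        let name = aliases.get(*field).cloned().unwrap_or_else(|| field.to_string());
        println!("{field} -> {name}");
    }
}

In the patch itself the same check goes through `columns.get(&[path_last])`, which only returns a value when the one-element suffix is unambiguous in the `Hierarchy`, so no separate counting pass is needed.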