From bc1b93f2bf5d2485f417f022c11500a341354ce5 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 20 Aug 2024 18:44:13 -0700 Subject: [PATCH] feat: define mark join (#682) Defines the Mark Join used by a number of engines in query rewrites. --------- Co-authored-by: Weston Pace Co-authored-by: Jacques Nadeau --- proto/substrait/algebra.proto | 8 ++++++++ site/docs/relations/logical_relations.md | 2 ++ 2 files changed, 10 insertions(+) diff --git a/proto/substrait/algebra.proto b/proto/substrait/algebra.proto index 30be4ab4c..08f775a8f 100644 --- a/proto/substrait/algebra.proto +++ b/proto/substrait/algebra.proto @@ -206,6 +206,8 @@ message JoinRel { JOIN_TYPE_RIGHT_SEMI = 8; JOIN_TYPE_RIGHT_ANTI = 9; JOIN_TYPE_RIGHT_SINGLE = 10; + JOIN_TYPE_LEFT_MARK = 11; + JOIN_TYPE_RIGHT_MARK = 12; } substrait.extensions.AdvancedExtension advanced_extension = 10; @@ -684,6 +686,8 @@ message HashJoinRel { JOIN_TYPE_RIGHT_ANTI = 8; JOIN_TYPE_LEFT_SINGLE = 9; JOIN_TYPE_RIGHT_SINGLE = 10; + JOIN_TYPE_LEFT_MARK = 11; + JOIN_TYPE_RIGHT_MARK = 12; } substrait.extensions.AdvancedExtension advanced_extension = 10; @@ -732,6 +736,8 @@ message MergeJoinRel { JOIN_TYPE_RIGHT_ANTI = 8; JOIN_TYPE_LEFT_SINGLE = 9; JOIN_TYPE_RIGHT_SINGLE = 10; + JOIN_TYPE_LEFT_MARK = 11; + JOIN_TYPE_RIGHT_MARK = 12; } substrait.extensions.AdvancedExtension advanced_extension = 10; @@ -760,6 +766,8 @@ message NestedLoopJoinRel { JOIN_TYPE_RIGHT_ANTI = 8; JOIN_TYPE_LEFT_SINGLE = 9; JOIN_TYPE_RIGHT_SINGLE = 10; + JOIN_TYPE_LEFT_MARK = 11; + JOIN_TYPE_RIGHT_MARK = 12; } substrait.extensions.AdvancedExtension advanced_extension = 10; diff --git a/site/docs/relations/logical_relations.md b/site/docs/relations/logical_relations.md index 5d9a0dfdb..2bfc1b185 100644 --- a/site/docs/relations/logical_relations.md +++ b/site/docs/relations/logical_relations.md @@ -234,6 +234,8 @@ The join operation will combine two separate inputs into a single output, based | Right Anti | Return records from the right input. 
These are returned only if the records do not have a join partner on the left side. | | Left Single | Return all records from the left input with no join expansion. If at least one record from the right input matches the left, return one arbitrary matching record from the right input. For any left records without matching right records, return the left record along with nulls for the right input. Similar to a left outer join but only returns one right match at most. Useful for nested sub-queries where we need exactly one record in output (or throw exception). See Section 3.2 of https://15721.courses.cs.cmu.edu/spring2018/papers/16-optimizer2/hyperjoins-btw2017.pdf for more information. | | Right Single | Same as left single except that the right and left inputs are switched. | +| Left Mark | Returns one record for each record from the left input. Appends one additional "mark" column to the output of the join. The new column will be listed after all columns from both sides and will be of type nullable boolean. If there is at least one join partner in the right input where the join condition evaluates to true then the mark column will be set to true. Otherwise, if there is at least one join partner in the right input where the join condition evaluates to NULL then the mark column will be set to NULL. Otherwise the mark column will be set to false. | +| Right Mark | Returns one record for each record from the right input. Appends one additional "mark" column to the output of the join. The new column will be listed after all columns from both sides and will be of type nullable boolean. If there is at least one join partner in the left input where the join condition evaluates to true then the mark column will be set to true. Otherwise, if there is at least one join partner in the left input where the join condition evaluates to NULL then the mark column will be set to NULL. Otherwise the mark column will be set to false. | === "JoinRel Message"