From c911b862ddaf9aa2fd52dab1168dd775e7ef4368 Mon Sep 17 00:00:00 2001 From: Alan Cai Date: Fri, 1 Dec 2023 16:49:02 -0800 Subject: [PATCH] WIP to add 'EXCLUDE' to physical plan compiler --- docs/wiki/documentation/Functions.md | 8 +- partiql-ast/src/main/pig/partiql.ion | 9 + .../lang/compiler/PartiQLCompilerBuilder.kt | 2 + .../lang/eval/internal/StructExprValue.kt | 2 +- .../internal/builtins/ScalarBuiltinsExt.kt | 22 +- .../physical/PhysicalBexprToThunkConverter.kt | 17 + .../ExcludeRelationalOperatorFactory.kt | 364 ++++++++++++++++++ .../operators/RelationalOperatorFactory.kt | 2 +- .../operators/RelationalOperatorKind.kt | 3 +- .../AstToLogicalVisitorTransform.kt | 4 + ...solvedToDefaultPhysicalVisitorTransform.kt | 12 + ...ogicalToLogicalResolvedVisitorTransform.kt | 33 ++ .../eval/EvaluatingCompilerExcludeTests.kt | 5 +- .../functions/FilterDistinctEvaluationTest.kt | 8 +- .../lang/eval/builtins/ScalarBuiltinsExt.kt | 22 +- 15 files changed, 478 insertions(+), 35 deletions(-) create mode 100644 partiql-eval/src/main/kotlin/org/partiql/lang/eval/physical/operators/ExcludeRelationalOperatorFactory.kt diff --git a/docs/wiki/documentation/Functions.md b/docs/wiki/documentation/Functions.md index 57ea621151..7631673b35 100644 --- a/docs/wiki/documentation/Functions.md +++ b/docs/wiki/documentation/Functions.md @@ -477,20 +477,20 @@ EXTRACT(TIMEZONE_MINUTE FROM TIME WITH TIME ZONE '23:12:59-08:30') -- -30 ### `FILTER_DISTINCT` -- since v0.7.0 Signature -: `FILTER_DISTINCT: Container -> Bag` +: `FILTER_DISTINCT: Container -> Bag|List` Header : `FILTER_DISTINCT(c)` Purpose -: Returns a bag of distinct values contained within a bag, list, sexp, or struct. If the container is a struct, -the field names are not considered. +: Returns a bag or list of distinct values contained within a bag, list, sexp, or struct. If the container is a struct, +the field names are not considered. A list will be returned if and only if the input is a list. 
Examples : ```sql -FILTER_DISTINCT([0, 0, 1]) -- <<0, 1>> +FILTER_DISTINCT([0, 0, 1]) -- [0, 1] FILTER_DISTINCT(<<0, 0, 1>>) -- <<0, 1>> FILTER_DISTINCT(SEXP(0, 0, 1)) -- <<0, 1>> FILTER_DISTINCT({'a': 0, 'b': 0, 'c': 1}) -- <<0, 1>> diff --git a/partiql-ast/src/main/pig/partiql.ion b/partiql-ast/src/main/pig/partiql.ion index 7c845ac441..8be48b6049 100644 --- a/partiql-ast/src/main/pig/partiql.ion +++ b/partiql-ast/src/main/pig/partiql.ion @@ -799,6 +799,9 @@ may then be further optimized by selecting better implementations of each operat // For every row of `source`, adds each specified `let_binding`. (let source::bexpr bindings::(* let_binding 1)) + + // For every row of `source`, omits the values specified by `exclude_expr`s + (exclude_clause source::bexpr exprs::(* exclude_expr 1)) ) ) @@ -872,6 +875,7 @@ may then be further optimized by selecting better implementations of each operat column_component returning_mapping assignment + exclude_op ) ) ) @@ -934,6 +938,9 @@ may then be further optimized by selecting better implementations of each operat ) ) + (exclude exclude_expr) + (include (product exclude_expr root::int steps::(* exclude_step 1))) + // Replace statement.dml.target with statement.dml.uniqueId (the "resolved" corollary). 
(with statement (exclude dml) @@ -1007,6 +1014,8 @@ may then be further optimized by selecting better implementations of each operat // Notice that the physical window operator contains a list of window expression // That is because, we want to combine the window functions that are operating on the same window to a single window operator (window i::impl source:: bexpr window_specification:: over window_expression_list:: (* window_expression 1)) + + (exclude_clause i::impl source::bexpr exprs::(* exclude_expr 1)) ) ) ) diff --git a/partiql-eval/src/main/kotlin/org/partiql/lang/compiler/PartiQLCompilerBuilder.kt b/partiql-eval/src/main/kotlin/org/partiql/lang/compiler/PartiQLCompilerBuilder.kt index 5ade0ad938..b177a67ec7 100644 --- a/partiql-eval/src/main/kotlin/org/partiql/lang/compiler/PartiQLCompilerBuilder.kt +++ b/partiql-eval/src/main/kotlin/org/partiql/lang/compiler/PartiQLCompilerBuilder.kt @@ -24,6 +24,7 @@ import org.partiql.lang.eval.builtins.storedprocedure.StoredProcedure import org.partiql.lang.eval.internal.builtins.SCALAR_BUILTINS_DEFAULT import org.partiql.lang.eval.internal.builtins.definitionalBuiltins import org.partiql.lang.eval.physical.operators.AggregateOperatorFactoryDefault +import org.partiql.lang.eval.physical.operators.ExcludeRelationalOperatorFactoryDefault import org.partiql.lang.eval.physical.operators.FilterRelationalOperatorFactoryDefault import org.partiql.lang.eval.physical.operators.JoinRelationalOperatorFactoryDefault import org.partiql.lang.eval.physical.operators.LetRelationalOperatorFactoryDefault @@ -87,6 +88,7 @@ class PartiQLCompilerBuilder private constructor() { // Notice here we will not propagate the optin requirement to the user @OptIn(ExperimentalWindowFunctions::class) WindowRelationalOperatorFactoryDefault, + ExcludeRelationalOperatorFactoryDefault, ) @JvmStatic diff --git a/partiql-eval/src/main/kotlin/org/partiql/lang/eval/internal/StructExprValue.kt 
b/partiql-eval/src/main/kotlin/org/partiql/lang/eval/internal/StructExprValue.kt index 28ab8f347b..81cb7ae993 100644 --- a/partiql-eval/src/main/kotlin/org/partiql/lang/eval/internal/StructExprValue.kt +++ b/partiql-eval/src/main/kotlin/org/partiql/lang/eval/internal/StructExprValue.kt @@ -34,7 +34,7 @@ internal enum class StructOrdering { * Provides a [ExprValueType.STRUCT] implementation lazily backed by a sequence. */ internal open class StructExprValue( - private val ordering: StructOrdering, + internal val ordering: StructOrdering, private val sequence: Sequence ) : BaseExprValue() { diff --git a/partiql-eval/src/main/kotlin/org/partiql/lang/eval/internal/builtins/ScalarBuiltinsExt.kt b/partiql-eval/src/main/kotlin/org/partiql/lang/eval/internal/builtins/ScalarBuiltinsExt.kt index 495b84cab2..1463107d68 100644 --- a/partiql-eval/src/main/kotlin/org/partiql/lang/eval/internal/builtins/ScalarBuiltinsExt.kt +++ b/partiql-eval/src/main/kotlin/org/partiql/lang/eval/internal/builtins/ScalarBuiltinsExt.kt @@ -110,7 +110,7 @@ internal object ExprFunctionUtcNow : ExprFunction { } /** - * Returns a bag of distinct values contained within a bag, list, sexp, or struct. + * Returns a bag or list of distinct values contained within a bag, list, sexp, or struct. * If the container is a struct, the field names are not considered. 
*/ internal object ExprFunctionFilterDistinct : ExprFunction { @@ -118,23 +118,25 @@ internal object ExprFunctionFilterDistinct : ExprFunction { override val signature = FunctionSignature( name = "filter_distinct", requiredParameters = listOf(unionOf(StaticType.BAG, StaticType.LIST, StaticType.SEXP, StaticType.STRUCT)), - returnType = StaticType.BAG + returnType = unionOf(StaticType.BAG, StaticType.LIST) ) override fun callWithRequired(session: EvaluationSession, required: List): ExprValue { val argument = required.first() // We cannot use a [HashSet] here because [ExprValue] does not implement .equals() and .hashCode() val encountered = TreeSet(DEFAULT_COMPARATOR) - return ExprValue.newBag( - sequence { - argument.asSequence().forEach { - if (!encountered.contains(it)) { - encountered.add(it.unnamedValue()) - yield(it) - } + val seq = sequence { + argument.asSequence().forEach { + if (!encountered.contains(it)) { + encountered.add(it.unnamedValue()) + yield(it) } } - ) + } + return when (argument.type) { + ExprValueType.LIST -> ExprValue.newList(seq) + else -> ExprValue.newBag(seq) + } } } diff --git a/partiql-eval/src/main/kotlin/org/partiql/lang/eval/physical/PhysicalBexprToThunkConverter.kt b/partiql-eval/src/main/kotlin/org/partiql/lang/eval/physical/PhysicalBexprToThunkConverter.kt index 4158f0c61f..40dcf6ff33 100644 --- a/partiql-eval/src/main/kotlin/org/partiql/lang/eval/physical/PhysicalBexprToThunkConverter.kt +++ b/partiql-eval/src/main/kotlin/org/partiql/lang/eval/physical/PhysicalBexprToThunkConverter.kt @@ -14,6 +14,7 @@ import org.partiql.lang.eval.physical.operators.CompiledAggregateFunction import org.partiql.lang.eval.physical.operators.CompiledGroupKey import org.partiql.lang.eval.physical.operators.CompiledSortKey import org.partiql.lang.eval.physical.operators.CompiledWindowFunction +import org.partiql.lang.eval.physical.operators.ExcludeRelationalOperatorFactory import org.partiql.lang.eval.physical.operators.FilterRelationalOperatorFactory 
import org.partiql.lang.eval.physical.operators.JoinRelationalOperatorFactory import org.partiql.lang.eval.physical.operators.LetRelationalOperatorFactory @@ -28,6 +29,7 @@ import org.partiql.lang.eval.physical.operators.ScanRelationalOperatorFactory import org.partiql.lang.eval.physical.operators.SortOperatorFactory import org.partiql.lang.eval.physical.operators.UnpivotOperatorFactory import org.partiql.lang.eval.physical.operators.WindowRelationalOperatorFactory +import org.partiql.lang.eval.physical.operators.compileExcludeClause import org.partiql.lang.eval.physical.operators.valueExpression import org.partiql.lang.eval.physical.window.createBuiltinWindowFunction import org.partiql.lang.util.toIntExact @@ -324,6 +326,21 @@ internal class PhysicalBexprToThunkConverter( // wrap in thunk return bindingsExpr.toRelationThunk(node.metas) } + + override fun convertExcludeClause(node: PartiqlPhysical.Bexpr.ExcludeClause): RelationThunkEnv { + // recurse into children + val sourceBexpr = this.convert(node.source) + val compiledBindings = compileExcludeClause(node) + + // locate operator factory + val factory = findOperatorFactory(RelationalOperatorKind.EXCLUDE, node.i.name.text) + + // create operator implementation + val bindingsExpr = factory.create(node.i, sourceBexpr, compiledBindings) + + // wrap in thunk + return bindingsExpr.toRelationThunk(node.metas) + } } private fun PartiqlPhysical.Expr.isLitTrue() = diff --git a/partiql-eval/src/main/kotlin/org/partiql/lang/eval/physical/operators/ExcludeRelationalOperatorFactory.kt b/partiql-eval/src/main/kotlin/org/partiql/lang/eval/physical/operators/ExcludeRelationalOperatorFactory.kt new file mode 100644 index 0000000000..5a25cd75f6 --- /dev/null +++ b/partiql-eval/src/main/kotlin/org/partiql/lang/eval/physical/operators/ExcludeRelationalOperatorFactory.kt @@ -0,0 +1,364 @@ +package org.partiql.lang.eval.physical.operators + +import com.amazon.ionelement.api.emptyMetaContainer +import 
org.partiql.lang.domains.PartiqlPhysical +import org.partiql.lang.eval.ExprValue +import org.partiql.lang.eval.ExprValueType +import org.partiql.lang.eval.internal.StructExprValue +import org.partiql.lang.eval.internal.StructOrdering +import org.partiql.lang.eval.longValue +import org.partiql.lang.eval.name +import org.partiql.lang.eval.namedValue +import org.partiql.lang.eval.physical.EvaluatorState +import org.partiql.lang.eval.relation.RelationIterator +import org.partiql.lang.eval.relation.relation +import org.partiql.lang.eval.stringValue +import org.partiql.lang.planner.transforms.DEFAULT_IMPL_NAME +import org.partiql.pig.runtime.LongPrimitive +import org.partiql.pig.runtime.SymbolPrimitive + +/** + * Provides an implementation of the [PartiqlPhysical.Bexpr.ExcludeClause] operator. + * + * @constructor + * + * @param name + */ +abstract class ExcludeRelationalOperatorFactory(name: String) : RelationalOperatorFactory { + final override val key = RelationalOperatorFactoryKey(RelationalOperatorKind.EXCLUDE, name) + + /** + * Creates a [RelationExpression] instance for [PartiqlPhysical.Bexpr.ExcludeClause].
+ * + * @param impl + * @param sourceBexpr + * @param compiledExcludeExprs + * @return + */ + abstract fun create( + impl: PartiqlPhysical.Impl, + sourceBexpr: RelationExpression, + compiledExcludeExprs: List + ): RelationExpression +} + +internal object ExcludeRelationalOperatorFactoryDefault : ExcludeRelationalOperatorFactory(DEFAULT_IMPL_NAME) { + override fun create( + impl: PartiqlPhysical.Impl, + sourceBexpr: RelationExpression, + compiledExcludeExprs: List + ): RelationExpression = ExcludeOperator( + input = sourceBexpr, + compiledExcludeExprs = compiledExcludeExprs + ) +} + +internal class ExcludeOperator( + val input: RelationExpression, + val compiledExcludeExprs: List +) : RelationExpression { + private fun excludeBindings( + curRegisters: Array, + root: Int, + exclusions: RemoveAndOtherSteps + ): Array { + val curExprValue = curRegisters.getOrNull(root) + return if (curExprValue != null) { + val newExprValue = excludeExprValue(curExprValue, exclusions) + curRegisters[root] = newExprValue + curRegisters + } else { + curRegisters + } + } + + /** + * Returns an [ExprValue] created from a sequence of [seq]. Requires [type] to be a sequence type + * (i.e. [ExprValueType.isSequence] == true). 
+ */ + private fun newSequence(type: ExprValueType, seq: Sequence): ExprValue { + return when (type) { + ExprValueType.LIST -> ExprValue.newList(seq) + ExprValueType.BAG -> ExprValue.newBag(seq) + ExprValueType.SEXP -> ExprValue.newSexp(seq) + else -> error("Sequence type required") + } + } + + private fun excludeExprValue(initialExprValue: ExprValue, exclusions: RemoveAndOtherSteps): ExprValue { + val toRemove = exclusions.remove + val otherSteps = exclusions.steps + when (initialExprValue.type) { + ExprValueType.STRUCT -> { + if (toRemove.any { it is PartiqlPhysical.ExcludeStep.ExcludeTupleWildcard }) { + // TODO ALAN: fix `ordering` to rely on `initialExprValue`'s ordering; need to determine which + // `StructExprValue` to use (`eval` or `eval.internal`) + return StructExprValue(sequence = emptySequence(), ordering = StructOrdering.ORDERED) + } + val attrsToRemove = toRemove.filterIsInstance() + .map { it.attr.name.text } + .toSet() + val sequenceWithRemoved = initialExprValue.mapNotNull { structField -> + if (attrsToRemove.contains(structField.name?.stringValue())) { + null + } else { + structField + } + } + val finalSequence = sequenceWithRemoved.map { structField -> + var expr = structField + val name = structField.name!! + val structFieldKey = PartiqlPhysical.build { + PartiqlPhysical.ExcludeStep.ExcludeTupleAttr( + PartiqlPhysical.Identifier( + SymbolPrimitive( + structField.name?.stringValue()!!, + emptyMetaContainer() + ), + caseInsensitive() + ) + ) + } + if (otherSteps.contains(structFieldKey)) { + expr = excludeExprValue(structField, otherSteps[structFieldKey]!!) 
+ } + val tupleWildcardEntry = + otherSteps[PartiqlPhysical.build { excludeTupleWildcard(emptyMetaContainer()) }] + if (tupleWildcardEntry != null) { + expr = excludeExprValue(expr, tupleWildcardEntry) + } + expr.namedValue(name) + } + // TODO ALAN: fix `ordering` to rely on `initialExprValue`'s ordering; need to determine which + // `StructExprValue` to use (`eval` or `eval.internal`) + return StructExprValue(sequence = finalSequence.asSequence(), ordering = StructOrdering.ORDERED) + } + + ExprValueType.LIST, ExprValueType.BAG, ExprValueType.SEXP -> { + if (toRemove.any { it is PartiqlPhysical.ExcludeStep.ExcludeCollectionWildcard }) { + return newSequence(initialExprValue.type, emptySequence()) + } else { + // remove some elements + val indexesToRemove = toRemove.filterIsInstance() + .map { it.index.value } + .toSet() + val sequenceWithRemoved = initialExprValue.mapNotNull { element -> + if (indexesToRemove.contains(element.name?.longValue())) { + null + } else { + element + } + }.asSequence() + val finalSequence = sequenceWithRemoved.map { element -> + var expr = element + if (initialExprValue.type == ExprValueType.LIST || initialExprValue.type == ExprValueType.SEXP) { + val elementKey = PartiqlPhysical.build { + PartiqlPhysical.ExcludeStep.ExcludeCollectionIndex( + LongPrimitive( + element.name?.longValue()!!, + emptyMetaContainer() + ) + ) + } + if (otherSteps.contains(elementKey)) { + expr = excludeExprValue(element, otherSteps[elementKey]!!) 
+ } + } + val collectionWildcardEntry = + otherSteps[PartiqlPhysical.build { excludeCollectionWildcard(emptyMetaContainer()) }] + if (collectionWildcardEntry != null) { + expr = excludeExprValue(expr, collectionWildcardEntry) + } + expr + } + return newSequence(initialExprValue.type, finalSequence) + } + } + else -> { + return initialExprValue + } + } + } + + override fun evaluate(state: EvaluatorState): RelationIterator { + val rows = input.evaluate(state) + + return relation(rows.relType) { + while (rows.nextRow()) { + val newRegisters = compiledExcludeExprs.fold(state.registers) { curRegisters, expr -> + excludeBindings(curRegisters, expr.root, expr.exclusions) + } + state.load(newRegisters) + yield() + } + } + } +} + +/** + * TODO ALAN: after rebase to include `EvaluatingCompiler` implementation of `EXCLUDE`, need to refactor for better + * code reuse here and other parts. + * Creates a list of compiled exclude expressions with each index of the resulting list corresponding to a different + * exclude path root. + */ +internal fun compileExcludeClause(excludeClause: PartiqlPhysical.Bexpr.ExcludeClause): List { + val excludeExprs = excludeClause.exprs + fun addToCompiledExcludeExprs(curCompiledExpr: RemoveAndOtherSteps, steps: List): RemoveAndOtherSteps { + // subsumption cases + // when steps.size == 1: possibly add to remove set + // when steps.size > 1: possibly add to steps map + val first = steps.first() + var entryRemove = curCompiledExpr.remove.toMutableSet() + var entrySteps = curCompiledExpr.steps.toMutableMap() + if (steps.size == 1) { + when (first) { + is PartiqlPhysical.ExcludeStep.ExcludeTupleAttr -> { + if (entryRemove.contains(PartiqlPhysical.build { excludeTupleWildcard() })) { + // contains wildcard; do not add; e.g. a.* and a.b -> keep a.* + } else { + // add to entries to remove + entryRemove.add(first) + // remove from other steps; e.g. 
a.b.c and a.b -> keep a.b + entrySteps.remove(first) + } + } + is PartiqlPhysical.ExcludeStep.ExcludeTupleWildcard -> { + entryRemove.add(first) + // remove all tuple attribute exclude steps + entryRemove = entryRemove.filterNot { + it is PartiqlPhysical.ExcludeStep.ExcludeTupleAttr + }.toMutableSet() + // remove all tuple attribute/wildcard exclude steps from deeper levels + entrySteps = entrySteps.filterNot { + it.key is PartiqlPhysical.ExcludeStep.ExcludeTupleAttr || it.key is PartiqlPhysical.ExcludeStep.ExcludeTupleWildcard + }.toMutableMap() + } + is PartiqlPhysical.ExcludeStep.ExcludeCollectionIndex -> { + if (entryRemove.contains(PartiqlPhysical.build { excludeCollectionWildcard() })) { + // contains wildcard; do not add; e.g a[*] and a[1] -> keep a[*] + } else { + // add to entries to remove + entryRemove.add(first) + // remove from other steps; e.g. a.b[2].c and a.b[2] -> keep a.b[2] + entrySteps.remove(first) + } + } + is PartiqlPhysical.ExcludeStep.ExcludeCollectionWildcard -> { + entryRemove.add(first) + // remove all collection index exclude steps + entryRemove = entryRemove.filterNot { + it is PartiqlPhysical.ExcludeStep.ExcludeCollectionIndex + }.toMutableSet() + // remove all collection index/wildcard exclude steps from deeper levels + entrySteps = entrySteps.filterNot { + it.key is PartiqlPhysical.ExcludeStep.ExcludeCollectionIndex || it.key is PartiqlPhysical.ExcludeStep.ExcludeCollectionWildcard + }.toMutableMap() + } + } + } else { + // remove at deeper level; need to check if first step is already removed in current step + when (first) { + is PartiqlPhysical.ExcludeStep.ExcludeTupleAttr -> { + if (entryRemove.contains(PartiqlPhysical.build { excludeTupleWildcard() }) || entryRemove.contains(first)) { + // remove set contains tuple wildcard or attr; do not add to other steps; + // e.g. 
a.* and a.b.c -> a.* + } else { + val existingEntry = entrySteps.getOrDefault(first, RemoveAndOtherSteps.empty()) + val newEntry = addToCompiledExcludeExprs(existingEntry, steps.drop(1)) + entrySteps[first] = newEntry + } + } + is PartiqlPhysical.ExcludeStep.ExcludeTupleWildcard -> { + if (entryRemove.any { it is PartiqlPhysical.ExcludeStep.ExcludeTupleWildcard }) { + // tuple wildcard at current level; do nothing + } else { + val existingEntry = entrySteps.getOrDefault(first, RemoveAndOtherSteps.empty()) + val newEntry = addToCompiledExcludeExprs(existingEntry, steps.drop(1)) + entrySteps[first] = newEntry + } + } + is PartiqlPhysical.ExcludeStep.ExcludeCollectionIndex -> { + if (entryRemove.contains(PartiqlPhysical.build { excludeCollectionWildcard() }) || entryRemove.contains(first)) { + // remove set contains collection wildcard or index; do not add to other steps; + // e.g. a[*] and a[*][1] -> a[*] + } else { + val existingEntry = entrySteps.getOrDefault(first, RemoveAndOtherSteps.empty()) + val newEntry = addToCompiledExcludeExprs(existingEntry, steps.drop(1)) + entrySteps[first] = newEntry + } + } + is PartiqlPhysical.ExcludeStep.ExcludeCollectionWildcard -> { + if (entryRemove.any { it is PartiqlPhysical.ExcludeStep.ExcludeCollectionWildcard }) { + // collection wildcard at current level; do nothing + } else { + val existingEntry = entrySteps.getOrDefault(first, RemoveAndOtherSteps.empty()) + val newEntry = addToCompiledExcludeExprs(existingEntry, steps.drop(1)) + entrySteps[first] = newEntry + } + } + } + } + return RemoveAndOtherSteps(entryRemove, entrySteps) + } + val compiledExcludeExprs = excludeExprs + .groupBy { it.root } + .map { (root, exclusions) -> + val compiledExclusions = exclusions.fold(RemoveAndOtherSteps.empty()) { acc, exclusion -> + addToCompiledExcludeExprs(acc, exclusion.steps) + } + CompiledExcludeExpr(root.value.toInt(), compiledExclusions) + } + return compiledExcludeExprs +} + +/** + * Represents an instance of a compiled `EXCLUDE` 
expression. Notably, this expr will have redundant steps removed. + */ +data class CompiledExcludeExpr(val root: Int, val exclusions: RemoveAndOtherSteps) + +/** + * Represents all the exclusions at the current level and other nested levels. + * + * The idea behind this data structure is that at a current level (i.e. path step index), we keep track of the + * - Exclude paths that have a final exclude step at the current level. This set of tuple attributes and collection + * indexes to remove at the current level is modeled as a set of exclude steps (i.e. [RemoveAndOtherSteps.remove]). + * - Exclude paths that have additional steps (their final step is at a deeper level). This is modeled as a mapping + * of exclude steps to other [RemoveAndOtherSteps] to group all exclude paths that share the same current step. + * + * For example, let's say we have exclude paths (ignoring the exclude path root) of + * a.b, + * x.y.z1, + * x.y.z2 + * ^ ^ ^ + * Level 1 2 3 + * + * These exclude paths would be converted to the following in [RemoveAndOtherSteps]. 
+ * ``` + * // For demonstration purposes, the syntax 'x' corresponds to the exclude tuple attribute step for attribute `x` + * RemoveAndOtherSteps( // Level 1 (no exclusions at level 1) + * remove = emptySet(), + * steps = mapOf( + * 'a' to RemoveAndOtherSteps( // Level 2 for paths that have `'a'` in Level 1 + * remove = setOf('b'), // path `a.b` has final step at level 2 + * steps = emptyMap() + * ), + * 'x' to RemoveAndOtherSteps( // Level 2 for paths that have `'x'` in Level 1 + * remove = emptySet(), + * steps = mapOf( + * 'y' to RemoveAndOtherSteps( // Level 3 for paths that have `'y'` in Level 2 and `'x'` in Level 1 + * remove = setOf('z1', 'z2'), // paths `x.y.z1` and `x.y.z2` have final step at level 3 + * steps = emptyMap() + * ) + * ) + * ), + * ) + * ) + * ``` + */ +data class RemoveAndOtherSteps(val remove: Set, val steps: Map) { + companion object { + fun empty(): RemoveAndOtherSteps { + return RemoveAndOtherSteps(emptySet(), emptyMap()) + } + } +} diff --git a/partiql-eval/src/main/kotlin/org/partiql/lang/eval/physical/operators/RelationalOperatorFactory.kt b/partiql-eval/src/main/kotlin/org/partiql/lang/eval/physical/operators/RelationalOperatorFactory.kt index dd2ce4d424..b46224c82e 100644 --- a/partiql-eval/src/main/kotlin/org/partiql/lang/eval/physical/operators/RelationalOperatorFactory.kt +++ b/partiql-eval/src/main/kotlin/org/partiql/lang/eval/physical/operators/RelationalOperatorFactory.kt @@ -6,7 +6,7 @@ package org.partiql.lang.eval.physical.operators * * Implementations of this interface also define a `create` function, each with a different signature, but always * returning an instance of [RelationExpression], which is ready to be evaluated as part of query evaluation.
Within - * the `create` function, the factory factory may access any values placed in its + * the `create` function, the factory may access any values placed in its * [org.partiql.lang.domains.PartiqlPhysical.Impl.staticArgs], which may be relevant to the operator's implementation * and perform any compile-time initialization of the [RelationExpression]. */ diff --git a/partiql-eval/src/main/kotlin/org/partiql/lang/eval/physical/operators/RelationalOperatorKind.kt b/partiql-eval/src/main/kotlin/org/partiql/lang/eval/physical/operators/RelationalOperatorKind.kt index dd3af23f77..345b657205 100644 --- a/partiql-eval/src/main/kotlin/org/partiql/lang/eval/physical/operators/RelationalOperatorKind.kt +++ b/partiql-eval/src/main/kotlin/org/partiql/lang/eval/physical/operators/RelationalOperatorKind.kt @@ -19,5 +19,6 @@ enum class RelationalOperatorKind { LIMIT, LET, SORT, - AGGREGATE + AGGREGATE, + EXCLUDE, } diff --git a/partiql-eval/src/main/kotlin/org/partiql/lang/planner/transforms/AstToLogicalVisitorTransform.kt b/partiql-eval/src/main/kotlin/org/partiql/lang/planner/transforms/AstToLogicalVisitorTransform.kt index 9a8a626e0b..df6d323ec1 100644 --- a/partiql-eval/src/main/kotlin/org/partiql/lang/planner/transforms/AstToLogicalVisitorTransform.kt +++ b/partiql-eval/src/main/kotlin/org/partiql/lang/planner/transforms/AstToLogicalVisitorTransform.kt @@ -84,6 +84,10 @@ internal class AstToLogicalVisitorTransform( algebra = select.limit?.let { limit(transformExpr(it), algebra, it.metas) } ?: algebra + algebra = select.excludeClause?.let { excludeOp -> + excludeClause(algebra, excludeOp.exprs.map { transformExcludeExpr(it) }, excludeOp.metas) + } ?: algebra + val expr = transformProjection(select, algebra) when (node.setq) { is PartiqlAst.SetQuantifier.Distinct -> call("filter_distinct", expr) diff --git a/partiql-eval/src/main/kotlin/org/partiql/lang/planner/transforms/LogicalResolvedToDefaultPhysicalVisitorTransform.kt 
b/partiql-eval/src/main/kotlin/org/partiql/lang/planner/transforms/LogicalResolvedToDefaultPhysicalVisitorTransform.kt index 88c895680e..c74b71f378 100644 --- a/partiql-eval/src/main/kotlin/org/partiql/lang/planner/transforms/LogicalResolvedToDefaultPhysicalVisitorTransform.kt +++ b/partiql-eval/src/main/kotlin/org/partiql/lang/planner/transforms/LogicalResolvedToDefaultPhysicalVisitorTransform.kt @@ -168,6 +168,18 @@ internal class LogicalResolvedToDefaultPhysicalVisitorTransform( } } + override fun transformBexprExcludeClause(node: PartiqlLogicalResolved.Bexpr.ExcludeClause): PartiqlPhysical.Bexpr { + val thiz = this + return PartiqlPhysical.build { + excludeClause( + i = DEFAULT_IMPL, + source = thiz.transformBexpr(node.source), + exprs = node.exprs.map { transformExcludeExpr(it) }, + metas = node.metas + ) + } + } + override fun transformStatementQuery(node: PartiqlLogicalResolved.Statement.Query): PartiqlPhysical.Statement = PartiqlPhysical.build { query(transformExpr(node.expr), node.metas) } } diff --git a/partiql-eval/src/main/kotlin/org/partiql/lang/planner/transforms/LogicalToLogicalResolvedVisitorTransform.kt b/partiql-eval/src/main/kotlin/org/partiql/lang/planner/transforms/LogicalToLogicalResolvedVisitorTransform.kt index c5d0d5b545..258e6ef659 100644 --- a/partiql-eval/src/main/kotlin/org/partiql/lang/planner/transforms/LogicalToLogicalResolvedVisitorTransform.kt +++ b/partiql-eval/src/main/kotlin/org/partiql/lang/planner/transforms/LogicalToLogicalResolvedVisitorTransform.kt @@ -187,6 +187,24 @@ internal data class LogicalToLogicalResolvedVisitorTransform( localId_((-1).asPrimitive(), this@asErrorId.metas) } + override fun transformExcludeExpr(node: PartiqlLogical.ExcludeExpr): PartiqlLogicalResolved.ExcludeExpr { + val root = node.root + val bindingName = BindingName(root.name.text, root.case.toBindingCase()) + val index = when (val localResolutionResult = resolveLocalVariable(bindingName)) { + is ResolvedVariable.LocalVariable -> 
localResolutionResult.index + else -> -1 + } + val steps = node.steps.map { + this.transformExcludeStep(it) + } + return PartiqlLogicalResolved.build { + excludeExpr( + root = index.toLong(), + steps = steps + ) + } + } + override fun transformPlan(node: PartiqlLogical.Plan): PartiqlLogicalResolved.Plan = PartiqlLogicalResolved.build { plan_( @@ -244,6 +262,17 @@ internal data class LogicalToLogicalResolvedVisitorTransform( } } + override fun transformBexprExcludeClause_exprs(node: PartiqlLogical.Bexpr.ExcludeClause): List { + val bindings = getOutputScope(node).concatenate(this.inputScope) + return withInputScope(bindings) { + val exprs = node.exprs.map { transformExcludeExpr(it) } + exprs.filter { expr -> + // Filter out all unresolved local vars + expr.root.value.toInt() != -1 + } + } + } + override fun transformBexprWindow_windowSpecification(node: PartiqlLogical.Bexpr.Window): PartiqlLogicalResolved.Over { val bindings = getOutputScope(node).concatenate(this.inputScope) return withInputScope(bindings) { @@ -545,6 +574,10 @@ internal data class LogicalToLogicalResolvedVisitorTransform( val windowVariable = bexpr.windowExpressionList.map { it.decl } sourceScope.concatenate(windowVariable) } + + is PartiqlLogical.Bexpr.ExcludeClause -> { + getOutputScope(bexpr.source) + } } private fun LocalScope.concatenate(other: LocalScope): LocalScope = diff --git a/partiql-eval/src/test/kotlin/org/partiql/lang/eval/EvaluatingCompilerExcludeTests.kt b/partiql-eval/src/test/kotlin/org/partiql/lang/eval/EvaluatingCompilerExcludeTests.kt index e7990a6169..020d7d5e0f 100644 --- a/partiql-eval/src/test/kotlin/org/partiql/lang/eval/EvaluatingCompilerExcludeTests.kt +++ b/partiql-eval/src/test/kotlin/org/partiql/lang/eval/EvaluatingCompilerExcludeTests.kt @@ -9,9 +9,6 @@ import org.partiql.lang.eval.evaluatortestframework.PipelineEvaluatorTestAdapter import org.partiql.lang.util.ArgumentsProviderBase class EvaluatingCompilerExcludeTests : EvaluatorTestBase() { - - private val 
testHarness: EvaluatorTestAdapter = PipelineEvaluatorTestAdapter(CompilerPipelineFactory()) - class ExcludeTests : ArgumentsProviderBase() { override fun getParameters(): List = listOf( EvaluatorTestCase( @@ -810,7 +807,7 @@ class EvaluatingCompilerExcludeTests : EvaluatorTestBase() { @ParameterizedTest @ArgumentsSource(ExcludeTests::class) - fun validExcludeTests(tc: EvaluatorTestCase) = testHarness.runEvaluatorTestCase( + fun validExcludeTests(tc: EvaluatorTestCase) = runEvaluatorTestCase( tc, EvaluationSession.standard() ) diff --git a/partiql-eval/src/test/kotlin/org/partiql/lang/eval/internal/builtins/functions/FilterDistinctEvaluationTest.kt b/partiql-eval/src/test/kotlin/org/partiql/lang/eval/internal/builtins/functions/FilterDistinctEvaluationTest.kt index 4ae4f4fafc..56acaaf992 100644 --- a/partiql-eval/src/test/kotlin/org/partiql/lang/eval/internal/builtins/functions/FilterDistinctEvaluationTest.kt +++ b/partiql-eval/src/test/kotlin/org/partiql/lang/eval/internal/builtins/functions/FilterDistinctEvaluationTest.kt @@ -28,7 +28,7 @@ class FilterDistinctEvaluationTest : EvaluatorTestBase() { // These three tests ensure we can accept lists, bags, s-expressions and structs ExprFunctionTestCase( "filter_distinct([0, 0, 1])", - "$BAG_ANNOTATION::[0, 1]" + "[0, 1]" ), // list ExprFunctionTestCase( "filter_distinct(<<0, 0, 1>>)", @@ -46,15 +46,15 @@ class FilterDistinctEvaluationTest : EvaluatorTestBase() { // Some "smoke tests" to ensure the basic plumbing is working right. 
ExprFunctionTestCase( "filter_distinct(['foo', 'foo', 1, 1, `symbol`, `symbol`])", - "$BAG_ANNOTATION::[\"foo\", 1, symbol]" + "[\"foo\", 1, symbol]" ), ExprFunctionTestCase( "filter_distinct([{ 'a': 1 }, { 'a': 1 }, { 'a': 1 }])", - "$BAG_ANNOTATION::[{ 'a': 1 }]" + "[{ 'a': 1 }]" ), ExprFunctionTestCase( "filter_distinct([[1, 1], [1, 1], [2, 2]])", - "$BAG_ANNOTATION::[[1,1], [2, 2]]" + "[[1,1], [2, 2]]" ), ) } diff --git a/partiql-lang/src/main/kotlin/org/partiql/lang/eval/builtins/ScalarBuiltinsExt.kt b/partiql-lang/src/main/kotlin/org/partiql/lang/eval/builtins/ScalarBuiltinsExt.kt index 2ef0aaa812..1daa4860f7 100644 --- a/partiql-lang/src/main/kotlin/org/partiql/lang/eval/builtins/ScalarBuiltinsExt.kt +++ b/partiql-lang/src/main/kotlin/org/partiql/lang/eval/builtins/ScalarBuiltinsExt.kt @@ -110,7 +110,7 @@ internal object ExprFunctionUtcNow : ExprFunction { } /** - * Returns a bag of distinct values contained within a bag, list, sexp, or struct. + * Returns a bag or list of distinct values contained within a bag, list, sexp, or struct. * If the container is a struct, the field names are not considered. 
*/ internal object ExprFunctionFilterDistinct : ExprFunction { @@ -118,23 +118,25 @@ internal object ExprFunctionFilterDistinct : ExprFunction { override val signature = FunctionSignature( name = "filter_distinct", requiredParameters = listOf(unionOf(StaticType.BAG, StaticType.LIST, StaticType.SEXP, StaticType.STRUCT)), - returnType = StaticType.BAG + returnType = unionOf(StaticType.BAG, StaticType.LIST) ) override fun callWithRequired(session: EvaluationSession, required: List): ExprValue { val argument = required.first() // We cannot use a [HashSet] here because [ExprValue] does not implement .equals() and .hashCode() val encountered = TreeSet(DEFAULT_COMPARATOR) - return ExprValue.newBag( - sequence { - argument.asSequence().forEach { - if (!encountered.contains(it)) { - encountered.add(it.unnamedValue()) - yield(it) - } + val seq = sequence { + argument.asSequence().forEach { + if (!encountered.contains(it)) { + encountered.add(it.unnamedValue()) + yield(it) } } - ) + } + return when (argument.type) { + ExprValueType.LIST -> ExprValue.newList(seq) + else -> ExprValue.newBag(seq) + } } }