From a123e5c2a54bfdd7b2ee8860ae62671c146bbdd4 Mon Sep 17 00:00:00 2001 From: Gautam Parai Date: Wed, 11 Dec 2019 22:55:19 -0800 Subject: [PATCH] Enable subfield pruning to pass through arbitrary() function Subfield pruning applies to queries accessing columns of complex types: maps, arrays and structs. Queries that use only some indices, keys or fields of these columns are optimized so that only necessary data is extracted during the table scan. This commit enables subfield pruning to pass through the arbitrary() aggregate function. --- .../presto/hive/TestHiveLogicalPlanner.java | 86 +++++++++++++++++++ .../optimizations/PushdownSubfields.java | 18 +++- 2 files changed, 102 insertions(+), 2 deletions(-) diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveLogicalPlanner.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveLogicalPlanner.java index 4d422b8d3a76..a45b3eeabffb 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveLogicalPlanner.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveLogicalPlanner.java @@ -490,6 +490,24 @@ private void assertPushdownSubscripts(String tableName) assertPushdownSubfields(format("SELECT min(a[1]) FROM %s GROUP BY id", tableName), tableName, ImmutableMap.of("a", toSubfields("a[1]"))); + assertPushdownSubfields(format("SELECT arbitrary(y[1]).a FROM %s GROUP BY id", tableName), tableName, + ImmutableMap.of("y", toSubfields("y[1].a"))); + + assertPushdownSubfields(format("SELECT arbitrary(y[1]).d.d1 FROM %s GROUP BY id", tableName), tableName, + ImmutableMap.of("y", toSubfields("y[1].d.d1"))); + + assertPushdownSubfields(format("SELECT arbitrary(y[2].d).d1 FROM %s GROUP BY id", tableName), tableName, + ImmutableMap.of("y", toSubfields("y[2].d.d1"))); + + assertPushdownSubfields(format("SELECT arbitrary(y[3].d.d1) FROM %s GROUP BY id", tableName), tableName, + ImmutableMap.of("y", toSubfields("y[3].d.d1"))); + + assertPushdownSubfields(format("SELECT 
arbitrary(z[1][2]).e.e1 FROM %s GROUP BY id", tableName), tableName, + ImmutableMap.of("z", toSubfields("z[1][2].e.e1"))); + + assertPushdownSubfields(format("SELECT arbitrary(z[2][3].e).e2 FROM %s GROUP BY id", tableName), tableName, + ImmutableMap.of("z", toSubfields("z[2][3].e.e2"))); + // Union assertPlan(format("SELECT a[1] FROM %s UNION ALL SELECT a[2] FROM %s", tableName, tableName), anyTree(exchange( @@ -531,6 +549,14 @@ private void assertPushdownSubscripts(String tableName) assertPushdownSubfields(format("SELECT a[1] FROM (SELECT DISTINCT * FROM %s) LIMIT 10", tableName), tableName, ImmutableMap.of()); + + // No pass-through subfield pruning + assertPushdownSubfields(format("SELECT id, min(y[1]).a FROM %s GROUP BY 1", tableName), tableName, + ImmutableMap.of("y", toSubfields("y[1]"))); + assertPushdownSubfields(format("SELECT id, min(y[1]).a, min(y[1].d).d1 FROM %s GROUP BY 1", tableName), tableName, + ImmutableMap.of("y", toSubfields("y[1]"))); + assertPushdownSubfields(format("SELECT id, min(z[1][2]).e.e1 FROM %s GROUP BY 1", tableName), tableName, + ImmutableMap.of("z", toSubfields("z[1][2]"))); } @Test @@ -580,6 +606,21 @@ public void testPushdownSubfields() assertPushdownSubfields("SELECT id, min(x.a + length(y[2].b)) * avg(x.d.d1) FROM test_pushdown_struct_subfields GROUP BY 1", "test_pushdown_struct_subfields", ImmutableMap.of("x", toSubfields("x.a", "x.d.d1"), "y", toSubfields("y[2].b"))); + assertPushdownSubfields("SELECT id, arbitrary(x.a) FROM test_pushdown_struct_subfields GROUP BY 1", "test_pushdown_struct_subfields", + ImmutableMap.of("x", toSubfields("x.a"))); + + assertPushdownSubfields("SELECT id, arbitrary(x).a FROM test_pushdown_struct_subfields GROUP BY 1", "test_pushdown_struct_subfields", + ImmutableMap.of("x", toSubfields("x.a"))); + + assertPushdownSubfields("SELECT id, arbitrary(x).d.d1 FROM test_pushdown_struct_subfields GROUP BY 1", "test_pushdown_struct_subfields", + ImmutableMap.of("x", toSubfields("x.d.d1"))); + + 
assertPushdownSubfields("SELECT id, arbitrary(x.d).d1 FROM test_pushdown_struct_subfields GROUP BY 1", "test_pushdown_struct_subfields", + ImmutableMap.of("x", toSubfields("x.d.d1"))); + + assertPushdownSubfields("SELECT id, arbitrary(x.d.d2) FROM test_pushdown_struct_subfields GROUP BY 1", "test_pushdown_struct_subfields", + ImmutableMap.of("x", toSubfields("x.d.d2"))); + // Unnest assertPushdownSubfields("SELECT t.a, t.d.d1, x.a FROM test_pushdown_struct_subfields CROSS JOIN UNNEST(y) as t(a, b, c, d)", "test_pushdown_struct_subfields", ImmutableMap.of("x", toSubfields("x.a"), "y", toSubfields("y[*].a", "y[*].d.d1"))); @@ -605,6 +646,13 @@ public void testPushdownSubfields() assertPushdownSubfields("SELECT x.a, x.b, x.A + 2 FROM test_pushdown_struct_subfields WHERE x.B LIKE 'abc%'", "test_pushdown_struct_subfields", ImmutableMap.of("x", toSubfields("x.a", "x.b"))); + // No pass-through subfield pruning + assertPushdownSubfields("SELECT id, min(x.d).d1 FROM test_pushdown_struct_subfields GROUP BY 1", "test_pushdown_struct_subfields", + ImmutableMap.of("x", toSubfields("x.d"))); + + assertPushdownSubfields("SELECT id, min(x.d).d1, min(x.d.d2) FROM test_pushdown_struct_subfields GROUP BY 1", "test_pushdown_struct_subfields", + ImmutableMap.of("x", toSubfields("x.d"))); + assertUpdate("DROP TABLE test_pushdown_struct_subfields"); } @@ -641,6 +689,44 @@ public void testPushdownSubfieldsAssorted() "a", toSubfields("a[1]"), "d", toSubfields("d.d3[5]"))); + // Subfield pruning should pass through the arbitrary() function + assertPushdownSubfields("SELECT id, " + + "arbitrary(x.a), " + + "arbitrary(x).a, " + + "arbitrary(x).d.d1, " + + "arbitrary(x.d).d1, " + + "arbitrary(x.d.d2), " + + "arbitrary(y[1]).a, " + + "arbitrary(y[1]).d.d1, " + + "arbitrary(y[2]).d.d1, " + + "arbitrary(y[3].d.d1), " + + "arbitrary(z).c, " + + "arbitrary(w[1][2]).e.e1, " + + "arbitrary(w[2][3].e.e2) " + + "FROM test_pushdown_subfields " + + "GROUP BY 1", "test_pushdown_subfields", + 
ImmutableMap.of("x", toSubfields("x.a", "x.d.d1", "x.d.d2"), + "y", toSubfields("y[1].a", "y[1].d.d1", "y[2].d.d1", "y[3].d.d1"), + "z", toSubfields("z.c"), + "w", toSubfields("w[1][2].e.e1", "w[2][3].e.e2"))); + + // Subfield pruning should not pass through other aggregate functions, e.g. the min() function + assertPushdownSubfields("SELECT id, " + + "min(x.d).d1, " + + "min(x.d.d2), " + + "min(z).c, " + + "min(z.b), " + + "min(y[1]).a, " + + "min(y[1]).d.d1, " + + "min(y[2].d.d1), " + + "min(w[1][2]).e.e1, " + + "min(w[2][3].e.e2) " + + "FROM test_pushdown_subfields " + + "GROUP BY 1", "test_pushdown_subfields", + ImmutableMap.of("x", toSubfields("x.d"), + "y", toSubfields("y[1]", "y[2].d.d1"), + "w", toSubfields("w[1][2]", "w[2][3].e.e2"))); + assertUpdate("DROP TABLE test_pushdown_subfields"); } diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/PushdownSubfields.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/PushdownSubfields.java index 1d09a16bc410..bc88c3b329c7 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/PushdownSubfields.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/PushdownSubfields.java @@ -20,6 +20,7 @@ import com.facebook.presto.spi.Subfield; import com.facebook.presto.spi.Subfield.NestedField; import com.facebook.presto.spi.TableHandle; +import com.facebook.presto.spi.function.QualifiedFunctionName; import com.facebook.presto.spi.plan.AggregationNode; import com.facebook.presto.spi.plan.FilterNode; import com.facebook.presto.spi.plan.OrderingScheme; @@ -78,6 +79,7 @@ import static com.facebook.presto.SystemSessionProperties.isLegacyUnnest; import static com.facebook.presto.SystemSessionProperties.isPushdownSubfieldsEnabled; +import static com.facebook.presto.metadata.BuiltInFunctionNamespaceManager.DEFAULT_NAMESPACE; import static com.facebook.presto.spi.Subfield.allSubscripts; import static 
com.facebook.presto.spi.type.BigintType.BIGINT; import static com.facebook.presto.sql.relational.OriginalExpressionUtils.castToExpression; @@ -116,6 +118,7 @@ private static class Rewriter private final Metadata metadata; private final TypeProvider types; private final SubfieldExtractor subfieldExtractor; + private static final QualifiedFunctionName ARBITRARY_AGGREGATE_FUNCTION = QualifiedFunctionName.of(DEFAULT_NAMESPACE, "arbitrary"); public Rewriter(Session session, Metadata metadata, TypeProvider types) { @@ -130,8 +133,19 @@ public PlanNode visitAggregation(AggregationNode node, RewriteContext c { context.get().variables.addAll(node.getGroupingKeys()); - for (AggregationNode.Aggregation aggregation : node.getAggregations().values()) { - aggregation.getArguments().forEach(expression -> subfieldExtractor.process(castToExpression(expression), context.get())); + for (Map.Entry entry : node.getAggregations().entrySet()) { + VariableReferenceExpression variable = entry.getKey(); + AggregationNode.Aggregation aggregation = entry.getValue(); + + // Allow sub-field pruning to pass through the arbitrary() aggregation + QualifiedFunctionName aggregateName = metadata.getFunctionManager().getFunctionMetadata(aggregation.getCall().getFunctionHandle()).getName(); + if (ARBITRARY_AGGREGATE_FUNCTION.equals(aggregateName)) { + SymbolReference argument = (SymbolReference) castToExpression(aggregation.getArguments().get(0)); + context.get().addAssignment(variable, new VariableReferenceExpression(argument.getName(), types.get(argument))); + } + else { + aggregation.getArguments().forEach(expression -> subfieldExtractor.process(castToExpression(expression), context.get())); + } aggregation.getFilter().ifPresent(expression -> subfieldExtractor.process(castToExpression(expression), context.get()));