Skip to content

Commit

Permalink
Enable subfield pruning to pass through arbitrary() function
Browse files Browse the repository at this point in the history
Subfield pruning applies to queries accessing columns of complex types: maps,
arrays and structs. Queries that use only some indices, keys or fields of
these columns are optimized so that only necessary data is extracted during
the table scan. This commit enables subfield pruning to pass through the
arbitrary() aggregate function.
  • Loading branch information
Gautam Parai authored and mbasmanova committed Dec 13, 2019
1 parent 62c4524 commit a123e5c
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,24 @@ private void assertPushdownSubscripts(String tableName)
assertPushdownSubfields(format("SELECT min(a[1]) FROM %s GROUP BY id", tableName), tableName,
ImmutableMap.of("a", toSubfields("a[1]")));

assertPushdownSubfields(format("SELECT arbitrary(y[1]).a FROM %s GROUP BY id", tableName), tableName,
ImmutableMap.of("y", toSubfields("y[1].a")));

assertPushdownSubfields(format("SELECT arbitrary(y[1]).d.d1 FROM %s GROUP BY id", tableName), tableName,
ImmutableMap.of("y", toSubfields("y[1].d.d1")));

assertPushdownSubfields(format("SELECT arbitrary(y[2].d).d1 FROM %s GROUP BY id", tableName), tableName,
ImmutableMap.of("y", toSubfields("y[2].d.d1")));

assertPushdownSubfields(format("SELECT arbitrary(y[3].d.d1) FROM %s GROUP BY id", tableName), tableName,
ImmutableMap.of("y", toSubfields("y[3].d.d1")));

assertPushdownSubfields(format("SELECT arbitrary(z[1][2]).e.e1 FROM %s GROUP BY id", tableName), tableName,
ImmutableMap.of("z", toSubfields("z[1][2].e.e1")));

assertPushdownSubfields(format("SELECT arbitrary(z[2][3].e).e2 FROM %s GROUP BY id", tableName), tableName,
ImmutableMap.of("z", toSubfields("z[2][3].e.e2")));

// Union
assertPlan(format("SELECT a[1] FROM %s UNION ALL SELECT a[2] FROM %s", tableName, tableName),
anyTree(exchange(
Expand Down Expand Up @@ -531,6 +549,14 @@ private void assertPushdownSubscripts(String tableName)

assertPushdownSubfields(format("SELECT a[1] FROM (SELECT DISTINCT * FROM %s) LIMIT 10", tableName), tableName,
ImmutableMap.of());

// No pass-through subfield pruning
assertPushdownSubfields(format("SELECT id, min(y[1]).a FROM %s GROUP BY 1", tableName), tableName,
ImmutableMap.of("y", toSubfields("y[1]")));
assertPushdownSubfields(format("SELECT id, min(y[1]).a, min(y[1].d).d1 FROM %s GROUP BY 1", tableName), tableName,
ImmutableMap.of("y", toSubfields("y[1]")));
assertPushdownSubfields(format("SELECT id, min(z[1][2]).e.e1 FROM %s GROUP BY 1", tableName), tableName,
ImmutableMap.of("z", toSubfields("z[1][2]")));
}

@Test
Expand Down Expand Up @@ -580,6 +606,21 @@ public void testPushdownSubfields()
assertPushdownSubfields("SELECT id, min(x.a + length(y[2].b)) * avg(x.d.d1) FROM test_pushdown_struct_subfields GROUP BY 1", "test_pushdown_struct_subfields",
ImmutableMap.of("x", toSubfields("x.a", "x.d.d1"), "y", toSubfields("y[2].b")));

assertPushdownSubfields("SELECT id, arbitrary(x.a) FROM test_pushdown_struct_subfields GROUP BY 1", "test_pushdown_struct_subfields",
ImmutableMap.of("x", toSubfields("x.a")));

assertPushdownSubfields("SELECT id, arbitrary(x).a FROM test_pushdown_struct_subfields GROUP BY 1", "test_pushdown_struct_subfields",
ImmutableMap.of("x", toSubfields("x.a")));

assertPushdownSubfields("SELECT id, arbitrary(x).d.d1 FROM test_pushdown_struct_subfields GROUP BY 1", "test_pushdown_struct_subfields",
ImmutableMap.of("x", toSubfields("x.d.d1")));

assertPushdownSubfields("SELECT id, arbitrary(x.d).d1 FROM test_pushdown_struct_subfields GROUP BY 1", "test_pushdown_struct_subfields",
ImmutableMap.of("x", toSubfields("x.d.d1")));

assertPushdownSubfields("SELECT id, arbitrary(x.d.d2) FROM test_pushdown_struct_subfields GROUP BY 1", "test_pushdown_struct_subfields",
ImmutableMap.of("x", toSubfields("x.d.d2")));

// Unnest
assertPushdownSubfields("SELECT t.a, t.d.d1, x.a FROM test_pushdown_struct_subfields CROSS JOIN UNNEST(y) as t(a, b, c, d)", "test_pushdown_struct_subfields",
ImmutableMap.of("x", toSubfields("x.a"), "y", toSubfields("y[*].a", "y[*].d.d1")));
Expand All @@ -605,6 +646,13 @@ public void testPushdownSubfields()
assertPushdownSubfields("SELECT x.a, x.b, x.A + 2 FROM test_pushdown_struct_subfields WHERE x.B LIKE 'abc%'", "test_pushdown_struct_subfields",
ImmutableMap.of("x", toSubfields("x.a", "x.b")));

// No pass-through subfield pruning
assertPushdownSubfields("SELECT id, min(x.d).d1 FROM test_pushdown_struct_subfields GROUP BY 1", "test_pushdown_struct_subfields",
ImmutableMap.of("x", toSubfields("x.d")));

assertPushdownSubfields("SELECT id, min(x.d).d1, min(x.d.d2) FROM test_pushdown_struct_subfields GROUP BY 1", "test_pushdown_struct_subfields",
ImmutableMap.of("x", toSubfields("x.d")));

assertUpdate("DROP TABLE test_pushdown_struct_subfields");
}

Expand Down Expand Up @@ -641,6 +689,44 @@ public void testPushdownSubfieldsAssorted()
"a", toSubfields("a[1]"),
"d", toSubfields("d.d3[5]")));

// Subfield pruning should pass through the arbitrary() function
assertPushdownSubfields("SELECT id, " +
"arbitrary(x.a), " +
"arbitrary(x).a, " +
"arbitrary(x).d.d1, " +
"arbitrary(x.d).d1, " +
"arbitrary(x.d.d2), " +
"arbitrary(y[1]).a, " +
"arbitrary(y[1]).d.d1, " +
"arbitrary(y[2]).d.d1, " +
"arbitrary(y[3].d.d1), " +
"arbitrary(z).c, " +
"arbitrary(w[1][2]).e.e1, " +
"arbitrary(w[2][3].e.e2) " +
"FROM test_pushdown_subfields " +
"GROUP BY 1", "test_pushdown_subfields",
ImmutableMap.of("x", toSubfields("x.a", "x.d.d1", "x.d.d2"),
"y", toSubfields("y[1].a", "y[1].d.d1", "y[2].d.d1", "y[3].d.d1"),
"z", toSubfields("z.c"),
"w", toSubfields("w[1][2].e.e1", "w[2][3].e.e2")));

// Subfield pruning should not pass through other aggregate functions, e.g. the min() function
assertPushdownSubfields("SELECT id, " +
"min(x.d).d1, " +
"min(x.d.d2), " +
"min(z).c, " +
"min(z.b), " +
"min(y[1]).a, " +
"min(y[1]).d.d1, " +
"min(y[2].d.d1), " +
"min(w[1][2]).e.e1, " +
"min(w[2][3].e.e2) " +
"FROM test_pushdown_subfields " +
"GROUP BY 1", "test_pushdown_subfields",
ImmutableMap.of("x", toSubfields("x.d"),
"y", toSubfields("y[1]", "y[2].d.d1"),
"w", toSubfields("w[1][2]", "w[2][3].e.e2")));

assertUpdate("DROP TABLE test_pushdown_subfields");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import com.facebook.presto.spi.Subfield;
import com.facebook.presto.spi.Subfield.NestedField;
import com.facebook.presto.spi.TableHandle;
import com.facebook.presto.spi.function.QualifiedFunctionName;
import com.facebook.presto.spi.plan.AggregationNode;
import com.facebook.presto.spi.plan.FilterNode;
import com.facebook.presto.spi.plan.OrderingScheme;
Expand Down Expand Up @@ -78,6 +79,7 @@

import static com.facebook.presto.SystemSessionProperties.isLegacyUnnest;
import static com.facebook.presto.SystemSessionProperties.isPushdownSubfieldsEnabled;
import static com.facebook.presto.metadata.BuiltInFunctionNamespaceManager.DEFAULT_NAMESPACE;
import static com.facebook.presto.spi.Subfield.allSubscripts;
import static com.facebook.presto.spi.type.BigintType.BIGINT;
import static com.facebook.presto.sql.relational.OriginalExpressionUtils.castToExpression;
Expand Down Expand Up @@ -116,6 +118,7 @@ private static class Rewriter
private final Metadata metadata;
private final TypeProvider types;
private final SubfieldExtractor subfieldExtractor;
private static final QualifiedFunctionName ARBITRARY_AGGREGATE_FUNCTION = QualifiedFunctionName.of(DEFAULT_NAMESPACE, "arbitrary");

public Rewriter(Session session, Metadata metadata, TypeProvider types)
{
Expand All @@ -130,8 +133,19 @@ public PlanNode visitAggregation(AggregationNode node, RewriteContext<Context> c
{
context.get().variables.addAll(node.getGroupingKeys());

for (AggregationNode.Aggregation aggregation : node.getAggregations().values()) {
aggregation.getArguments().forEach(expression -> subfieldExtractor.process(castToExpression(expression), context.get()));
for (Map.Entry<VariableReferenceExpression, AggregationNode.Aggregation> entry : node.getAggregations().entrySet()) {
VariableReferenceExpression variable = entry.getKey();
AggregationNode.Aggregation aggregation = entry.getValue();

// Allow subfield pruning to pass through the arbitrary() aggregation
QualifiedFunctionName aggregateName = metadata.getFunctionManager().getFunctionMetadata(aggregation.getCall().getFunctionHandle()).getName();
if (ARBITRARY_AGGREGATE_FUNCTION.equals(aggregateName)) {
SymbolReference argument = (SymbolReference) castToExpression(aggregation.getArguments().get(0));
context.get().addAssignment(variable, new VariableReferenceExpression(argument.getName(), types.get(argument)));
}
else {
aggregation.getArguments().forEach(expression -> subfieldExtractor.process(castToExpression(expression), context.get()));
}

aggregation.getFilter().ifPresent(expression -> subfieldExtractor.process(castToExpression(expression), context.get()));

Expand Down

0 comments on commit a123e5c

Please sign in to comment.