Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql/analyzer: Make indexed joins more aware of NULL safe comparisons. #1080

22 changes: 21 additions & 1 deletion enginetest/queries/queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -2180,6 +2180,26 @@ var QueryTests = []QueryTest{
Query: "SELECT i FROM niltable WHERE i2 IS NOT NULL ORDER BY 1",
Expected: []sql.Row{{int64(2)}, {int64(4)}, {int64(6)}},
},
{
Query: "SELECT * FROM niltable WHERE i2 = NULL",
Expected: []sql.Row{},
},
{
Query: "SELECT i2 FROM niltable WHERE i2 <=> NULL",
Expected: []sql.Row{{nil}, {nil}, {nil}},
},
{
Query: "SELECT l.i, r.i2 FROM niltable l INNER JOIN niltable r ON l.i2 = r.i2 ORDER BY 1",
Expected: []sql.Row{{2, 2}, {4, 4}, {6, 6}},
},
{
Query: "SELECT l.i, r.i2 FROM niltable l INNER JOIN niltable r ON l.i2 != r.i2 ORDER BY 1, 2",
Expected: []sql.Row{{2, 4}, {2, 6}, {4, 2}, {4, 6}, {6, 2}, {6, 4}},
},
{
Query: "SELECT l.i, r.i2 FROM niltable l INNER JOIN niltable r ON l.i2 <=> r.i2 ORDER BY 1 ASC",
Expected: []sql.Row{{1, nil}, {1, nil}, {1, nil}, {2, 2}, {3, nil}, {3, nil}, {3, nil}, {4, 4}, {5, nil}, {5, nil}, {5, nil}, {6, 6}},
},
{
Query: "select i from datetime_table where date_col = date('2019-12-31T12:00:00')",
Expected: []sql.Row{{1}},
Expand Down Expand Up @@ -5987,7 +6007,7 @@ var QueryTests = []QueryTest{
},
{
Query: "SELECT NULL <=> NULL FROM dual",
Expected: []sql.Row{{1}},
Expected: []sql.Row{{true}},
},
{
Query: "SELECT POW(2,3) FROM dual",
Expand Down
36 changes: 36 additions & 0 deletions enginetest/queries/query_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -1497,6 +1497,17 @@ var PlanTests = []QueryPlanTest{
" └─ IndexedTableAccess(niltable on [niltable.i])\n" +
"",
},
{
Query: `SELECT l.i, r.i2 FROM niltable l INNER JOIN niltable r ON l.i2 <=> r.i2 ORDER BY 1 ASC`,
ExpectedPlan: "Sort(l.i ASC)\n" +
" └─ Project(l.i, r.i2)\n" +
" └─ IndexedJoin(l.i2 <=> r.i2)\n" +
" ├─ TableAlias(l)\n" +
" │ └─ Table(niltable)\n" +
" └─ TableAlias(r)\n" +
" └─ IndexedTableAccess(niltable on [niltable.i2])\n" +
"",
},
{
Query: `SELECT pk,i,f FROM one_pk RIGHT JOIN niltable ON pk=i WHERE pk > 0`,
ExpectedPlan: "Project(one_pk.pk, niltable.i, niltable.f)\n" +
Expand Down Expand Up @@ -1639,6 +1650,31 @@ var PlanTests = []QueryPlanTest{
" └─ IndexedTableAccess(two_pk on [two_pk.pk1,two_pk.pk2])\n" +
"",
},
{
Query: `SELECT * FROM niltable WHERE i2 = NULL`,
ExpectedPlan: "Projected table access on [i i2 b f]\n" +
" └─ IndexedTableAccess(niltable on [niltable.i2] with ranges: [{(∞, ∞)}])\n" +
"",
},
{
Query: `SELECT * FROM niltable WHERE i2 <> NULL`,
ExpectedPlan: "Filter(NOT((niltable.i2 = NULL)))\n" +
" └─ Projected table access on [i i2 b f]\n" +
" └─ IndexedTableAccess(niltable on [niltable.i2] with ranges: [{(∞, ∞)}])\n" +
"",
},
{
Query: `SELECT * FROM niltable WHERE i2 > NULL`,
ExpectedPlan: "Projected table access on [i i2 b f]\n" +
" └─ IndexedTableAccess(niltable on [niltable.i2] with ranges: [{(∞, ∞)}])\n" +
"",
},
{
Query: `SELECT * FROM niltable WHERE i2 <=> NULL`,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this look like on indexes of multiple columns?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IndexBuilder always builds RangeCollections where each Range has a RangeColumnExpr for every expression in the index. By default, it's going to return a single range that matches everything for every column (so [NULL, ∞) in the new syntax).

For the same clause on a two-column index, it's either going to return ([NULL, NULL], [NULL, ∞)) or ([NULL, ∞), [NULL, NULL]), depending on whether the targeted column comes first or second in the index.

For an AND on both columns <=> NULL it will return ([NULL, NULL], [NULL, NULL]). There's a bunch of tests in index_query_plans for how this shakes out in different cases, including when the analyzer is managing conjunctions and disjunctions, etc.

ExpectedPlan: "Projected table access on [i i2 b f]\n" +
" └─ IndexedTableAccess(niltable on [niltable.i2] with ranges: [{[NULL, NULL]}])\n" +
"",
},
{
Query: `SELECT pk,i,f FROM one_pk LEFT JOIN niltable ON pk=i ORDER BY 1`,
ExpectedPlan: "Sort(one_pk.pk ASC)\n" +
Expand Down
6 changes: 3 additions & 3 deletions memory/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ func (idx *Index) NewLookup(ctx *sql.Context, ranges ...sql.Range) (sql.IndexLoo
rangeColumnExpr = expression.NewEquals(expression.NewLiteral(1, sql.Int8), expression.NewLiteral(2, sql.Int8))
case sql.RangeType_All:
rangeColumnExpr = expression.NewEquals(expression.NewLiteral(1, sql.Int8), expression.NewLiteral(1, sql.Int8))
case sql.RangeType_Null:
case sql.RangeType_EqualNull:
rangeColumnExpr = expression.NewIsNull(idx.Exprs[i])
case sql.RangeType_GreaterThan:
if sql.RangeCutIsBinding(rce.LowerBound) {
Expand All @@ -105,13 +105,13 @@ func (idx *Index) NewLookup(ctx *sql.Context, ranges ...sql.Range) (sql.IndexLoo
case sql.RangeType_GreaterOrEqual:
lit, typ := getType(sql.GetRangeCutKey(rce.LowerBound))
rangeColumnExpr = expression.NewGreaterThanOrEqual(idx.Exprs[i], expression.NewLiteral(lit, typ))
case sql.RangeType_LessThan:
case sql.RangeType_LessThanOrNull:
lit, typ := getType(sql.GetRangeCutKey(rce.UpperBound))
rangeColumnExpr = or(
expression.NewLessThan(idx.Exprs[i], expression.NewLiteral(lit, typ)),
expression.NewIsNull(idx.Exprs[i]),
)
case sql.RangeType_LessOrEqual:
case sql.RangeType_LessOrEqualOrNull:
lit, typ := getType(sql.GetRangeCutKey(rce.UpperBound))
rangeColumnExpr = or(
expression.NewLessThanOrEqual(idx.Exprs[i], expression.NewLiteral(lit, typ)),
Expand Down
4 changes: 3 additions & 1 deletion sql/analyzer/apply_indexes_for_subquery_comparisons.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ func getIndexedInSubqueryFilter(ctx *sql.Context, a *Analyzer, left, right sql.E
return nil
}
keyExpr := gf.WithIndex(0)
ita := plan.NewIndexedTableAccess(rt, idx, []sql.Expression{keyExpr})
// We currently only support *expresssion.Equals and *InSubquery; neither matches null.
nullmask := []bool{false}
ita := plan.NewIndexedTableAccess(rt, plan.NewLookupBuilder(idx, []sql.Expression{keyExpr}, nullmask))
if canBuildIndex, err := ita.CanBuildIndex(ctx); err != nil || !canBuildIndex {
return nil
}
Expand Down
34 changes: 19 additions & 15 deletions sql/analyzer/apply_indexes_from_outer_scope.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ func applyIndexesFromOuterScope(ctx *sql.Context, a *Analyzer, n sql.Node, scope
return n, transform.SameTree, nil
case *plan.TableAlias:
if strings.ToLower(n.Name()) == idxLookup.table {
return pushdownIndexToTable(a, n, idxLookup.index, idxLookup.keyExpr)
return pushdownIndexToTable(a, n, idxLookup.index, idxLookup.keyExpr, idxLookup.nullmask)
}
return n, transform.SameTree, nil
case *plan.ResolvedTable:
if strings.ToLower(n.Name()) == idxLookup.table {
return pushdownIndexToTable(a, n, idxLookup.index, idxLookup.keyExpr)
return pushdownIndexToTable(a, n, idxLookup.index, idxLookup.keyExpr, idxLookup.nullmask)
}
return n, transform.SameTree, nil
default:
Expand All @@ -91,7 +91,7 @@ func applyIndexesFromOuterScope(ctx *sql.Context, a *Analyzer, n sql.Node, scope

// pushdownIndexToTable attempts to push the index given down to the table given, if it implements
// sql.IndexAddressableTable
func pushdownIndexToTable(a *Analyzer, tableNode NameableNode, index sql.Index, keyExpr []sql.Expression) (sql.Node, transform.TreeIdentity, error) {
func pushdownIndexToTable(a *Analyzer, tableNode NameableNode, index sql.Index, keyExpr []sql.Expression, nullmask []bool) (sql.Node, transform.TreeIdentity, error) {
return transform.Node(tableNode, func(n sql.Node) (sql.Node, transform.TreeIdentity, error) {
switch n := n.(type) {
case *plan.ResolvedTable:
Expand All @@ -101,17 +101,18 @@ func pushdownIndexToTable(a *Analyzer, tableNode NameableNode, index sql.Index,
}
if _, ok := table.(sql.IndexAddressableTable); ok {
a.Log("table %q transformed with pushdown of index", tableNode.Name())
return plan.NewIndexedTableAccess(n, index, keyExpr), transform.NewTree, nil
return plan.NewIndexedTableAccess(n, plan.NewLookupBuilder(index, keyExpr, nullmask)), transform.NewTree, nil
}
}
return n, transform.SameTree, nil
})
}

type subqueryIndexLookup struct {
table string
keyExpr []sql.Expression
index sql.Index
table string
keyExpr []sql.Expression
nullmask []bool
index sql.Index
}

func getOuterScopeIndexes(
Expand Down Expand Up @@ -160,7 +161,7 @@ func getOuterScopeIndexes(
for table, idx := range indexes {
if exprsByTable[table] != nil {
// creating a key expression can fail in some cases, just skip this table
keyExpr, err := createIndexKeyExpr(ctx, idx, exprsByTable[table], tableAliases)
keyExpr, nullmask, err := createIndexKeyExpr(ctx, idx, exprsByTable[table], tableAliases)
if err != nil {
return nil, err
}
Expand All @@ -169,9 +170,10 @@ func getOuterScopeIndexes(
}

lookups = append(lookups, subqueryIndexLookup{
table: table,
keyExpr: keyExpr,
index: idx,
table: table,
keyExpr: keyExpr,
nullmask: nullmask,
index: idx,
})
}
}
Expand All @@ -180,34 +182,36 @@ func getOuterScopeIndexes(
}

// createIndexKeyExpr returns a slice of expressions to be used when creating an index lookup key for the table given.
func createIndexKeyExpr(ctx *sql.Context, idx sql.Index, joinExprs []*joinColExpr, tableAliases TableAliases) ([]sql.Expression, error) {
func createIndexKeyExpr(ctx *sql.Context, idx sql.Index, joinExprs []*joinColExpr, tableAliases TableAliases) ([]sql.Expression, []bool, error) {
// To allow partial matching, we need to see if the expressions are a prefix of the index
idxExpressions := idx.Expressions()
normalizedJoinExprStrs := make([]string, len(joinExprs))
for i := range joinExprs {
normalizedJoinExprStrs[i] = normalizeExpression(ctx, tableAliases, joinExprs[i].colExpr).String()
}
if ok, prefixCount := exprsAreIndexSubset(normalizedJoinExprStrs, idxExpressions); !ok || prefixCount != len(normalizedJoinExprStrs) {
return nil, nil
return nil, nil, nil
}
// Since the expressions are a prefix, we cut the index expressions we are using to just those involved
idxPrefixExpressions := idxExpressions[:len(normalizedJoinExprStrs)]

keyExprs := make([]sql.Expression, len(idxPrefixExpressions))
nullmask := make([]bool, len(idxPrefixExpressions))
IndexExpressions:
for i, idxExpr := range idxPrefixExpressions {
for j := range joinExprs {
if idxExpr == normalizedJoinExprStrs[j] {
keyExprs[i] = joinExprs[j].comparand
nullmask[i] = joinExprs[j].matchnull
continue IndexExpressions
}
}

return nil, fmt.Errorf("index `%s` reported having prefix of `%v` but has expressions `%v`",
return nil, nil, fmt.Errorf("index `%s` reported having prefix of `%v` but has expressions `%v`",
idx.ID(), normalizedJoinExprStrs, idxExpressions)
}

return keyExprs, nil
return keyExprs, nullmask, nil
}

func getSubqueryIndexes(
Expand Down
32 changes: 26 additions & 6 deletions sql/analyzer/indexed_joins.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,12 +216,12 @@ func replaceTableAccessWithIndexedAccess(
}

if indexToApply.index != nil {
keyExprs := createIndexLookupKeyExpression(ctx, indexToApply, tableAliases)
keyExprs, matchesNullMask := createIndexLookupKeyExpression(ctx, indexToApply, tableAliases)
keyExprs, _, err := FixFieldIndexesOnExpressions(ctx, scope, a, schema, keyExprs...)
if err != nil {
return nil, transform.SameTree, err
}
return plan.NewIndexedTableAccess(node, indexToApply.index, keyExprs), transform.NewTree, nil
return plan.NewIndexedTableAccess(node, plan.NewLookupBuilder(indexToApply.index, keyExprs, matchesNullMask)), transform.NewTree, nil
} else {
ln, sameL, lerr := toIndexedTableAccess(node, indexToApply.disjunction[0])
if lerr != nil {
Expand Down Expand Up @@ -510,29 +510,31 @@ func joinTreeToNodes(tree *joinSearchNode, scope *Scope, ordered bool) sql.Node

// createIndexLookupKeyExpression returns a slice of expressions to be used when evaluating the context row given to the
// RowIter method of an IndexedTableAccess node. Column expressions must match the declared column order of the index.
func createIndexLookupKeyExpression(ctx *sql.Context, ji *joinIndex, tableAliases TableAliases) []sql.Expression {
func createIndexLookupKeyExpression(ctx *sql.Context, ji *joinIndex, tableAliases TableAliases) ([]sql.Expression, []bool) {
idxExprs := ji.index.Expressions()
count := len(idxExprs)
if count > len(ji.cols) {
count = len(ji.cols)
}
keyExprs := make([]sql.Expression, count)
nullmask := make([]bool, count)

IndexExpressions:
for i := 0; i < count; i++ {
for j, col := range ji.cols {
if idxExprs[i] == normalizeExpression(ctx, tableAliases, col).String() {
keyExprs[i] = ji.comparandExprs[j]
nullmask[i] = ji.nullmask[j]
continue IndexExpressions
}
}

// If we finished this loop, we didn't find a column of the index in the join expression.
// This should be impossible.
return nil
return nil, nil
}

return keyExprs
return keyExprs, nullmask
}

// A joinIndex captures an index to use in a join between two or more tables.
Expand Down Expand Up @@ -561,6 +563,12 @@ type joinIndex struct {
comparandCols []*expression.GetField
// The expressions of other tables, in the same order as cols
comparandExprs []sql.Expression
// Has a bool for each comparandExprs; the bool is true if this
// index lookup should return entries that are NULL when the
// lookup is NULL. The entry is false otherwise.
// Distinguishes between child.parent_id <=> parent.id VS
// child.parent_id = parent.id.
nullmask []bool
}

func (ji *joinIndex) hasIndex() bool {
Expand Down Expand Up @@ -783,6 +791,9 @@ func getJoinIndexes(
comparandExprs := make([]sql.Expression, 0, len(left.comparandExprs)+len(right.comparandExprs))
comparandExprs = append(comparandExprs, left.comparandExprs...)
comparandExprs = append(comparandExprs, right.comparandExprs...)
nullmask := make([]bool, 0, len(left.nullmask)+len(right.nullmask))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nullMask := append(left.nullmask, right.nullmaks...)?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could do the same for comparandExprs it would appear

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this a little sketchy / hasn't it bitten us in the past?

Like, if the new slice shares storage with the old slice, and someone goes and mutates it? (Not saying that's necessarily a concern here...)

nullmask = append(nullmask, left.nullmask...)
nullmask = append(nullmask, right.nullmask...)
v = append(v, &joinIndex{
table: table,
index: nil,
Expand All @@ -794,6 +805,7 @@ func getJoinIndexes(
colExprs: colExprs,
comparandCols: comparandCols,
comparandExprs: comparandExprs,
nullmask: nullmask,
})
}
}
Expand All @@ -816,8 +828,11 @@ func getEqualityIndexes(
tableAliases TableAliases,
) (leftJoinIndex *joinIndex, rightJoinIndex *joinIndex) {

var matchnull bool
switch joinCond.cond.(type) {
case *expression.Equals, *expression.NullSafeEquals:
case *expression.Equals:
case *expression.NullSafeEquals:
matchnull = true
default:
return nil, nil
}
Expand Down Expand Up @@ -856,6 +871,7 @@ func getEqualityIndexes(
colExprs: []sql.Expression{leftCol.colExpr},
comparandCols: []*expression.GetField{leftCol.comparandCol},
comparandExprs: []sql.Expression{leftCol.comparand},
nullmask: []bool{matchnull},
}

rightJoinIndex = &joinIndex{
Expand All @@ -868,6 +884,7 @@ func getEqualityIndexes(
colExprs: []sql.Expression{rightCol.colExpr},
comparandCols: []*expression.GetField{rightCol.comparandCol},
comparandExprs: []sql.Expression{rightCol.comparand},
nullmask: []bool{matchnull},
}

return leftJoinIndex, rightJoinIndex
Expand Down Expand Up @@ -919,11 +936,13 @@ func colExprsToJoinIndex(table string, idx sql.Index, joinCond joinCond, colExpr
joinPosition = plan.JoinTypeRight
}

nullmask := make([]bool, len(colExprs))
for i, col := range colExprs {
cols[i] = col.col
cmpCols[i] = col.comparandCol
exprs[i] = col.colExpr
cmpExprs[i] = col.comparand
nullmask[i] = col.matchnull
}

return &joinIndex{
Expand All @@ -936,6 +955,7 @@ func colExprsToJoinIndex(table string, idx sql.Index, joinCond joinCond, colExpr
colExprs: exprs,
comparandCols: cmpCols,
comparandExprs: cmpExprs,
nullmask: nullmask,
}
}

Expand Down
Loading