-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
explorer.go
357 lines (326 loc) · 13.6 KB
/
explorer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
// Copyright 2018 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.
package xform
import (
"github.com/cockroachdb/cockroach/pkg/sql/opt"
"github.com/cockroachdb/cockroach/pkg/sql/opt/idxconstraint"
"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
"github.com/cockroachdb/cockroach/pkg/sql/opt/norm"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/util"
)
//go:generate optgen -out explorer.og.go explorer ../ops/*.opt rules/*.opt
// explorer generates alternate expressions that are logically equivalent to
// existing expressions in the memo. The new expressions are added to the same
// memo group as the existing expression. The optimizer will cost all the
// expressions and pick the lowest cost alternative that provides any required
// physical properties.
//
// Equivalent expressions are generated by exploration rules. An exploration
// rule efficiently enumerates all possible combinations of its sub-expressions
// in order to look for matches. For example:
//
//   // [AssociateJoin]
//   (InnerJoin
//     (InnerJoin $r:* $s:* $lowerOn:*)
//     $t:*
//     $upperOn:*
//   )
//   =>
//   ...
//
// Say the memo group containing the upper inner-join has 3 expressions in it,
// and the memo group containing the lower inner-join has 4 expressions. Then
// the explorer will enumerate 12 possible expression combinations, looking for
// a combination that has an InnerJoin operator with another InnerJoin operator
// as its left operand.
//
// Once new expressions have been added to a group, they themselves become
// eligible for exploration, which might generate further expressions, and so
// on. Because the same group will often be explored multiple times, the
// explorer keeps state which helps it avoid duplicate work during subsequent
// passes.
//
// The explorer only traverses expression trees to the depth required by the
// exploration match patterns. It expects the optimizer to call exploreGroup
// for each group that needs to be explored. The optimizer can then use branch
// and bound pruning to skip exploration of entire sub-trees.
//
// For each expression combination that matches, a replace expression is
// constructed and added to the same memo group as the matched expression:
//
//   // [AssociateJoin]
//   (InnerJoin
//     (InnerJoin $r:* $s:* $lowerOn:*)
//     $t:*
//     $upperOn:*
//   )
//   =>
//   (InnerJoin
//     (InnerJoin
//       $r
//       $t
//       (Filters (ConstructConditionsNotUsing $s $lowerOn $upperOn))
//     )
//     $s
//     (Filters (ConstructConditionsUsing $s $lowerOn $upperOn))
//   )
//
// In this example, if the upper and lower groups each contain two InnerJoin
// expressions, then four new expressions will be added to the memo group of the
// matched expression. During the next pass, the four new expressions will
// themselves match this same rule. However, adding their replace expressions to
// the memo group will be a no-op, because they're already present.
type explorer struct {
// o is the optimizer this explorer was initialized with; it owns the
// per-group optimization state in which exploration state is stored.
o *Optimizer
// mem is the optimizer's memo. It is used to count the expressions in a
// group, look up private values and metadata, and intern new scan
// definitions.
mem *memo.Memo
// f is the optimizer's normalization factory. It is used to construct new
// memo groups (e.g. a constrained Scan) and is handed to the index
// constraint library.
f *norm.Factory
// evalCtx is the optimizer's evaluation context; it is passed through to
// the index constraint library.
evalCtx *tree.EvalContext
// exprs is a buffer reused by custom replace functions. Each such function
// truncates it on entry, so a slice returned by one call is overwritten by
// the next.
exprs []memo.Expr
}
// init binds the explorer to the given optimizer and caches the optimizer's
// memo, factory, and evaluation context for direct access. The reusable exprs
// buffer is left untouched.
func (e *explorer) init(o *Optimizer) {
	e.o = o
	e.mem, e.f, e.evalCtx = o.mem, o.f, o.evalCtx
}
// exploreGroup runs one exploration pass over the given memo group and returns
// the group's exploration state. Each expression in the group that is not yet
// fully explored is handed to exploreExpr, which looks for alternate
// expressions that are logically equivalent to it.
//
// The state records what previous passes already covered. Its "start" and
// "end" ordinals bracket the expressions which need to be fully explored
// during the current pass. On every call, the end of the previous pass becomes
// the start of the new pass, and the new end is the number of expressions
// currently in the group. For example:
//
//   pass1         pass2         pass3
//      <-start
//   e0            e0            e0
//      <-end         <-start
//   e1 (new)      e1            e1
//
//   e2 (new)      e2            e2
//                    <-end         <-start
//                 e3 (new)      e3
//                                  <-end
//
// Rules that match one or more sub-expressions in addition to the top-level
// expression add complexity, because every combination has to be considered.
// Expressions explored in earlier passes must still be partially re-explored,
// since they may match in combination with a sub-expression that did not exist
// during the last pass. Only combinations made up entirely of old expressions
// can be skipped.
//
// The combination enumeration code is a series of nested loops generated by
// Optgen. Each non-scalar match pattern or sub-pattern uses a loop to
// enumerate the expressions in the corresponding memo group. For example:
//
//   $join:(InnerJoin
//     $left:(InnerJoin)
//     $right:(Select)
//     $on:*
//   )
//
// This match pattern would be implemented with 3 nested loops: 1 each for the
// $join, $left, and $right memo groups. If $join had 2 expressions, $left had
// 3 expressions, and $right had 2 expressions, then 2 * 3 * 2 = 12 combos will
// be considered. The innermost loop can skip iteration if all outer loops are
// bound to expressions which have already been explored in previous passes:
//
//   for e1 in memo-exprs($join):
//     for e2 in memo-exprs($left):
//       for e3 in memo-exprs($right):
//         if ordinal(e3) >= state.start:
//           ... explore (e1, e2, e3) combo ...
//
func (e *explorer) exploreGroup(group memo.GroupID) *exploreState {
	state := e.ensureExploreState(group)
	if state.fullyExplored {
		// A fully explored group can never yield anything new.
		return state
	}

	// Slide the pass window forward: last pass's end is this pass's start, and
	// the window now extends to the last expression currently in the group.
	count := e.mem.ExprCount(group)
	state.start, state.end = state.end, memo.ExprOrdinal(count)

	// Only visit expressions that existed when the pass began; anything added
	// while exploring is picked up by the next pass.
	pending := false
	for i := 0; i < count; i++ {
		ord := memo.ExprOrdinal(i)
		if state.isExprFullyExplored(ord) {
			// Finished in an earlier pass; nothing further to do.
			continue
		}

		if !e.exploreExpr(state, memo.ExprID{Group: group, Expr: ord}) {
			// A single expression with remaining work keeps the whole group
			// open for future passes.
			pending = true
			continue
		}

		// No rule can ever match this expression again, so future passes can
		// skip it.
		state.markExprAsFullyExplored(ord)
	}

	// The group is finished only if every visited expression is finished and
	// this pass did not grow the group.
	if !pending && e.mem.ExprCount(group) == int(state.end) {
		state.fullyExplored = true
	}
	return state
}
// ensureExploreState returns a pointer to the exploration state for the given
// memo group. The state is embedded in the optimizer state that ensureOptState
// provides for the group with respect to the minimum physical properties.
func (e *explorer) ensureExploreState(group memo.GroupID) *exploreState {
	groupState := e.o.ensureOptState(group, memo.MinPhysPropsID)
	return &groupState.explore
}
// ----------------------------------------------------------------------
//
// Scan Rules
// Custom match and replace functions used with scan.opt rules.
//
// ----------------------------------------------------------------------
// isUnconstrainedPrimaryScan returns true if the scan definition identified by
// the given private ID reads the primary index (rather than a secondary index)
// and has no constraint attached.
func (e *explorer) isUnconstrainedPrimaryScan(def memo.PrivateID) bool {
	scan := e.mem.LookupPrivate(def).(*memo.ScanOpDef)
	if scan.Index != opt.PrimaryIndex {
		return false
	}
	return scan.Constraint == nil
}
// generateIndexScans walks every secondary index of the scanned table and, for
// each index that includes the set of needed columns, produces an alternate
// Scan expression that reads from that index instead. The result is built in
// the explorer's reusable exprs buffer, so it is overwritten by the next
// custom replace function that runs.
// TODO(andyk): Create join with primary index in non-covering case.
func (e *explorer) generateIndexScans(def memo.PrivateID) []memo.Expr {
	e.exprs = e.exprs[:0]

	scan := e.mem.LookupPrivate(def).(*memo.ScanOpDef)
	md := e.mem.Metadata()
	table := md.Table(scan.Table)

	// Ordinal 0 is the primary index, so secondary indexes start at 1.
	for idx := 1; idx < table.IndexCount(); idx++ {
		if !scan.AltIndexHasCols(md, idx) {
			// This index does not cover the needed columns (scan.Cols).
			continue
		}

		// Same table and columns, different index.
		altDef := &memo.ScanOpDef{Table: scan.Table, Index: idx, Cols: scan.Cols}
		altScan := memo.MakeScanExpr(e.mem.InternScanOpDef(altDef))
		e.exprs = append(e.exprs, memo.Expr(altScan))
	}
	return e.exprs
}
// ----------------------------------------------------------------------
//
// Select Rules
// Custom match and replace functions used with select.opt rules.
//
// ----------------------------------------------------------------------
// isUnconstrainedScan returns true if the scan definition identified by the
// given private ID has no constraint attached.
func (e *explorer) isUnconstrainedScan(def memo.PrivateID) bool {
	private := e.mem.LookupPrivate(def)
	return private.(*memo.ScanOpDef).Constraint == nil
}
// constrainScan tries to turn the filter of a Select -> Scan pattern into
// index constraints on the Scan. The scan is assumed to be unconstrained on
// entry.
//
// The outcome depends on how much of the filter can be converted:
//
//   - The whole filter becomes constraints: the result is a constrained Scan
//     expression, to be added to the same group as the Select operator.
//
//   - Part of the filter becomes constraints: the constrained Scan is
//     constructed in its own group, and the result is a Select expression over
//     it carrying the remaining filter, to be added to the same group as the
//     original Select operator.
//
//   - Nothing can be converted: no expression is produced and nil is returned.
//
// A non-nil result is built in the explorer's reusable exprs buffer, so it is
// overwritten by the next custom replace function that runs.
func (e *explorer) constrainScan(filterGroup memo.GroupID, scanDef memo.PrivateID) []memo.Expr {
	e.exprs = e.exprs[:0]
	origDef := e.mem.LookupPrivate(scanDef).(*memo.ScanOpDef)

	// Describe the index's columns (ordering and nullability) in the form the
	// idxconstraint library expects.
	md := e.mem.Metadata()
	index := md.Table(origDef.Table).Index(origDef.Index)
	var notNull opt.ColSet
	keyCols := make([]opt.OrderingColumn, index.UniqueColumnCount())
	for pos := range keyCols {
		idxCol := index.Column(pos)
		id := md.TableColumn(origDef.Table, idxCol.Ordinal)
		keyCols[pos] = opt.MakeOrderingColumn(id, idxCol.Descending)
		if !idxCol.Column.IsNullable() {
			notNull.Add(int(id))
		}
	}

	// Derive index constraints from the filter.
	var ic idxconstraint.Instance
	ic.Init(
		memo.MakeNormExprView(e.mem, filterGroup),
		keyCols,
		notNull,
		false, /* isInverted */
		e.evalCtx,
		e.f,
	)
	constraint := ic.Constraint()
	if constraint.IsUnconstrained() {
		return nil
	}

	// Copy the scan definition and attach the constraint to the copy.
	constrainedDef := *origDef
	constrainedDef.Constraint = constraint

	remaining := ic.RemainingFilter()
	filterConsumed := e.mem.NormExpr(remaining).Operator() == opt.TrueOp
	private := e.mem.InternScanOpDef(&constrainedDef)

	if filterConsumed {
		// Nothing is left of the filter, so the constrained scan itself can
		// stand in for the Select in the Select's group.
		scanExpr := memo.MakeScanExpr(private)
		e.exprs = append(e.exprs, memo.Expr(scanExpr))
		return e.exprs
	}

	// Some of the filter is left over. The constrained scan goes into a new
	// group, and a Select wrapping it with the leftover filter goes into the
	// original Select's group.
	scanGroup := e.f.ConstructScan(private)
	selectExpr := memo.MakeSelectExpr(scanGroup, remaining)
	e.exprs = append(e.exprs, memo.Expr(selectExpr))
	return e.exprs
}
// ----------------------------------------------------------------------
//
// Exploration state
//
// ----------------------------------------------------------------------
// exploreState contains state needed by the explorer for each memo group it
// explores. The state is allocated lazily for a group when the exploreGroup
// method is called. Various fields record what exploration has taken place so
// that the same work isn't repeated.
type exploreState struct {
// start (inclusive) and end (exclusive) specify which expressions need to
// be explored in the current pass. Expressions < start have been partly
// explored during previous passes. Expressions >= end are new expressions
// added during the current pass. At the beginning of each pass,
// exploreGroup sets start to the previous end, and end to the number of
// expressions then in the group.
start memo.ExprOrdinal
end memo.ExprOrdinal
// fullyExplored is set to true once all expressions in the group have been
// fully explored, and no new expressions will ever be added. Further
// exploration of the group can be skipped. exploreGroup sets it after a
// pass in which every visited expression ended up fully explored and the
// group's expression count did not grow.
fullyExplored bool
// fullyExploredExprs is a set of memo.ExprOrdinal values (stored as ints).
// Once an expression has been fully explored, its ordinal is added to this
// set.
fullyExploredExprs util.FastIntSet
}
// isExprFullyExplored reports whether the expression with the given ordinal
// has been recorded as fully explored, meaning it will never match an
// additional rule and can be skipped in future exploration passes.
func (e *exploreState) isExprFullyExplored(ordinal memo.ExprOrdinal) bool {
	key := int(ordinal)
	return e.fullyExploredExprs.Contains(key)
}
// markExprAsFullyExplored records the expression with the given ordinal as
// fully explored. It is called once every possible matching combination has
// been considered for the subtree rooted at that expression: even if more
// exploration passes follow, the expression will never gain new children,
// grand-children, etc. that might cause it to match another rule.
func (e *exploreState) markExprAsFullyExplored(ordinal memo.ExprOrdinal) {
	key := int(ordinal)
	e.fullyExploredExprs.Add(key)
}