Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release-18.0] Make column resolution closer to MySQL (#14426) #14430

Merged
merged 2 commits into from
Nov 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion go/vt/vtgate/planbuilder/operators/aggregation_pushing.go
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,10 @@ func addColumnsFromLHSInJoinPredicates(ctx *plancontext.PlanningContext, rootAgg
for _, pred := range join.JoinPredicates {
for _, bve := range pred.LHSExprs {
expr := bve.Expr
wexpr := rootAggr.QP.GetSimplifiedExpr(expr)
wexpr, err := rootAggr.QP.GetSimplifiedExpr(ctx, expr)
if err != nil {
return err
}
idx, found := canReuseColumn(ctx, lhs.pushed.Columns, expr, extractExpr)
if !found {
idx = len(lhs.pushed.Columns)
Expand Down
6 changes: 5 additions & 1 deletion go/vt/vtgate/planbuilder/operators/distinct.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,11 @@ func (d *Distinct) planOffsets(ctx *plancontext.PlanningContext) error {
return err
}
for idx, col := range columns {
e := d.QP.GetSimplifiedExpr(col.Expr)
e, err := d.QP.GetSimplifiedExpr(ctx, col.Expr)
if err != nil {
// ambiguous columns are not a problem for DISTINCT
e = col.Expr
}
var wsCol *int
typ, coll, _ := ctx.SemTable.TypeForExpr(e)

Expand Down
102 changes: 71 additions & 31 deletions go/vt/vtgate/planbuilder/operators/queryprojection.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,10 @@ func createQPFromSelect(ctx *plancontext.PlanningContext, sel *sqlparser.Select)
if !qp.HasAggr && sel.Having != nil {
qp.HasAggr = containsAggr(sel.Having.Expr)
}
qp.calculateDistinct(ctx)

if err := qp.calculateDistinct(ctx); err != nil {
return nil, err
}

return qp, nil
}
Expand Down Expand Up @@ -368,7 +371,10 @@ func (qp *QueryProjection) addOrderBy(ctx *plancontext.PlanningContext, orderBy
canPushSorting := true
es := &expressionSet{}
for _, order := range orderBy {
simpleExpr := qp.GetSimplifiedExpr(order.Expr)
simpleExpr, err := qp.GetSimplifiedExpr(ctx, order.Expr)
if err != nil {
return err
}
if sqlparser.IsNull(simpleExpr) {
// ORDER BY null can safely be ignored
continue
Expand All @@ -386,9 +392,13 @@ func (qp *QueryProjection) addOrderBy(ctx *plancontext.PlanningContext, orderBy
return nil
}

func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) {
func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) error {
if qp.Distinct && !qp.HasAggr {
if qp.useGroupingOverDistinct(ctx) {
distinct, err := qp.useGroupingOverDistinct(ctx)
if err != nil {
return err
}
if distinct {
// if order by exists with overlap with select expressions, we can use the aggregation with ordering over distinct.
qp.Distinct = false
} else {
Expand All @@ -403,7 +413,7 @@ func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) {
}

if !qp.Distinct || len(qp.groupByExprs) == 0 {
return
return nil
}

for _, gb := range qp.groupByExprs {
Expand All @@ -415,24 +425,28 @@ func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) {
return getExpr
})
if !found {
return
return nil
}
}

// since we are returning all grouping expressions, we know the results are guaranteed to be unique
qp.Distinct = false
return nil
}

func (qp *QueryProjection) addGroupBy(ctx *plancontext.PlanningContext, groupBy sqlparser.GroupBy) error {
es := &expressionSet{}
for _, group := range groupBy {
selectExprIdx, aliasExpr := qp.FindSelectExprIndexForExpr(ctx, group)
simpleExpr := qp.GetSimplifiedExpr(group)
err := checkForInvalidGroupingExpressions(simpleExpr)
simpleExpr, err := qp.GetSimplifiedExpr(ctx, group)
if err != nil {
return err
}

if err = checkForInvalidGroupingExpressions(simpleExpr); err != nil {
return err
}

if !es.add(ctx, simpleExpr) {
continue
}
Expand Down Expand Up @@ -475,32 +489,61 @@ func (qp *QueryProjection) isExprInGroupByExprs(ctx *plancontext.PlanningContext
}

// GetSimplifiedExpr takes an expression used in ORDER BY or GROUP BY, and returns an expression that is simpler to evaluate
func (qp *QueryProjection) GetSimplifiedExpr(e sqlparser.Expr) sqlparser.Expr {
func (qp *QueryProjection) GetSimplifiedExpr(ctx *plancontext.PlanningContext, e sqlparser.Expr) (found sqlparser.Expr, err error) {
if qp == nil {
return e
return e, nil
}
// If the ORDER BY is against a column alias, we need to remember the expression
// behind the alias. The weightstring(.) calls needs to be done against that expression and not the alias.
// Eg - select music.foo as bar, weightstring(music.foo) from music order by bar

colExpr, isColName := e.(*sqlparser.ColName)
if !(isColName && colExpr.Qualifier.IsEmpty()) {
// we are only interested in unqualified column names. if it's not a column name and not
return e
in, isColName := e.(*sqlparser.ColName)
if !(isColName && in.Qualifier.IsEmpty()) {
// we are only interested in unqualified column names. if it's not a column name and not unqualified, we're done
return e, nil
}

check := func(e sqlparser.Expr) error {
if found != nil && !ctx.SemTable.EqualsExprWithDeps(found, e) {
return &semantics.AmbiguousColumnError{Column: sqlparser.String(in)}
}
found = e
return nil
}

for _, selectExpr := range qp.SelectExprs {
aliasedExpr, isAliasedExpr := selectExpr.Col.(*sqlparser.AliasedExpr)
if !isAliasedExpr {
ae, ok := selectExpr.Col.(*sqlparser.AliasedExpr)
if !ok {
continue
}
aliased := !aliasedExpr.As.IsEmpty()
if aliased && colExpr.Name.Equal(aliasedExpr.As) {
return aliasedExpr.Expr
aliased := !ae.As.IsEmpty()
if aliased {
if in.Name.Equal(ae.As) {
err = check(ae.Expr)
if err != nil {
return nil, err
}
}
} else {
seCol, ok := ae.Expr.(*sqlparser.ColName)
if !ok {
continue
}
if seCol.Name.Equal(in.Name) {
// If the column name matches, we have a match, even if the table name is not listed
err = check(ae.Expr)
if err != nil {
return nil, err
}
}
}
}

return e
if found == nil {
found = e
}

return found, nil
}

// toString should only be used for tests
Expand Down Expand Up @@ -866,18 +909,21 @@ func (qp *QueryProjection) orderByOverlapWithSelectExpr(ctx *plancontext.Plannin
return false
}

func (qp *QueryProjection) useGroupingOverDistinct(ctx *plancontext.PlanningContext) bool {
func (qp *QueryProjection) useGroupingOverDistinct(ctx *plancontext.PlanningContext) (bool, error) {
if !qp.orderByOverlapWithSelectExpr(ctx) {
return false
return false, nil
}
var gbs []GroupBy
for idx, selExpr := range qp.SelectExprs {
ae, err := selExpr.GetAliasedExpr()
if err != nil {
// not an alias Expr, cannot continue forward.
return false
return false, nil
}
sExpr, err := qp.GetSimplifiedExpr(ctx, ae.Expr)
if err != nil {
return false, err
}
sExpr := qp.GetSimplifiedExpr(ae.Expr)
// check if the grouping already exists on that column.
found := slices.IndexFunc(qp.groupByExprs, func(gb GroupBy) bool {
return ctx.SemTable.EqualsExprWithDeps(gb.SimplifiedExpr, sExpr)
Expand All @@ -892,7 +938,7 @@ func (qp *QueryProjection) useGroupingOverDistinct(ctx *plancontext.PlanningCont
gbs = append(gbs, groupBy)
}
qp.groupByExprs = append(qp.groupByExprs, gbs...)
return true
return true, nil
}

func checkForInvalidGroupingExpressions(expr sqlparser.Expr) error {
Expand All @@ -909,12 +955,6 @@ func checkForInvalidGroupingExpressions(expr sqlparser.Expr) error {
}, expr)
}

func SortAggregations(a []Aggr) {
sort.Slice(a, func(i, j int) bool {
return CompareRefInt(a[i].Index, a[j].Index)
})
}

func SortGrouping(a []GroupBy) {
sort.Slice(a, func(i, j int) bool {
return CompareRefInt(a[i].InnerIndex, a[j].InnerIndex)
Expand Down
44 changes: 6 additions & 38 deletions go/vt/vtgate/planbuilder/testdata/aggr_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -3300,39 +3300,7 @@
{
"comment": "scatter aggregate symtab lookup error",
"query": "select id, b as id, count(*) from user order by id",
"plan": {
"QueryType": "SELECT",
"Original": "select id, b as id, count(*) from user order by id",
"Instructions": {
"OperatorType": "Sort",
"Variant": "Memory",
"OrderBy": "(1|3) ASC",
"ResultColumns": 3,
"Inputs": [
{
"OperatorType": "Aggregate",
"Variant": "Scalar",
"Aggregates": "any_value(0) AS id, any_value(1) AS id, sum_count_star(2) AS count(*), any_value(3)",
"Inputs": [
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select id, b as id, count(*), weight_string(b) from `user` where 1 != 1",
"Query": "select id, b as id, count(*), weight_string(b) from `user`",
"Table": "`user`"
}
]
}
]
},
"TablesUsed": [
"user.user"
]
}
"plan": "Column 'id' in field list is ambiguous"
},
{
"comment": "aggr and non-aggr without group by (with query does not give useful result out)",
Expand Down Expand Up @@ -3388,9 +3356,9 @@
"Name": "user",
"Sharded": true
},
"FieldQuery": "select `user`.id, weight_string(id) from `user` where 1 != 1 group by id, weight_string(id)",
"FieldQuery": "select `user`.id, weight_string(`user`.id) from `user` where 1 != 1 group by id, weight_string(`user`.id)",
"OrderBy": "(0|1) ASC",
"Query": "select `user`.id, weight_string(id) from `user` group by id, weight_string(id) order by id asc",
"Query": "select `user`.id, weight_string(`user`.id) from `user` group by id, weight_string(`user`.id) order by id asc",
"Table": "`user`"
},
{
Expand Down Expand Up @@ -4775,7 +4743,7 @@
"OperatorType": "Distinct",
"Collations": [
"(0:2)",
"(1:2)"
"(1:3)"
],
"ResultColumns": 2,
"Inputs": [
Expand All @@ -4786,8 +4754,8 @@
"Name": "user",
"Sharded": true
},
"FieldQuery": "select a, b as a, weight_string(b) from `user` where 1 != 1",
"Query": "select distinct a, b as a, weight_string(b) from `user`",
"FieldQuery": "select a, b as a, weight_string(a), weight_string(b) from `user` where 1 != 1",
"Query": "select distinct a, b as a, weight_string(a), weight_string(b) from `user`",
"Table": "`user`"
}
]
Expand Down
43 changes: 43 additions & 0 deletions go/vt/vtgate/planbuilder/testdata/select_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -4765,5 +4765,48 @@
"user.customer"
]
}
},
{
"comment": "column with qualifier is correctly used",
"query": "select u.foo, ue.foo as apa from user u, user_extra ue order by foo ",
"plan": {
"QueryType": "SELECT",
"Original": "select u.foo, ue.foo as apa from user u, user_extra ue order by foo ",
"Instructions": {
"OperatorType": "Join",
"Variant": "Join",
"JoinColumnIndexes": "L:0,R:0",
"TableName": "`user`_user_extra",
"Inputs": [
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select u.foo, weight_string(u.foo) from `user` as u where 1 != 1",
"OrderBy": "(0|1) ASC",
"Query": "select u.foo, weight_string(u.foo) from `user` as u order by foo asc",
"Table": "`user`"
},
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select ue.foo as apa from user_extra as ue where 1 != 1",
"Query": "select ue.foo as apa from user_extra as ue",
"Table": "user_extra"
}
]
},
"TablesUsed": [
"user.user",
"user.user_extra"
]
}
}
]
Loading