Skip to content

Commit

Permalink
feat(expr/builders): Add initial expression builder helpers (#20)
Browse files Browse the repository at this point in the history
* feat(expr/builders): Add initial expression builder helpers.

* export FuncArgBuilder

* add one to type variation anchors
  • Loading branch information
zeroshade authored May 17, 2023
1 parent d70c727 commit 30fa08b
Show file tree
Hide file tree
Showing 5 changed files with 487 additions and 1 deletion.
3 changes: 3 additions & 0 deletions expr/binding_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ var (
extractID = extensions.ID{
URI: extensions.SubstraitDefaultURIPrefix + "functions_datetime.yaml",
Name: "extract"}
ntileID = extensions.ID{
URI: extensions.SubstraitDefaultURIPrefix + "functions_arithmetic.yaml",
Name: "ntile"}

boringSchema = types.NamedStruct{
Names: []string{
Expand Down
383 changes: 383 additions & 0 deletions expr/builder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,383 @@
// SPDX-License-Identifier: Apache-2.0

package expr

import (
"github.com/substrait-io/substrait-go/extensions"
"github.com/substrait-io/substrait-go/types"
)

// Builder is a basic interface for any type which can construct
// an expression. The `Build` method will be reserved for producing
// a concrete type while `BuildExpr` will exist for compatibility
// with this interface for ease of use. Typically it will be
// implemented as simply a call to Build anyway.
type Builder interface {
	// BuildExpr constructs the expression, or reports the error that
	// prevented it from being constructed.
	BuildExpr() (Expression, error)
}

// ExprBuilder is the parent context for all the expression builders.
// It maintains a pointer to an extension registry and, optionally,
// a pointer to a base input schema. This allows less verbose expression
// building as it isn't necessary to pass these to every `New*` function
// to construct the expressions.
//
// This is intended to be used like:
//
//	b := expr.ExprBuilder{
//		Reg:        ...,
//		BaseSchema: ...,
//	}
//	e, err := b.ScalarFunc(fnID, options...).Args(
//		b.RootRef(expr.NewStructFieldRef(1)),
//		b.ScalarFunc(fn2ID, options2...).Args(
//			b.Wrap(expr.NewLiteral(int32(5), false /* nullable type */)),
//			b.RootRef(expr.NewStructFieldRef(2)))).Build()
//
// Nothing is resolved until Build (or BuildExpr / BuildFuncArg) is called
// on the outermost builder; the nested builders passed to Args are built
// at that point as well.
//
// See the unit tests for additional examples / constructs.
type ExprBuilder struct {
	// Reg is the extension registry handed to NewScalarFunc,
	// NewWindowFunc and NewAggregateFunc when a function builder
	// created from this ExprBuilder is built.
	Reg ExtensionRegistry

	// BaseSchema is the (optional) base input schema handed to
	// NewFieldRef when a field reference builder created from this
	// ExprBuilder is built.
	BaseSchema *types.StructType
}

// Literal wraps l so it can be handed to any of the other expression
// builders as an argument (for example ScalarFunc(...).Args). The
// returned wrapper never carries an error; use Wrap when the literal was
// produced by a call that can fail.
func (e *ExprBuilder) Literal(l Literal) literalWrapper {
	return literalWrapper{wrapped: l}
}

// Wrap behaves like Literal but additionally records err, which makes it
// possible to pass the two results of a call such as expr.NewLiteral
// straight through. The recorded error is returned when the wrapper is
// built, so it surfaces from the enclosing Build call instead of being
// swallowed or forcing a panic.
func (e *ExprBuilder) Wrap(l Literal, err error) literalWrapper {
	return literalWrapper{wrapped: l, err: err}
}

// Enum converts val into a function-argument builder representing an
// Enum argument of the function being built.
func (e *ExprBuilder) Enum(val string) enumWrapper {
	return enumWrapper(val)
}

// ScalarFunc starts building the scalar function identified by id with
// the given options. Arguments are supplied through the Args method of
// the returned builder.
//
// Nothing is validated here: the ID, the options and the arguments are
// only handed to NewScalarFunc, together with the registry stored in this
// ExprBuilder, when Build is called, and any error is reported from
// there. Consequently the extension identified by id only needs to be
// loaded into the registry *before* `Build` is called, not before
// ScalarFunc is.
func (e *ExprBuilder) ScalarFunc(id extensions.ID, opts ...*types.FunctionOption) *scalarFuncBuilder {
	fb := new(scalarFuncBuilder)
	fb.b, fb.id, fb.opts = e, id, opts
	return fb
}

// WindowFunc starts building the window function identified by id with
// the given options. Arguments, aggregation phase, invocation, sort
// fields, partitions and bounds are supplied through the individual
// methods of the returned builder.
//
// Nothing is validated here: the ID, the options and the arguments are
// only handed to NewWindowFunc, together with the registry stored in this
// ExprBuilder, when Build is called, and any error is reported from
// there. Consequently the extension identified by id only needs to be
// loaded into the registry *before* `Build` is called, not before
// WindowFunc is.
func (e *ExprBuilder) WindowFunc(id extensions.ID, opts ...*types.FunctionOption) *windowFuncBuilder {
	fb := new(windowFuncBuilder)
	fb.b, fb.id, fb.opts = e, id, opts
	return fb
}

// AggFunc starts building the aggregate function identified by id with
// the given options. Arguments, aggregation phase, invocation and sort
// fields are supplied through the individual methods of the returned
// builder.
//
// Nothing is validated here: the ID, the options and the arguments are
// only handed to NewAggregateFunc, together with the registry stored in
// this ExprBuilder, when Build is called, and any error is reported from
// there. Consequently the extension identified by id only needs to be
// loaded into the registry *before* `Build` is called, not before
// AggFunc is.
func (e *ExprBuilder) AggFunc(id extensions.ID, opts ...*types.FunctionOption) *aggregateFuncBuilder {
	fb := new(aggregateFuncBuilder)
	fb.b, fb.id, fb.opts = e, id, opts
	return fb
}

// Ref starts building a field reference from the given root and
// reference. When `Build` is called on the returned builder, root, ref
// and the `BaseSchema` of this ExprBuilder are passed to NewFieldRef,
// which uses the schema to resolve the type of the expression where
// relevant (such as a StructFieldRef/ListRef/MapKeyRef).
func (e *ExprBuilder) Ref(root RootRefType, ref Reference) *fieldRefBuilder {
	rb := new(fieldRefBuilder)
	rb.b, rb.root, rb.ref = e, root, ref
	return rb
}

// RootRef is shorthand for a field reference rooted at
// `expr.RootReference`; it produces the same builder as calling
// ExprBuilder.Ref with `expr.RootReference` as the first argument.
func (e *ExprBuilder) RootRef(ref Reference) *fieldRefBuilder {
	return &fieldRefBuilder{b: e, root: RootReference, ref: ref}
}

// Cast starts building a Cast expression that converts the expression
// produced by from into the type to. The failure behavior of the cast can
// be chosen by calling FailBehavior on the returned builder before
// calling Build.
func (e *ExprBuilder) Cast(from Builder, to types.Type) *castBuilder {
	cb := new(castBuilder)
	cb.input, cb.toType = from, to
	return cb
}

// literalWrapper carries a Literal together with the error (if any) that
// was produced while creating it, so that both can be surfaced when the
// wrapper is built as part of a larger expression.
type literalWrapper struct {
	wrapped Literal
	err     error
}

// BuildFuncArg returns the wrapped literal as a function argument along
// with the stored error.
func (l literalWrapper) BuildFuncArg() (types.FuncArg, error) {
	return l.wrapped, l.err
}

// BuildExpr returns the wrapped literal as an expression along with the
// stored error.
func (l literalWrapper) BuildExpr() (Expression, error) {
	return l.wrapped, l.err
}

// enumWrapper is a string that builds into a types.Enum function
// argument.
type enumWrapper string

// BuildFuncArg converts the wrapped string to a types.Enum. It never
// returns an error.
func (e enumWrapper) BuildFuncArg() (types.FuncArg, error) {
	return types.Enum(e), nil
}

// FuncArgBuilder is implemented by anything that can produce a function
// argument. It is the type accepted by the Args methods of the function
// builders in this file; the literal and enum wrappers as well as the
// cast, scalar function, window function and field reference builders
// all implement it.
type FuncArgBuilder interface {
	// BuildFuncArg constructs the function argument, or reports the
	// error that prevented it from being constructed.
	BuildFuncArg() (types.FuncArg, error)
}

// castBuilder collects the pieces of a Cast expression: the target type,
// a builder for the input expression and the failure behavior.
type castBuilder struct {
	toType          types.Type
	input           Builder
	failureBehavior types.CastFailBehavior
}

// BuildExpr builds the Cast and returns it as an Expression.
//
// On failure the returned Expression is an untyped nil. Returning the
// result of Build directly would box a nil *Cast into the interface,
// yielding a value that compares unequal to nil.
func (cb *castBuilder) BuildExpr() (Expression, error) {
	c, err := cb.Build()
	if err != nil {
		return nil, err
	}
	return c, nil
}

// BuildFuncArg builds the Cast and returns it as a function argument.
//
// On failure the returned FuncArg is an untyped nil, for the same reason
// as in BuildExpr.
func (cb *castBuilder) BuildFuncArg() (types.FuncArg, error) {
	c, err := cb.Build()
	if err != nil {
		return nil, err
	}
	return c, nil
}

// Build first builds the input expression and, if that succeeds, returns
// a Cast of that expression to the configured type with the configured
// failure behavior. An error from building the input is returned as is.
func (cb *castBuilder) Build() (*Cast, error) {
	in, err := cb.input.BuildExpr()
	if err != nil {
		return nil, err
	}

	return &Cast{
		Type:            cb.toType,
		Input:           in,
		FailureBehavior: cb.failureBehavior,
	}, nil
}

// FailBehavior sets the failure behavior for the resulting Cast expression
// that is built from this builder by calling the Build method. It returns
// the builder to allow chaining.
func (cb *castBuilder) FailBehavior(b types.CastFailBehavior) *castBuilder {
	cb.failureBehavior = b
	return cb
}

// scalarFuncBuilder collects everything needed to construct a
// ScalarFunction: the parent ExprBuilder (for its registry), the function
// ID, the function options and builders for the arguments.
type scalarFuncBuilder struct {
	b *ExprBuilder

	id   extensions.ID
	opts []*types.FunctionOption
	args []FuncArgBuilder
}

// Build builds every argument in order, stopping at and returning the
// first error, and then passes the registry of the parent ExprBuilder,
// the ID, the options and the built arguments to NewScalarFunc.
func (sb *scalarFuncBuilder) Build() (*ScalarFunction, error) {
	var err error
	args := make([]types.FuncArg, len(sb.args))
	for i, a := range sb.args {
		if args[i], err = a.BuildFuncArg(); err != nil {
			return nil, err
		}
	}

	return NewScalarFunc(sb.b.Reg, sb.id, sb.opts, args...)
}

// BuildExpr builds the function and returns it as an Expression.
//
// On failure the returned Expression is an untyped nil. Returning the
// result of Build directly would box a nil *ScalarFunction into the
// interface, yielding a value that compares unequal to nil.
func (sb *scalarFuncBuilder) BuildExpr() (Expression, error) {
	fn, err := sb.Build()
	if err != nil {
		return nil, err
	}
	return fn, nil
}

// BuildFuncArg builds the function and returns it as a function argument,
// which allows scalar function builders to be nested inside Args.
//
// On failure the returned FuncArg is an untyped nil, for the same reason
// as in BuildExpr.
func (sb *scalarFuncBuilder) BuildFuncArg() (types.FuncArg, error) {
	fn, err := sb.Build()
	if err != nil {
		return nil, err
	}
	return fn, nil
}

// Args sets the argument list for this builder. Subsequent calls to Args
// will *replace* the argument list, not append to it. It returns the
// builder to allow chaining.
func (sb *scalarFuncBuilder) Args(args ...FuncArgBuilder) *scalarFuncBuilder {
	sb.args = args
	return sb
}

// windowFuncBuilder collects everything needed to construct a
// WindowFunction: the parent ExprBuilder (for its registry), the function
// ID, the function options, builders for the arguments and partitions,
// the aggregation phase and invocation, the sort fields and the bounds.
type windowFuncBuilder struct {
	b *ExprBuilder

	id   extensions.ID
	opts []*types.FunctionOption
	args []FuncArgBuilder

	phase      types.AggregationPhase
	invocation types.AggregationInvocation
	partitions []Builder
	sortList   []SortField

	lowerBound, upperBound Bound
}

// Build builds every argument and then every partition expression in
// order, stopping at and returning the first error. It then passes the
// registry of the parent ExprBuilder, the ID, the options, the
// invocation, the phase and the built arguments to NewWindowFunc and, on
// success, stores the sort fields, bounds and partitions on the result.
func (wb *windowFuncBuilder) Build() (*WindowFunction, error) {
	var err error
	args := make([]types.FuncArg, len(wb.args))
	for i, a := range wb.args {
		if args[i], err = a.BuildFuncArg(); err != nil {
			return nil, err
		}
	}

	// parts stays nil when no partitions were requested so that the
	// result is unchanged for callers that never call Partitions.
	var parts []Expression
	if len(wb.partitions) > 0 {
		parts = make([]Expression, len(wb.partitions))
		for i, p := range wb.partitions {
			if parts[i], err = p.BuildExpr(); err != nil {
				return nil, err
			}
		}
	}

	wf, err := NewWindowFunc(wb.b.Reg, wb.id, wb.opts, wb.invocation, wb.phase, args...)
	if err != nil {
		return nil, err
	}

	wf.Sorts, wf.LowerBound, wf.UpperBound = wb.sortList, wb.lowerBound, wb.upperBound
	// The built partition expressions must be attached to the function;
	// otherwise everything passed to Partitions is silently discarded.
	wf.Partitions = parts
	return wf, nil
}

// BuildFuncArg builds the function and returns it as a function argument.
//
// On failure the returned FuncArg is an untyped nil. Returning the result
// of Build directly would box a nil *WindowFunction into the interface,
// yielding a value that compares unequal to nil.
func (wb *windowFuncBuilder) BuildFuncArg() (types.FuncArg, error) {
	wf, err := wb.Build()
	if err != nil {
		return nil, err
	}
	return wf, nil
}

// BuildExpr builds the function and returns it as an Expression.
//
// On failure the returned Expression is an untyped nil, for the same
// reason as in BuildFuncArg.
func (wb *windowFuncBuilder) BuildExpr() (Expression, error) {
	wf, err := wb.Build()
	if err != nil {
		return nil, err
	}
	return wf, nil
}

// Args sets the argument list for this builder. Subsequent calls to Args
// will *replace* the argument list, not append to it. It returns the
// builder to allow chaining.
func (wb *windowFuncBuilder) Args(args ...FuncArgBuilder) *windowFuncBuilder {
	wb.args = args
	return wb
}

// Phase sets the aggregation phase for the resulting WindowFunction
// expression that will be built by this builder. It returns the builder
// to allow chaining.
func (wb *windowFuncBuilder) Phase(p types.AggregationPhase) *windowFuncBuilder {
	wb.phase = p
	return wb
}

// Invocation will set the Aggregation Invocation property for the
// resulting WindowFunction expression that will be built by this builder.
// It returns the builder to allow chaining.
func (wb *windowFuncBuilder) Invocation(i types.AggregationInvocation) *windowFuncBuilder {
	wb.invocation = i
	return wb
}

// Sort sets the list of sort fields for this WindowFunction. Subsequent
// calls to Sort will replace the set of sort fields, not append to it.
// It returns the builder to allow chaining.
func (wb *windowFuncBuilder) Sort(fields ...SortField) *windowFuncBuilder {
	wb.sortList = fields
	return wb
}

// Partitions sets the list of partitions for this WindowFunction. Subsequent
// calls to Partitions will replace the set of partitions, not append to it.
// This expects to receive other Builders; they are built, and any error
// they produce is returned, at the time that `Build` is called. It returns
// the builder to allow chaining.
func (wb *windowFuncBuilder) Partitions(parts ...Builder) *windowFuncBuilder {
	wb.partitions = parts
	return wb
}

// Bounds sets the lower and upper bound for this WindowFunction.
// Subsequent calls to Bounds will replace both bounds. It returns the
// builder to allow chaining.
func (wb *windowFuncBuilder) Bounds(lower, upper Bound) *windowFuncBuilder {
	wb.lowerBound, wb.upperBound = lower, upper
	return wb
}

// aggregateFuncBuilder gathers the inputs for an AggregateFunction: the
// parent ExprBuilder (whose registry is used), the function ID and
// options, builders for the arguments, the aggregation phase and
// invocation, and the sort fields.
type aggregateFuncBuilder struct {
	b *ExprBuilder

	id   extensions.ID
	opts []*types.FunctionOption
	args []FuncArgBuilder

	phase      types.AggregationPhase
	invocation types.AggregationInvocation
	sortList   []SortField
}

// Build resolves the argument builders one by one, returning the first
// error encountered, and then hands the parent registry, ID, options,
// invocation, phase, sort fields and resolved arguments to
// NewAggregateFunc, whose result is returned unchanged.
func (ab *aggregateFuncBuilder) Build() (*AggregateFunction, error) {
	built := make([]types.FuncArg, 0, len(ab.args))
	for _, argBuilder := range ab.args {
		arg, err := argBuilder.BuildFuncArg()
		if err != nil {
			return nil, err
		}
		built = append(built, arg)
	}

	return NewAggregateFunc(ab.b.Reg, ab.id, ab.opts, ab.invocation, ab.phase, ab.sortList, built...)
}

// Args replaces the argument list of this builder with args; it does not
// append to a previously set list. The builder is returned for chaining.
func (ab *aggregateFuncBuilder) Args(args ...FuncArgBuilder) *aggregateFuncBuilder {
	ab.args = args
	return ab
}

// Phase records the aggregation phase to use for the AggregateFunction
// produced by this builder. The builder is returned for chaining.
func (ab *aggregateFuncBuilder) Phase(p types.AggregationPhase) *aggregateFuncBuilder {
	ab.phase = p
	return ab
}

// Invocation records the aggregation invocation to use for the
// AggregateFunction produced by this builder. The builder is returned for
// chaining.
func (ab *aggregateFuncBuilder) Invocation(i types.AggregationInvocation) *aggregateFuncBuilder {
	ab.invocation = i
	return ab
}

// Sorts replaces the sort fields of this AggregateFunction with fields;
// it does not append to a previously set list. The builder is returned
// for chaining.
func (ab *aggregateFuncBuilder) Sorts(fields ...SortField) *aggregateFuncBuilder {
	ab.sortList = fields
	return ab
}

// fieldRefBuilder collects everything needed to construct a
// FieldReference: the parent ExprBuilder (for its base schema), the root
// of the reference and the reference itself.
type fieldRefBuilder struct {
	b *ExprBuilder

	root RootRefType
	ref  Reference
}

// Build passes the root, the reference and the BaseSchema of the parent
// ExprBuilder to NewFieldRef and returns its result unchanged.
func (rb *fieldRefBuilder) Build() (*FieldReference, error) {
	return NewFieldRef(rb.root, rb.ref, rb.b.BaseSchema)
}

// BuildFuncArg builds the field reference and returns it as a function
// argument.
//
// On failure the returned FuncArg is an untyped nil. Returning the result
// of Build directly would box a nil *FieldReference into the interface,
// yielding a value that compares unequal to nil.
func (rb *fieldRefBuilder) BuildFuncArg() (types.FuncArg, error) {
	fr, err := rb.Build()
	if err != nil {
		return nil, err
	}
	return fr, nil
}

// BuildExpr builds the field reference and returns it as an Expression.
//
// On failure the returned Expression is an untyped nil, for the same
// reason as in BuildFuncArg.
func (rb *fieldRefBuilder) BuildExpr() (Expression, error) {
	fr, err := rb.Build()
	if err != nil {
		return nil, err
	}
	return fr, nil
}
Loading

0 comments on commit 30fa08b

Please sign in to comment.