Skip to content

Commit

Permalink
apacheGH-134: Add Validity functions
Browse files Browse the repository at this point in the history
  • Loading branch information
zeroshade committed Sep 22, 2024
1 parent 12482ca commit 6695449
Show file tree
Hide file tree
Showing 12 changed files with 58,338 additions and 52,315 deletions.
25 changes: 25 additions & 0 deletions arrow/array/struct.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,31 @@ func NewStructArray(cols []arrow.Array, names []string) (*Struct, error) {
return NewStructArrayWithNulls(cols, names, nil, 0, 0)
}

// NewStructArrayWithFields builds a new Struct Array using the passed columns
// and provided fields. As opposed to NewStructArray, this allows you to provide
// the full fields to utilize for the struct column instead of just the names.
func NewStructArrayWithFields(cols []arrow.Array, fields []arrow.Field) (*Struct, error) {
if len(cols) != len(fields) {
return nil, fmt.Errorf("%w: mismatching number of fields and child arrays", arrow.ErrInvalid)
}
if len(cols) == 0 {
return nil, fmt.Errorf("%w: can't infer struct array length with 0 child arrays", arrow.ErrInvalid)
}

length := cols[0].Len()
children := make([]arrow.ArrayData, len(cols))
for i, c := range cols {
if length != c.Len() {
return nil, fmt.Errorf("%w: mismatching child array lengths", arrow.ErrInvalid)
}
children[i] = c.Data()
}

data := NewData(arrow.StructOf(fields...), length, []*memory.Buffer{nil}, children, 0, 0)
defer data.Release()
return NewStructData(data), nil
}

// NewStructArrayWithNulls is like NewStructArray as a convenience function,
// but also takes in a null bitmap, the number of nulls, and an optional offset
// to use for creating the Struct Array.
Expand Down
30 changes: 26 additions & 4 deletions arrow/compute/exprs/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ const (
SubstraitArithmeticFuncsURI = SubstraitDefaultURIPrefix + "functions_arithmetic.yaml"
// URI for official Substrait Comparison funcs extensions
SubstraitComparisonFuncsURI = SubstraitDefaultURIPrefix + "functions_comparison.yaml"
SubstraitBooleanFuncsURI = SubstraitDefaultURIPrefix + "functions_boolean.yaml"

TimestampTzTimezone = "UTC"
)
Expand Down Expand Up @@ -93,7 +94,7 @@ func init() {
}
}

for _, fn := range []string{"equal", "not_equal", "lt", "lte", "gt", "gte"} {
for _, fn := range []string{"equal", "not_equal", "lt", "lte", "gt", "gte", "is_null", "is_not_null", "is_nan"} {
err := DefaultExtensionIDRegistry.AddSubstraitScalarToArrow(
extensions.ID{URI: SubstraitComparisonFuncsURI, Name: fn},
simpleMapSubstraitToArrowFunc)
Expand All @@ -102,13 +103,30 @@ func init() {
}
}

for _, fn := range []string{"equal", "not_equal", "less", "less_equal", "greater", "greater_equal"} {
for _, fn := range []string{"equal", "not_equal", "less", "less_equal", "greater", "greater_equal", "is_null", "is_not_null", "is_nan"} {
err := DefaultExtensionIDRegistry.AddArrowToSubstrait(fn,
simpleMapArrowToSubstraitFunc(SubstraitComparisonFuncsURI))
if err != nil {
panic(err)
}
}

for _, fn := range []string{"and", "or"} {
err := DefaultExtensionIDRegistry.AddSubstraitScalarToArrow(
extensions.ID{URI: SubstraitBooleanFuncsURI, Name: fn},
simpleMapSubstraitToArrowFunc)
if err != nil {
panic(err)
}
}

for _, fn := range []string{"and_kleene", "or_kleene"} {
err := DefaultExtensionIDRegistry.AddArrowToSubstrait(fn,
simpleMapArrowToSubstraitFunc(SubstraitBooleanFuncsURI))
if err != nil {
panic(err)
}
}
}

type overflowBehavior string
Expand Down Expand Up @@ -168,13 +186,17 @@ var substraitToArrowFuncMap = map[string]string{
"gt": "greater",
"lte": "less_equal",
"gte": "greater_equal",
"or": "or_kleene",
"and": "and_kleene",
}

var arrowToSubstraitFuncMap = map[string]string{
"less": "lt",
"greater": "gt",
"less_equal": "lte",
"greater_equal": "gte",
"and_kleene": "and",
"or_kleene": "or",
}

func simpleMapSubstraitToArrowFunc(sf *expr.ScalarFunction) (fname string, opts compute.FunctionOptions, err error) {
Expand Down Expand Up @@ -553,9 +575,9 @@ func ToSubstraitType(dt arrow.DataType, nullable bool, ext ExtensionIDSet) (type
return &types.Float32Type{Nullability: nullability}, nil
case arrow.FLOAT64:
return &types.Float64Type{Nullability: nullability}, nil
case arrow.STRING:
case arrow.STRING, arrow.LARGE_STRING:
return &types.StringType{Nullability: nullability}, nil
case arrow.BINARY:
case arrow.BINARY, arrow.LARGE_BINARY:
return &types.BinaryType{Nullability: nullability}, nil
case arrow.DATE32:
return &types.DateType{Nullability: nullability}, nil
Expand Down
5 changes: 3 additions & 2 deletions arrow/compute/internal/kernels/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@ C2GOASM=c2goasm
CC=clang-11
CXX=clang++-11
C_FLAGS=-target x86_64-unknown-none -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=5000 \
-fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -O3 -fno-builtin -ffast-math -fno-jump-tables -I_lib -I../../../../internal/utils/_lib
-fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -O3 -fno-builtin -fno-jump-tables \
-fno-math-errno -funsafe-math-optimizations -fno-rounding-math -fno-trapping-math -I_lib -I../../../../internal/utils/_lib
ASM_FLAGS_AVX2=-mavx2 -mfma
ASM_FLAGS_SSE4=-msse4
ASM_FLAGS_BMI2=-mbmi2
ASM_FLAGS_POPCNT=-mpopcnt

C_FLAGS_NEON=-O3 -fvectorize -mllvm -force-vector-width=16 -fno-asynchronous-unwind-tables -mno-red-zone -mstackrealign -fno-exceptions \
-fno-rtti -fno-builtin -ffast-math -fno-jump-tables -I_lib -I../../../../internal/utils/_lib
-fno-rtti -fno-builtin -fno-math-errno -funsafe-math-optimizations -fno-rounding-math -fno-trapping-math -fno-jump-tables -I_lib -I../../../../internal/utils/_lib

GO_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -not -name '*_test.go')
ALL_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -name '*.s' -not -name '*_test.go')
Expand Down
Loading

0 comments on commit 6695449

Please sign in to comment.