substrait-io · jvanstraten · Sep 12, 2022 · Sep 26, 2022 · Sep 28, 2022 · Sep 28, 2022
@@ -20,7 +20,7 @@ scalar_functions:
           delta = init_prec - 38
           prec = min(init_prec, 38)
           scale_after_borrow = max(init_scale - delta, min_scale)
-          scale = init_prec > 38 ? scale_after_borrow : init_scale
+          scale = if init_prec > 38 then scale_after_borrow else init_scale
           DECIMAL<prec, scale>
   -
     name: "subtract"
@@ -40,7 +40,7 @@ scalar_functions:
           delta = init_prec - 38
           prec = min(init_prec, 38)
           scale_after_borrow = max(init_scale - delta, min_scale)
-          scale = init_prec > 38 ? scale_after_borrow : init_scale
+          scale = if init_prec > 38 then scale_after_borrow else init_scale
           DECIMAL<prec, scale>
   -
     name: "multiply"
@@ -60,7 +60,7 @@ scalar_functions:
           delta = init_prec - 38
           prec = min(init_prec, 38)
           scale_after_borrow = max(init_scale - delta, min_scale)
-          scale = init_prec > 38 ? scale_after_borrow : init_scale
+          scale = if init_prec > 38 then scale_after_borrow else init_scale
           DECIMAL<prec, scale>
   -
     name: "divide"
@@ -80,7 +80,7 @@ scalar_functions:
           delta = init_prec - 38
           prec = min(init_prec, 38)
           scale_after_borrow = max(init_scale - delta, min_scale)
-          scale = init_prec > 38 ? scale_after_borrow : init_scale
+          scale = if init_prec > 38 then scale_after_borrow else init_scale
           DECIMAL<prec, scale>
   -
     name: "modulus"
@@ -100,7 +100,7 @@ scalar_functions:
           delta = init_prec - 38
           prec = min(init_prec, 38)
           scale_after_borrow = max(init_scale - delta, min_scale)
-          scale = init_prec > 38 ? scale_after_borrow : init_scale
+          scale = if init_prec > 38 then scale_after_borrow else init_scale
           DECIMAL<prec, scale>
 aggregate_functions:
   - name: "sum"
@@ -114,8 +114,8 @@ aggregate_functions:
             value: "DECIMAL<P, S>"
         nullability: DECLARED_OUTPUT
         decomposable: MANY
-        intermediate: "DECIMAL<38,S>?"
-        return: "DECIMAL<38,S>?"
+        intermediate: "DECIMAL?<38,S>"
+        return: "DECIMAL?<38,S>"
   - name: "avg"
     description: Average a set of values.
     impls:
@@ -137,8 +137,8 @@ aggregate_functions:
             value: "DECIMAL<P, S>"
         nullability: DECLARED_OUTPUT
         decomposable: MANY
-        intermediate: "DECIMAL<P, S>?"
-        return: "DECIMAL<P, S>?"
+        intermediate: "DECIMAL?<P, S>"
+        return: "DECIMAL?<P, S>"
   - name: "max"
     description: Max a set of values.
     impls:
@@ -147,5 +147,5 @@ aggregate_functions:
             value: "DECIMAL<P,S>"
         nullability: DECLARED_OUTPUT
         decomposable: MANY
-        intermediate: "DECIMAL<P, S>?"
-        return: "DECIMAL<P, S>?"
+        intermediate: "DECIMAL?<P, S>"
+        return: "DECIMAL?<P, S>"
@@ -0,0 +1,6 @@
+arrange:
+  - index.md
+  - scalar_functions.md
+  - aggregate_functions.md
+  - window_functions.md
+  - table_functions.md
@@ -13,8 +13,6 @@ Aggregate function signatures contain all the properties defined for [scalar fun
 | Intermediate Output Type | If the function is decomposable, represents the intermediate output type that is used, if the function is defined as either `ONE` or `MANY` decomposable. Will be a struct in many cases. | Required for `ONE` and `MANY`.      |
 | Invocation               | Whether the function uses all or only distinct values in the aggregation calculation. Valid options are: `ALL`, `DISTINCT`. | Optional, defaults to `ALL`     |
 
-
-
 ## Aggregate Binding
 
 When binding an aggregate function, the binding must include the following additional properties beyond the standard scalar binding properties:
@@ -24,3 +22,31 @@ When binding an aggregate function, the binding must include the following addit
 | Phase    | Describes the input type of the data: [INITIAL_TO_INTERMEDIATE, INTERMEDIATE_TO_INTERMEDIATE, INITIAL_TO_RESULT, INTERMEDIATE_TO_RESULT] describing what portion of the operation is required. For functions that are NOT decomposable, the only valid option will be INITIAL_TO_RESULT. |
 | Ordering | Zero or more ordering keys along with key order (ASC\|DESC\|NULL FIRST, etc.), declared similar to the sort keys in an `ORDER BY` relational operation. If no sorts are specified, the records are not sorted prior to being passed to the aggregate function. |
 
+When the phase is `*_TO_INTERMEDIATE`, the return type of the aggregate function is overridden to the intermediate type. When the phase is `INTERMEDIATE_TO_*`, non-constant value argument slots are overridden to behave like type argument slots instead, and an extra value argument is expected at the end of the actual argument list, whose type must match the derived intermediate type exactly. Take the following function as an example:
+
+```
+min_max_difference(T??) -> STRUCT<T?,T?> ->
+    assert T == i8 || T == i16 || T == i32 || T == i64
+    T?
+```
+
+ - `INITIAL_TO_RESULT` would bind to `min_max_difference(i32)` and yield `i32?`;
+ - `INITIAL_TO_INTERMEDIATE` would bind to `min_max_difference(i32)` and yield `STRUCT<i32?, i32?>`;
+ - `INTERMEDIATE_TO_INTERMEDIATE` would bind to `min_max_difference(type i32, STRUCT<i32?, i32?>)` and yield `STRUCT<i32?, i32?>`;
+ - `INTERMEDIATE_TO_RESULT` would bind to `min_max_difference(type i32, STRUCT<i32?, i32?>)` and yield `i32?`.
+
+!!! note
+
+    The value-to-type argument replacement is necessary because the intermediate and return type derivations may depend on it in a nontrivial and (in general) non-reversible way.
+
+## Pattern Matching and Evaluation Order
+
+The patterns used to define the argument types and return type are processed in the following order.
+
+ - Match the actual argument types against the argument slot patterns from left to right. The pattern from the last argument slot may be matched any number of times if the function is variadic. For `INTERMEDIATE_TO_*` bindings, the intermediate data input is not matched yet.
+ - Evaluate any statements in the return type specification from top to bottom/left to right.
+ - Evaluate the return type pattern (even for `*_TO_INTERMEDIATE` bindings where the result is not used; evaluation may still affect whether the function exists or not).
+ - Evaluate the intermediate type pattern, if one is specified (even for `INITIAL_TO_RESULT` bindings; note also that it is evaluated and *then* matched even for `INTERMEDIATE_TO_RESULT`).
+ - For `INTERMEDIATE_TO_*` bindings, match the evaluated intermediate type against the data type passed to the last argument; they must be exactly equal.
+
+If any pattern fails to match or evaluate, the function is said to not match the given argument pack.