apache · Yoruet · Sep 24, 2024 · Sep 24, 2024 · Sep 24, 2024 · Sep 25, 2024
diff --git a/be/src/vec/aggregate_functions/aggregate_function_regr_intercept.cpp b/be/src/vec/aggregate_functions/aggregate_function_regr_intercept.cpp
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#include "common/status.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/aggregate_functions/aggregate_function_regr_intercept.h"
+#include "vec/aggregate_functions/helpers.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+
+namespace doris::vectorized {
+
+// Builds the regr_intercept aggregate for the input type pair (TX, TY).
+//
+// `nullable_input` chooses between the two instantiations of
+// AggregateFunctionRegrInterceptSimple: the `true` variant when it is set, the `false`
+// variant otherwise. `argument_types` and `result_is_nullable` are forwarded unchanged to
+// creator_without_type::create_ignore_nullable.
+template <typename TX, typename TY>
+AggregateFunctionPtr type_dispatch_for_aggregate_function_regr_intercept(const DataTypes& argument_types,
+                                                                         const bool& result_is_nullable,
+                                                                         bool nullable_input) {
+    using Stat = RegrInterceptFuncTwoArg<TX, TY>;
+    using NullAwareImpl = AggregateFunctionRegrInterceptSimple<Stat, true>;
+    using PlainImpl = AggregateFunctionRegrInterceptSimple<Stat, false>;
+
+    if (!nullable_input) {
+        return creator_without_type::create_ignore_nullable<PlainImpl>(argument_types,
+                                                                       result_is_nullable);
+    }
+    return creator_without_type::create_ignore_nullable<NullAwareImpl>(argument_types,
+                                                                       result_is_nullable);
+}
+
+// Factory callback that builds the `regr_intercept` aggregate for a pair of numeric arguments.
+//
+// @param name                registered function name; only used in warning messages.
+// @param argument_types      must hold exactly two types. Nullable wrappers are stripped
+//                            before the numeric type lookup.
+// @param result_is_nullable  must be true, otherwise the request is rejected.
+// @return the aggregate instance, or nullptr (after logging a warning) when the argument
+//         count, the result nullability, or the input type pair is not supported.
+AggregateFunctionPtr create_aggregate_function_regr_intercept(const std::string& name,
+                                                              const DataTypes& argument_types,
+                                                              const bool result_is_nullable) {
+    if (argument_types.size() != 2) {
+        LOG(WARNING) << "aggregate function " << name << " requires exactly 2 arguments";
+        return nullptr;
+    }
+    if (!result_is_nullable) {
+        LOG(WARNING) << "aggregate function " << name << " requires nullable result type";
+        return nullptr;
+    }
+    const bool nullable_input = argument_types[0]->is_nullable() || argument_types[1]->is_nullable();
+    // AggregateFunctionRegrInterceptSimple::add() reads columns[0] through ColumnVector<TY>
+    // and columns[1] through ColumnVector<TX>. Argument 0 therefore has to select TY and
+    // argument 1 has to select TX; binding them the other way round makes every mixed-type
+    // call (e.g. Int32 with Float64) cast each column to the wrong vector type.
+    WhichDataType y_type(remove_nullable(argument_types[0]));
+    WhichDataType x_type(remove_nullable(argument_types[1]));
+
+#define DISPATCH(TX, TY)                                                                                   \
+    if (x_type.idx == TypeIndex::TX && y_type.idx == TypeIndex::TY)                                        \
+        return type_dispatch_for_aggregate_function_regr_intercept<TX, TY>(argument_types, result_is_nullable, \
+                                                                           nullable_input);
+#define FOR_ALL_NUMERIC_TYPE_PAIRS(M) \
+    M(UInt8, UInt8)   M(UInt8, Int8)   M(UInt8, Int16)   M(UInt8, Int32)   M(UInt8, Int64)   M(UInt8, Int128)   M(UInt8, Float32)   M(UInt8, Float64) \
+    M(Int8, UInt8)    M(Int8, Int8)    M(Int8, Int16)    M(Int8, Int32)    M(Int8, Int64)    M(Int8, Int128)    M(Int8, Float32)    M(Int8, Float64)  \
+    M(Int16, UInt8)   M(Int16, Int8)   M(Int16, Int16)   M(Int16, Int32)   M(Int16, Int64)   M(Int16, Int128)   M(Int16, Float32)   M(Int16, Float64) \
+    M(Int32, UInt8)   M(Int32, Int8)   M(Int32, Int16)   M(Int32, Int32)   M(Int32, Int64)   M(Int32, Int128)   M(Int32, Float32)   M(Int32, Float64) \
+    M(Int64, UInt8)   M(Int64, Int8)   M(Int64, Int16)   M(Int64, Int32)   M(Int64, Int64)   M(Int64, Int128)   M(Int64, Float32)   M(Int64, Float64) \
+    M(Int128, UInt8)  M(Int128, Int8)  M(Int128, Int16)  M(Int128, Int32)  M(Int128, Int64)  M(Int128, Int128)  M(Int128, Float32)  M(Int128, Float64)\
+    M(Float32, UInt8) M(Float32, Int8) M(Float32, Int16) M(Float32, Int32) M(Float32, Int64) M(Float32, Int128) M(Float32, Float32) M(Float32, Float64)\
+    M(Float64, UInt8) M(Float64, Int8) M(Float64, Int16) M(Float64, Int32) M(Float64, Int64) M(Float64, Int128) M(Float64, Float32) M(Float64, Float64)
+
+    // Expands to one `if (...) return ...;` per (TX, TY) pair; the first match wins.
+    FOR_ALL_NUMERIC_TYPE_PAIRS(DISPATCH)
+#undef DISPATCH
+#undef FOR_ALL_NUMERIC_TYPE_PAIRS
+
+    LOG(WARNING) << "Unsupported input types " << argument_types[0]->get_name()
+                 << " and " << argument_types[1]->get_name()
+                 << " for aggregate function " << name;
+    return nullptr;
+}
+
+// Registers the regr_intercept creator with the given factory under the name
+// "regr_intercept", via register_function_both.
+void register_aggregate_function_regr_intercept(AggregateFunctionSimpleFactory& factory) {
+    auto* const creator = &create_aggregate_function_regr_intercept;
+    factory.register_function_both("regr_intercept", creator);
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_regr_intercept.h b/be/src/vec/aggregate_functions/aggregate_function_regr_intercept.h
@@ -0,0 +1,189 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cmath>
+#include <cstdint>
+#include <string>
+#include <type_traits>
+
+#include "common/exception.h"
+#include "common/status.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_vector.h"
+#include "vec/common/assert_cast.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/io/io_helper.h"
+namespace doris::vectorized {
+
+// Aggregation state for the least-squares intercept of y on x.
+//
+// Keeps the running sums sum(x), sum(y), sum(x*y), sum(x*x) and the number of rows
+// folded in; the intercept is derived from those sums on demand. `T` is the value type
+// accepted by add().
+//
+// NOTE(review): get_intercept_result() uses std::numeric_limits, but this header does not
+// include <limits> directly.
+template <typename T>
+struct AggregateFunctionRegrInterceptData {
+    UInt64 count = 0;
+    double sum_x = 0.0;
+    double sum_y = 0.0;
+    double sum_of_x_mul_y = 0.0;
+    double sum_of_x_squared = 0.0;
+
+    // Folds one (y, x) observation into the running sums.
+    void add(T value_y, T value_x) {
+        sum_x += value_x;
+        sum_y += value_y;
+        sum_of_x_mul_y += value_x * value_y;
+        sum_of_x_squared += value_x * value_x;
+        ++count;
+    }
+
+    // Combines another partial state into this one; a state with no rows is skipped.
+    void merge(const AggregateFunctionRegrInterceptData& rhs) {
+        if (rhs.count != 0) {
+            sum_x += rhs.sum_x;
+            sum_y += rhs.sum_y;
+            sum_of_x_mul_y += rhs.sum_of_x_mul_y;
+            sum_of_x_squared += rhs.sum_of_x_squared;
+            count += rhs.count;
+        }
+    }
+
+    // Returns the intercept (sum_y - slope * sum_x) / count, or NaN when fewer than two
+    // rows were added or the denominator count * sum(x*x) - sum(x)^2 is exactly zero.
+    double get_intercept_result() const {
+        const double n = static_cast<double>(count);
+        const double denominator = n * sum_of_x_squared - sum_x * sum_x;
+        const bool undefined = count < 2 || denominator == 0.0;
+        if (undefined) {
+            return std::numeric_limits<double>::quiet_NaN();
+        }
+        const double slope = (n * sum_of_x_mul_y - sum_x * sum_y) / denominator;
+        return (sum_y - slope * sum_x) / n;
+    }
+
+    // Restores the freshly-constructed state (all sums and the count at zero).
+    void reset() { *this = AggregateFunctionRegrInterceptData(); }
+
+    // Serialization layout: the four sums first, the row count last. read() must consume
+    // the fields in exactly the order write() emits them.
+    void write(BufferWritable& buf) const {
+        write_binary(sum_x, buf);
+        write_binary(sum_y, buf);
+        write_binary(sum_of_x_mul_y, buf);
+        write_binary(sum_of_x_squared, buf);
+        write_binary(count, buf);
+    }
+
+    void read(BufferReadable& buf) {
+        read_binary(sum_x, buf);
+        read_binary(sum_y, buf);
+        read_binary(sum_of_x_mul_y, buf);
+        read_binary(sum_of_x_squared, buf);
+        read_binary(count, buf);
+    }
+};
+
+// Compile-time bundle describing one regr_intercept instantiation: the element types of
+// the X and Y input columns plus the accumulator type. The accumulator is always
+// instantiated with Float64, independent of TX and TY.
+template <typename TX, typename TY>
+struct RegrInterceptFuncTwoArg {
+    using Data = AggregateFunctionRegrInterceptData<Float64>;
+    using TypeY = TY;
+    using TypeX = TX;
+};
+
+// Aggregate function computing `regr_intercept` over two numeric columns.
+//
+// columns[0] is read as the Y values (element type StatFunc::TypeY) and columns[1] as the
+// X values (element type StatFunc::TypeX). Every value is converted to Float64 before it
+// is accumulated. The result column is Nullable(Float64); a NaN intercept is emitted as
+// NULL.
+//
+// NullableInput == true means at least one of the two inputs may be Nullable; rows where
+// a nullable input is NULL are skipped. Nullability is tracked per argument, so a
+// Nullable column paired with a non-nullable one is handled without casting the plain
+// column to ColumnNullable.
+template <typename StatFunc, bool NullableInput>
+class AggregateFunctionRegrInterceptSimple
+        : public IAggregateFunctionDataHelper<
+                  typename StatFunc::Data,
+                  AggregateFunctionRegrInterceptSimple<StatFunc, NullableInput>> {
+public:
+    using TX = typename StatFunc::TypeX;
+    using TY = typename StatFunc::TypeY;
+    using XInputCol = ColumnVector<TX>;
+    using YInputCol = ColumnVector<TY>;
+    using ResultCol = ColumnVector<Float64>;
+
+    // Records which of the two arguments is Nullable.
+    // NOTE(review): assumes the factory hands over the original argument types (Nullable
+    // wrappers intact), matching the columns later passed to add() - confirm against
+    // creator_without_type::create_ignore_nullable.
+    explicit AggregateFunctionRegrInterceptSimple(const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper<
+                      typename StatFunc::Data,
+                      AggregateFunctionRegrInterceptSimple<StatFunc, NullableInput>>(argument_types_),
+              _y_nullable(!argument_types_.empty() && argument_types_[0]->is_nullable()),
+              _x_nullable(argument_types_.size() > 1 && argument_types_[1]->is_nullable()) {
+        DCHECK(!argument_types_.empty());
+    }
+
+    String get_name() const override { return "regr_intercept"; }
+
+    DataTypePtr get_return_type() const override {
+        return make_nullable(std::make_shared<DataTypeFloat64>());
+    }
+
+    // Accumulates row `row_num`. With NullableInput, a row is ignored as soon as one of the
+    // nullable inputs is NULL at that row.
+    void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num,
+             Arena*) const override {
+        const IColumn* y_column = columns[0];
+        const IColumn* x_column = columns[1];
+        if constexpr (NullableInput) {
+            // Unwrap only the columns that really are Nullable; the other one is already
+            // the plain value column.
+            if (_y_nullable) {
+                const auto& y_nullable = assert_cast<const ColumnNullable&>(*y_column);
+                if (y_nullable.is_null_at(row_num)) {
+                    return;
+                }
+                y_column = &y_nullable.get_nested_column();
+            }
+            if (_x_nullable) {
+                const auto& x_nullable = assert_cast<const ColumnNullable&>(*x_column);
+                if (x_nullable.is_null_at(row_num)) {
+                    return;
+                }
+                x_column = &x_nullable.get_nested_column();
+            }
+        }
+        const TY y_value = assert_cast<const YInputCol&>(*y_column).get_data()[row_num];
+        const TX x_value = assert_cast<const XInputCol&>(*x_column).get_data()[row_num];
+        this->data(place).add(static_cast<Float64>(y_value), static_cast<Float64>(x_value));
+    }
+
+    void reset(AggregateDataPtr __restrict place) const override { this->data(place).reset(); }
+
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
+               Arena*) const override {
+        this->data(place).merge(this->data(rhs));
+    }
+
+    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
+        this->data(place).write(buf);
+    }
+
+    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf,
+                     Arena*) const override {
+        this->data(place).read(buf);
+    }
+
+    // Appends the intercept to `to`, which must be a Nullable(Float64) column. A NaN
+    // intercept becomes a NULL entry backed by a default nested value.
+    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
+        const auto& data = this->data(place);
+        auto& dst_column_with_nullable = assert_cast<ColumnNullable&>(to);
+        auto& dst_column =
+                assert_cast<ResultCol&>(dst_column_with_nullable.get_nested_column());
+        Float64 intercept = data.get_intercept_result();
+        if (std::isnan(intercept)) {
+            dst_column_with_nullable.get_null_map_data().push_back(1);
+            dst_column.insert_default();
+        } else {
+            dst_column_with_nullable.get_null_map_data().push_back(0);
+            dst_column.get_data().push_back(intercept);
+        }
+    }
+
+private:
+    // Whether argument 0 (Y) / argument 1 (X) was declared Nullable at construction time.
+    const bool _y_nullable;
+    const bool _x_nullable;
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_regr_slope.cpp b/be/src/vec/aggregate_functions/aggregate_function_regr_slope.cpp
@@ -0,0 +1,86 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#include "common/status.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/aggregate_functions/aggregate_function_regr_slope.h"
+#include "vec/aggregate_functions/helpers.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+
+namespace doris::vectorized {
+
+// Builds the regr_slope aggregate for the input type pair (TX, TY).
+//
+// `nullable_input` chooses between the two instantiations of
+// AggregateFunctionRegrSlopeSimple: the `true` variant when it is set, the `false` variant
+// otherwise. `argument_types` and `result_is_nullable` are forwarded unchanged to
+// creator_without_type::create_ignore_nullable.
+template <typename TX, typename TY>
+AggregateFunctionPtr type_dispatch_for_aggregate_function_regr_slope(const DataTypes& argument_types,
+                                                                     const bool& result_is_nullable,
+                                                                     bool nullable_input) {
+    using Stat = RegrSlopeFuncTwoArg<TX, TY>;
+    using NullAwareImpl = AggregateFunctionRegrSlopeSimple<Stat, true>;
+    using PlainImpl = AggregateFunctionRegrSlopeSimple<Stat, false>;
+
+    if (!nullable_input) {
+        return creator_without_type::create_ignore_nullable<PlainImpl>(argument_types,
+                                                                       result_is_nullable);
+    }
+    return creator_without_type::create_ignore_nullable<NullAwareImpl>(argument_types,
+                                                                       result_is_nullable);
+}
+
+// Factory callback that builds the `regr_slope` aggregate for a pair of numeric arguments.
+//
+// Returns nullptr (after logging a warning) unless exactly two arguments are given, the
+// result type is nullable, and both arguments (Nullable wrappers stripped) are one of
+// UInt8, Int8, Int16, Int32, Int64, Int128, Float32 or Float64.
+//
+// NOTE(review): argument 0 selects TX and argument 1 selects TY here. The slope
+// aggregate's add() is not visible from this file - confirm that it reads columns[0] as
+// the X column; otherwise mixed-type inputs get their template arguments swapped.
+AggregateFunctionPtr create_aggregate_function_regr_slope(const std::string& name,
+                                                          const DataTypes& argument_types,
+                                                          const bool result_is_nullable) {
+    if (argument_types.size() != 2) {
+        LOG(WARNING) << "aggregate function " << name << " requires exactly 2 arguments";
+        return nullptr;
+    }
+    if (!result_is_nullable) {
+        LOG(WARNING) << "aggregate function " << name << " requires nullable result type";
+        return nullptr;
+    }
+
+    const auto& first_arg = argument_types[0];
+    const auto& second_arg = argument_types[1];
+    const bool nullable_input = first_arg->is_nullable() || second_arg->is_nullable();
+    const WhichDataType x_type(remove_nullable(first_arg));
+    const WhichDataType y_type(remove_nullable(second_arg));
+
+// Tries one (TX, TY) pair and returns the matching instantiation on a hit.
+#define REGR_SLOPE_TRY_PAIR(TX, TY)                                                   \
+    if (x_type.idx == TypeIndex::TX && y_type.idx == TypeIndex::TY) {                 \
+        return type_dispatch_for_aggregate_function_regr_slope<TX, TY>(              \
+                argument_types, result_is_nullable, nullable_input);                 \
+    }
+// Tries every supported Y type against a fixed X type.
+#define REGR_SLOPE_TRY_ALL_Y(TX)                                                      \
+    REGR_SLOPE_TRY_PAIR(TX, UInt8)                                                    \
+    REGR_SLOPE_TRY_PAIR(TX, Int8)                                                     \
+    REGR_SLOPE_TRY_PAIR(TX, Int16)                                                    \
+    REGR_SLOPE_TRY_PAIR(TX, Int32)                                                    \
+    REGR_SLOPE_TRY_PAIR(TX, Int64)                                                    \
+    REGR_SLOPE_TRY_PAIR(TX, Int128)                                                   \
+    REGR_SLOPE_TRY_PAIR(TX, Float32)                                                  \
+    REGR_SLOPE_TRY_PAIR(TX, Float64)
+
+    REGR_SLOPE_TRY_ALL_Y(UInt8)
+    REGR_SLOPE_TRY_ALL_Y(Int8)
+    REGR_SLOPE_TRY_ALL_Y(Int16)
+    REGR_SLOPE_TRY_ALL_Y(Int32)
+    REGR_SLOPE_TRY_ALL_Y(Int64)
+    REGR_SLOPE_TRY_ALL_Y(Int128)
+    REGR_SLOPE_TRY_ALL_Y(Float32)
+    REGR_SLOPE_TRY_ALL_Y(Float64)
+#undef REGR_SLOPE_TRY_ALL_Y
+#undef REGR_SLOPE_TRY_PAIR
+
+    LOG(WARNING) << "Unsupported input types " << first_arg->get_name()
+                 << " and " << second_arg->get_name()
+                 << " for aggregate function " << name;
+    return nullptr;
+}
+
+// Registers the regr_slope creator with the given factory under the name "regr_slope",
+// via register_function_both.
+void register_aggregate_function_regr_slope(AggregateFunctionSimpleFactory& factory) {
+    auto* const creator = &create_aggregate_function_regr_slope;
+    factory.register_function_both("regr_slope", creator);
+}
+} // namespace doris::vectorized