From bd43690eddbb612e11a524db5fcb632c9cf751fd Mon Sep 17 00:00:00 2001
From: exmy <xumovens@gmail.com>
Date: Wed, 26 Apr 2023 15:45:38 +0800
Subject: [PATCH] [GLUTEN-1392][CH] Support new ExpandRel (#1432)

What changes were proposed in this pull request?
Support the new ExpandRel introduced by #1361.

(Fixes: #1392)

How was this patch tested?
Unit tests.
---
 .gitignore                                    |   4 +
 .../backendsapi/clickhouse/CHBackend.scala    |   2 +
 .../GlutenClickHouseTPCHParquetSuite.scala    |  46 +++++++
 cpp-ch/local-engine/Operator/ExpandStep.cpp   |  45 ++-----
 cpp-ch/local-engine/Operator/ExpandStep.h     |  12 +-
 .../local-engine/Operator/ExpandTransform.cpp |  72 ++++++-----
 cpp-ch/local-engine/Operator/ExpandTransorm.h |   7 +-
 cpp-ch/local-engine/Parser/ExpandField.h      |  42 +++++++
 .../local-engine/Parser/ExpandRelParser.cpp   | 112 +++++++++++-------
 cpp-ch/local-engine/Parser/ExpandRelParser.h  |   4 +-
 .../Parser/SerializedPlanParser.cpp           |   8 +-
 .../clickhouse/ClickHouseTestSettings.scala   |  11 --
 12 files changed, 232 insertions(+), 133 deletions(-)
 create mode 100644 cpp-ch/local-engine/Parser/ExpandField.h

diff --git a/.gitignore b/.gitignore
index b39acbdf63dd..d89be1976c42 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,10 @@
 # vscode config
 .vscode
 
+# vscode scala
+.bloop
+.metals
+
 # Mobile Tools for Java (J2ME)
 .mtj.tmp/
 
diff --git a/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHBackend.scala b/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHBackend.scala
index 1973e4e54a67..469c5f1b8b82 100644
--- a/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHBackend.scala
+++ b/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHBackend.scala
@@ -135,6 +135,8 @@ object CHBackendSettings extends BackendSettings with Logging {
 
   override def supportExpandExec(): Boolean = true
 
+  override def supportNewExpandContract(): Boolean = true // presumably the #1361 ExpandRel
+
   override def excludeScanExecFromCollapsedStage(): Boolean =
     SQLConf.get
       .getConfString(GLUTEN_CLICKHOUSE_SEP_SCAN_RDD, GLUTEN_CLICKHOUSE_SEP_SCAN_RDD_DEFAULT)
diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala
index 787835daef75..18d6b0727682 100644
--- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala
+++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala
@@ -666,6 +666,52 @@ class GlutenClickHouseTPCHParquetSuite extends GlutenClickHouseTPCHAbstractSuite
     compareResultsAgainstVanillaSpark(sql, true, { _ => })
   }
 
+  test("expand with nullable type not match") {
+    val sql = // grouping sets over `a`, which is produced by nvl(n_name, ...)
+      """
+        |select a, n_regionkey, n_nationkey from
+        |(select nvl(n_name, "aaaa") as a, n_regionkey, n_nationkey from nation)
+        |group by n_regionkey, n_nationkey
+        |grouping sets((a, n_regionkey, n_nationkey),(a, n_regionkey), (a))
+        |order by a, n_regionkey, n_nationkey
+        |""".stripMargin
+    runQueryAndCompare(sql)(checkOperatorMatch[ExpandExecTransformer])
+  }
+
+  test("expand col result") {
+    val sql = // rollup over two key columns with a count(1) aggregate
+      """
+        |select n_regionkey, n_nationkey, count(1) as cnt from nation
+        |group by n_regionkey, n_nationkey with rollup
+        |order by n_regionkey, n_nationkey, cnt
+        |""".stripMargin
+    runQueryAndCompare(sql)(checkOperatorMatch[ExpandExecTransformer])
+  }
+
+  test("expand with not nullable") {
+    val sql = // rollup over columns that are all wrapped in nvl(); sums a third nvl() column
+      """
+        |select a,b, sum(c) from
+        |(select nvl(n_nationkey, 0) as c, nvl(n_name, '') as b, nvl(n_nationkey, 0) as a from nation)
+        |group by a,b with rollup
+        |""".stripMargin
+    runQueryAndCompare(sql)(checkOperatorMatch[ExpandExecTransformer])
+  }
+
+  test("expand with function expr") {
+    val sql = // two count(distinct ...) aggregates, one of them over a concat() expression
+      """
+        |select
+        | n_name,
+        | count(distinct n_regionkey) as col1,
+        | count(distinct concat(n_regionkey, n_nationkey)) as col2
+        |from nation
+        |group by n_name
+        |order by n_name, col1, col2
+        |""".stripMargin
+    runQueryAndCompare(sql)(checkOperatorMatch[ExpandExecTransformer])
+  }
+
   test("test 'position/locate'") {
     runQueryAndCompare(
       """
diff --git a/cpp-ch/local-engine/Operator/ExpandStep.cpp b/cpp-ch/local-engine/Operator/ExpandStep.cpp
index d69ae30236d5..ce38233963ae 100644
--- a/cpp-ch/local-engine/Operator/ExpandStep.cpp
+++ b/cpp-ch/local-engine/Operator/ExpandStep.cpp
@@ -32,16 +32,12 @@ static DB::ITransformingStep::Traits getTraits()
 
 ExpandStep::ExpandStep(
     const DB::DataStream & input_stream_,
-    const std::vector<size_t> & aggregating_expressions_columns_,
-    const std::vector<std::set<size_t>> & grouping_sets_,
-    const std::string & grouping_id_name_)
+    const ExpandField & project_set_exprs_)
     : DB::ITransformingStep(
         input_stream_,
-        buildOutputHeader(input_stream_.header, aggregating_expressions_columns_, grouping_id_name_),
+        buildOutputHeader(input_stream_.header, project_set_exprs_),
         getTraits())
-    , aggregating_expressions_columns(aggregating_expressions_columns_)
-    , grouping_sets(grouping_sets_)
-    , grouping_id_name(grouping_id_name_)
+    , project_set_exprs(project_set_exprs_)
 {
     header = input_stream_.header;
     output_header = getOutputStream().header;
@@ -49,36 +45,21 @@ ExpandStep::ExpandStep(
 
 DB::Block ExpandStep::buildOutputHeader(
     const DB::Block & input_header,
-    const std::vector<size_t> & aggregating_expressions_columns_,
-    const std::string & grouping_id_name_)
+    const ExpandField & project_set_exprs_) // the output header is derived from this spec only
 {
     DB::ColumnsWithTypeAndName cols;
-    std::set<size_t> agg_cols;
+    const auto & types = project_set_exprs_.getTypes(); // one type per output column
+    const auto & names = project_set_exprs_.getNames(); // one name per output column, may be empty
 
-    for (size_t i = 0; i < input_header.columns(); ++i)
+    for (size_t i = 0; i < project_set_exprs_.getExpandCols(); ++i) // input_header is not consulted
     {
-        const auto & old_col = input_header.getByPosition(i);
-        if (i < aggregating_expressions_columns_.size())
-        {
-            // do nothing with the aggregating columns.
-            cols.push_back(old_col);
-            continue;
-        }
-        if (old_col.type->isNullable())
-            cols.push_back(old_col);
+        String col_name;
+        if (!names[i].empty())
+            col_name = names[i];
         else
-        {
-            auto null_map = DB::ColumnUInt8::create(0, 0);
-            auto null_col = DB::ColumnNullable::create(old_col.column, std::move(null_map));
-            auto null_type = std::make_shared<DB::DataTypeNullable>(old_col.type);
-            cols.push_back(DB::ColumnWithTypeAndName(null_col, null_type, old_col.name));
-        }
+            col_name = "expand_" + std::to_string(i); // positional fallback for unnamed columns
+        cols.push_back(DB::ColumnWithTypeAndName(types[i], col_name)); // header entry for column i
     }
-
-    // add group id column
-    auto grouping_id_col = DB::ColumnInt64::create(0, 0);
-    auto grouping_id_type = std::make_shared<DB::DataTypeInt64>();
-    cols.emplace_back(DB::ColumnWithTypeAndName(std::move(grouping_id_col), grouping_id_type, grouping_id_name_));
     return DB::Block(cols);
 }
 
@@ -89,7 +70,7 @@ void ExpandStep::transformPipeline(DB::QueryPipelineBuilder & pipeline, const DB
         DB::Processors new_processors;
         for (auto & output : outputs)
         {
-            auto expand_op = std::make_shared<ExpandTransform>(header, output_header, aggregating_expressions_columns, grouping_sets);
+            auto expand_op = std::make_shared<ExpandTransform>(header, output_header, project_set_exprs);
             new_processors.push_back(expand_op);
             DB::connect(*output, expand_op->getInputs().front());
         }
diff --git a/cpp-ch/local-engine/Operator/ExpandStep.h b/cpp-ch/local-engine/Operator/ExpandStep.h
index 3b12432df9cd..1de3bc4aefab 100644
--- a/cpp-ch/local-engine/Operator/ExpandStep.h
+++ b/cpp-ch/local-engine/Operator/ExpandStep.h
@@ -3,6 +3,7 @@
 #include <Core/Block.h>
 #include <Processors/QueryPlan/IQueryPlanStep.h>
 #include <Processors/QueryPlan/ITransformingStep.h>
+#include <Parser/ExpandField.h>
 
 namespace local_engine
 {
@@ -12,9 +13,7 @@ class ExpandStep : public DB::ITransformingStep
     // The input stream should only contain grouping columns.
     explicit ExpandStep(
         const DB::DataStream & input_stream_,
-        const std::vector<size_t> & aggregating_expressions_columns_,
-        const std::vector<std::set<size_t>> & grouping_sets_,
-        const std::string & grouping_id_name_);
+        const ExpandField & project_set_exprs_);
     ~ExpandStep() override = default;
 
     String getName() const override { return "ExpandStep"; }
@@ -22,9 +21,7 @@ class ExpandStep : public DB::ITransformingStep
     void transformPipeline(DB::QueryPipelineBuilder & pipeline, const DB::BuildQueryPipelineSettings & settings) override;
     void describePipeline(DB::IQueryPlanStep::FormatSettings & settings) const override;
 private:
-    std::vector<size_t> aggregating_expressions_columns;
-    std::vector<std::set<size_t>> grouping_sets;
-    std::string grouping_id_name;
+    ExpandField project_set_exprs;
     DB::Block header;
     DB::Block output_header;
 
@@ -32,7 +29,6 @@ class ExpandStep : public DB::ITransformingStep
 
     static DB::Block buildOutputHeader(
         const DB::Block & header,
-        const std::vector<size_t> & aggregating_expressions_columns_,
-        const std::string & grouping_id_name_);
+        const ExpandField & project_set_exprs_);
 };
 }
diff --git a/cpp-ch/local-engine/Operator/ExpandTransform.cpp b/cpp-ch/local-engine/Operator/ExpandTransform.cpp
index c083c1afa552..dadd2164c477 100644
--- a/cpp-ch/local-engine/Operator/ExpandTransform.cpp
+++ b/cpp-ch/local-engine/Operator/ExpandTransform.cpp
@@ -4,8 +4,10 @@
 #include <Columns/ColumnsNumber.h>
 #include <Columns/IColumn.h>
 #include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypeNullable.h>
 #include <Processors/IProcessor.h>
 
+#include "Common/Exception.h"
 #include <Common/logger_useful.h>
 #include <Poco/Logger.h>
 
@@ -14,11 +16,9 @@ namespace local_engine
 ExpandTransform::ExpandTransform(
     const DB::Block & input_,
     const DB::Block & output_,
-    const std::vector<size_t> & aggregating_expressions_columns_,
-    const std::vector<std::set<size_t>> & grouping_sets_)
+    const ExpandField & project_set_exprs_) // expand spec: per-row kinds/fields, per-column types
     : DB::IProcessor({input_}, {output_})
-    , aggregating_expressions_columns(aggregating_expressions_columns_)
-    , grouping_sets(grouping_sets_)
+    , project_set_exprs(project_set_exprs_) // stored by value in the member
 {}
 
 ExpandTransform::Status ExpandTransform::prepare()
@@ -68,43 +68,57 @@ ExpandTransform::Status ExpandTransform::prepare()
 void ExpandTransform::work()
 {
     assert(expanded_chunks.empty());
-    size_t agg_cols_size = aggregating_expressions_columns.size();
-    for (int set_id = 0; static_cast<size_t>(set_id) < grouping_sets.size(); ++set_id)
+    const auto & original_cols = input_chunk.getColumns();
+    size_t rows = input_chunk.getNumRows();
+    // Emit one output chunk per projection row; every chunk keeps the input's row count.
+    for (size_t i = 0; i < project_set_exprs.getExpandRows(); ++i)
     {
-        const auto & sets = grouping_sets[set_id];
         DB::Columns cols;
-        const auto & original_cols = input_chunk.getColumns();
-        for (size_t i = 0; i < original_cols.size(); ++i)
+        for (size_t j = 0; j < project_set_exprs.getExpandCols(); ++j)
         {
-            const auto & original_col = original_cols[i];
-            size_t rows = original_col->size();
-            if (i < agg_cols_size)
-            {
-                cols.push_back(original_col);
-                continue;
-            }
-            // the output columns should all be nullable.
-            if (!sets.contains(i))
-            {
-                auto null_map = DB::ColumnUInt8::create(rows, 1);
-                auto col = DB::ColumnNullable::create(original_col, std::move(null_map));
-                cols.push_back(std::move(col));
-            }
-            else
+            const auto & type = project_set_exprs.getTypes()[j]; // declared output type of column j
+            const auto & kind = project_set_exprs.getKinds()[i][j];
+            const auto & field = project_set_exprs.getFields()[i][j];
+
+            if (kind == EXPAND_FIELD_KIND_SELECTION)
             {
-                if (original_col->isNullable())
+                const auto & original_col = original_cols[field.get<Int32>()]; // field = input column position
+                if (type->isNullable() == original_col->isNullable())
+                {
                     cols.push_back(original_col);
-                else
+                }
+                else if (type->isNullable() && !original_col->isNullable()) // wrap to match the nullable output type
                 {
                     auto null_map = DB::ColumnUInt8::create(rows, 0);
                     auto col = DB::ColumnNullable::create(original_col, std::move(null_map));
                     cols.push_back(std::move(col));
                 }
+                else
+                {
+                    throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR,
+                        "Miss match nullable, column {} is nullable, but type {} is not nullable",
+                        original_col->getName(), type->getName());
+                }
+            }
+            else
+            {
+                if (field.isNull())
+                {
+                    // Add null column
+                    auto null_map = DB::ColumnUInt8::create(rows, 1);
+                    auto nested_type = DB::removeNullable(type);
+                    auto col = DB::ColumnNullable::create(nested_type->createColumn()->cloneResized(rows), std::move(null_map));
+                    cols.push_back(std::move(col));
+                }
+                else
+                {
+                    // Add constant column: gid, gpos, etc.
+                    auto col = type->createColumnConst(rows, field);
+                    cols.push_back(col->convertToFullColumnIfConst()); // result is already a temporary
+                }
             }
         }
-        auto id_col = DB::DataTypeInt64().createColumnConst(input_chunk.getNumRows(), set_id);
-        cols.push_back(std::move(id_col));
-        expanded_chunks.push_back(DB::Chunk(cols, input_chunk.getNumRows()));
+        expanded_chunks.push_back(DB::Chunk(std::move(cols), rows)); // cols is not used after this
     }
     has_output = true;
     has_input = false;
diff --git a/cpp-ch/local-engine/Operator/ExpandTransorm.h b/cpp-ch/local-engine/Operator/ExpandTransorm.h
index b131c36cd809..e2579dddc39a 100644
--- a/cpp-ch/local-engine/Operator/ExpandTransorm.h
+++ b/cpp-ch/local-engine/Operator/ExpandTransorm.h
@@ -5,6 +5,7 @@
 #include <Processors/Chunk.h>
 #include <Processors/IProcessor.h>
 #include <Processors/Port.h>
+#include <Parser/ExpandField.h>
 namespace local_engine
 {
 // For handling substrait expand node.
@@ -21,16 +22,14 @@ class ExpandTransform : public DB::IProcessor
     ExpandTransform(
         const DB::Block & input_,
         const DB::Block & output_,
-        const std::vector<size_t> & aggregating_expressions_columns_,
-        const std::vector<std::set<size_t>> & grouping_sets_);
+        const ExpandField & project_set_exprs_);
 
     Status prepare() override;
     void work() override;
 
     DB::String getName() const override { return "ExpandTransform"; }
 private:
-    std::vector<size_t> aggregating_expressions_columns;
-    std::vector<std::set<size_t>> grouping_sets;
+    ExpandField project_set_exprs;
     bool has_input = false;
     bool has_output = false;
 
diff --git a/cpp-ch/local-engine/Parser/ExpandField.h b/cpp-ch/local-engine/Parser/ExpandField.h
new file mode 100644
index 000000000000..4c15f7f93c6e
--- /dev/null
+++ b/cpp-ch/local-engine/Parser/ExpandField.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <Core/Field.h>
+#include <DataTypes/IDataType.h>
+
+namespace local_engine
+{
+/// How a single cell of an expand projection obtains its value.
+enum ExpandFieldKind
+{
+    EXPAND_FIELD_KIND_SELECTION, // taken from an input column; the paired field holds its position
+    EXPAND_FIELD_KIND_LITERAL, // the paired field is the literal value itself (may be NULL)
+};
+/// Expand specification: per-column names/types plus per-projection kinds/fields.
+class ExpandField
+{
+public:
+    ExpandField() = default;
+    ExpandField(
+        const std::vector<std::string> & names_,
+        const std::vector<DB::DataTypePtr> & types_,
+        const std::vector<std::vector<ExpandFieldKind>> & kinds_,
+        const std::vector<std::vector<DB::Field>> & fields_):
+        names(names_), types(types_), kinds(kinds_), fields(fields_)
+    {}
+
+    const std::vector<std::string> & getNames() const { return names; }
+    const std::vector<DB::DataTypePtr> & getTypes() const { return types; }
+    const std::vector<std::vector<ExpandFieldKind>> & getKinds() const { return kinds; }
+    const std::vector<std::vector<DB::Field>> & getFields() const { return fields; }
+
+    size_t getExpandRows() const { return kinds.size(); } // number of projections (outer size of kinds)
+    size_t getExpandCols() const { return types.size(); } // number of output columns
+
+private:
+    std::vector<std::string> names; // one per output column; an entry may be empty
+    std::vector<DB::DataTypePtr> types; // one per output column
+    std::vector<std::vector<ExpandFieldKind>> kinds; // indexed [projection][column]
+    std::vector<std::vector<DB::Field>> fields; // indexed [projection][column], paired with kinds
+};
+
+}
diff --git a/cpp-ch/local-engine/Parser/ExpandRelParser.cpp b/cpp-ch/local-engine/Parser/ExpandRelParser.cpp
index f5d086a75ae1..5e5c5712c2ba 100644
--- a/cpp-ch/local-engine/Parser/ExpandRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/ExpandRelParser.cpp
@@ -8,6 +8,8 @@
 #include <Core/Block.h>
 #include <Core/ColumnWithTypeAndName.h>
 #include <Processors/QueryPlan/ExpressionStep.h>
+#include <vector>
+#include <Parser/ExpandField.h>
 
 namespace DB
 {
@@ -23,62 +25,88 @@ ExpandRelParser::ExpandRelParser(SerializedPlanParser * plan_parser_)
     : RelParser(plan_parser_)
 {}
 
-DB::QueryPlanPtr
-ExpandRelParser::parse(DB::QueryPlanPtr query_plan, const substrait::Rel  & rel, std::list<const substrait::Rel*> & rel_stack)
+static void updateType(DB::DataTypePtr & type, const DB::DataTypePtr & new_type) // file-local helper
 {
-    const auto & expand_rel = rel.group_id();
-    std::vector<size_t> aggregating_expressions_columns;
-    std::set<size_t> agg_cols_ref;
-    const auto & header = query_plan->getCurrentDataStream().header;
-    for (int i = 0; i < expand_rel.aggregate_expressions_size(); ++i)
+    if (type == nullptr || (!type->isNullable() && new_type->isNullable())) // unset, or new one is nullable and current is not
     {
-        const auto & expr = expand_rel.aggregate_expressions(i);
-        if (expr.has_selection())
-        {
-            aggregating_expressions_columns.push_back(expr.selection().direct_reference().struct_field().field());
-            agg_cols_ref.insert(expr.selection().direct_reference().struct_field().field());
-        }
-        else
-        {
-            // FIXEME. see https://github.com/oap-project/gluten/pull/794
-            throw DB::Exception(
-                DB::ErrorCodes::LOGICAL_ERROR,
-                "Unsupported aggregating expression in expand node. {}. input header:{}.",
-                expr.ShortDebugString(),
-                header.dumpNames());
-        }
+        type = new_type; // adopt new_type as the column's type
     }
-    std::vector<std::set<size_t>> grouping_sets;
-    buildGroupingSets(expand_rel, grouping_sets);
-    // The input header is : aggregating columns + grouping columns.
-    auto expand_step = std::make_unique<ExpandStep>(
-        query_plan->getCurrentDataStream(), aggregating_expressions_columns, grouping_sets, expand_rel.group_name());
-    expand_step->setStepDescription("Expand step");
-    query_plan->addStep(std::move(expand_step));
-    return query_plan;
 }
 
-
-void ExpandRelParser::buildGroupingSets(const substrait::GroupIdRel & expand_rel, std::vector<std::set<size_t>> & grouping_sets)
+DB::QueryPlanPtr
+ExpandRelParser::parse(DB::QueryPlanPtr query_plan, const substrait::Rel & rel, std::list<const substrait::Rel*> & rel_stack)
 {
-    for (int i = 0; i < expand_rel.groupings_size(); ++i)
+    const auto & expand_rel = rel.expand();
+    const auto & header = query_plan->getCurrentDataStream().header;
+    // Collect per-column names/types and per-projection kinds/fields, then add an ExpandStep.
+    std::vector<std::vector<ExpandFieldKind>> expand_kinds;
+    std::vector<std::vector<DB::Field>> expand_fields;
+    std::vector<DB::DataTypePtr> types;
+    std::vector<std::string> names;
+    std::set<String> distinct_names;
+
+    expand_kinds.reserve(expand_rel.fields_size());
+    expand_fields.reserve(expand_rel.fields_size());
+
+    for (const auto & projections: expand_rel.fields())
     {
-        const auto grouping_set_pb = expand_rel.groupings(i);
-        std::set<size_t> grouping_set;
-        for (int n = 0; n < grouping_set_pb.groupsets_expressions_size(); ++n)
+        auto expand_col_size = projections.switching_field().duplicates_size();
+        // The first projection fixes the output width; every later projection must match it.
+        if (expand_kinds.empty()) types.resize(expand_col_size, nullptr);
+        if (expand_kinds.empty()) names.resize(expand_col_size);
+        if (types.size() != static_cast<size_t>(expand_col_size))
+            throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Inconsistent projection width in expand node");
+
+        std::vector<ExpandFieldKind> kinds;
+        std::vector<DB::Field> fields;
+        kinds.reserve(expand_col_size);
+        fields.reserve(expand_col_size);
+        for (int i = 0; i < expand_col_size; ++i)
         {
-            const auto & expr = grouping_set_pb.groupsets_expressions(n);
-            if (expr.has_selection())
+            const auto & project_expr = projections.switching_field().duplicates(i);
+            if (project_expr.has_selection())
             {
-                grouping_set.insert(expr.selection().direct_reference().struct_field().field());
+                auto field = project_expr.selection().direct_reference().struct_field().field();
+                kinds.push_back(ExpandFieldKind::EXPAND_FIELD_KIND_SELECTION);
+                fields.push_back(field);
+                updateType(types[i], header.getByPosition(field).type);
+                const auto & name = header.getByPosition(field).name;
+                if (names[i].empty()) // the first selection seen for column i names it
+                {
+                    if (distinct_names.contains(name)) // name already taken by another output column
+                    {
+                        auto unique_name = getUniqueName(name);
+                        distinct_names.emplace(unique_name);
+                        names[i] = unique_name;
+                    }
+                    else
+                    {
+                        distinct_names.emplace(name);
+                        names[i] = name;
+                    }
+                }
+            }
+            else if (project_expr.has_literal())
+            {
+                auto [type, field] = parseLiteral(project_expr.literal());
+                kinds.push_back(ExpandFieldKind::EXPAND_FIELD_KIND_LITERAL);
+                fields.push_back(field);
+                updateType(types[i], type); // literals contribute a type but never a name
             }
             else
             {
-                throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unsupported expression in grouping sets");
+                throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unsupported expression in projections");
             }
         }
-        grouping_sets.emplace_back(std::move(grouping_set));
+        expand_kinds.push_back(std::move(kinds));
+        expand_fields.push_back(std::move(fields));
     }
+
+    ExpandField expand_field(names, types, expand_kinds, expand_fields);
+    auto expand_step = std::make_unique<ExpandStep>(query_plan->getCurrentDataStream(), std::move(expand_field));
+    expand_step->setStepDescription("Expand Step");
+    query_plan->addStep(std::move(expand_step));
+    return query_plan;
 }
 
 void registerExpandRelParser(RelParserFactory & factory)
@@ -87,6 +115,6 @@ void registerExpandRelParser(RelParserFactory & factory)
     {
         return std::make_shared<ExpandRelParser>(plan_parser);
     };
-    factory.registerBuilder(substrait::Rel::RelTypeCase::kGroupId, builder);
+    factory.registerBuilder(substrait::Rel::RelTypeCase::kExpand, builder);
 }
 }
diff --git a/cpp-ch/local-engine/Parser/ExpandRelParser.h b/cpp-ch/local-engine/Parser/ExpandRelParser.h
index 02d0978c9f42..a29e349b8cd4 100644
--- a/cpp-ch/local-engine/Parser/ExpandRelParser.h
+++ b/cpp-ch/local-engine/Parser/ExpandRelParser.h
@@ -10,8 +10,6 @@ class ExpandRelParser : public RelParser
     explicit ExpandRelParser(SerializedPlanParser * plan_parser_);
     ~ExpandRelParser() override = default;
     DB::QueryPlanPtr
-    parse(DB::QueryPlanPtr query_plan, const substrait::Rel & sort_rel, std::list<const substrait::Rel *> & rel_stack_) override;
-private:
-    static void buildGroupingSets(const substrait::GroupIdRel & expand_rel, std::vector<std::set<size_t>> & grouping_sets);
+    parse(DB::QueryPlanPtr query_plan, const substrait::Rel & rel, std::list<const substrait::Rel *> & rel_stack_) override;
 };
 }
diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
index 0e179679dd4f..95612c98d69d 100644
--- a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
+++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
@@ -896,13 +896,13 @@ QueryPlanPtr SerializedPlanParser::parseOp(const substrait::Rel & rel, std::list
             query_plan = win_parser->parse(std::move(query_plan), rel, rel_stack);
             break;
         }
-        case substrait::Rel::RelTypeCase::kGroupId: {
+        case substrait::Rel::RelTypeCase::kExpand: {
             rel_stack.push_back(&rel);
-            const auto & expand_rel = rel.group_id();
+            const auto & expand_rel = rel.expand();
             query_plan = parseOp(expand_rel.input(), rel_stack);
             rel_stack.pop_back();
-            auto epand_parser = RelParserFactory::instance().getBuilder(substrait::Rel::RelTypeCase::kGroupId)(this);
-            query_plan = epand_parser->parse(std::move(query_plan), rel, rel_stack);
+            auto expand_parser = RelParserFactory::instance().getBuilder(substrait::Rel::RelTypeCase::kExpand)(this);
+            query_plan = expand_parser->parse(std::move(query_plan), rel, rel_stack);
             break;
         }
         default:
diff --git a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
index 70e9b9aca409..e44411b7a077 100644
--- a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
@@ -26,19 +26,12 @@ class ClickHouseTestSettings extends BackendTestSettings {
   enableSuite[GlutenDataFrameAggregateSuite]
     .exclude(
       "average", // [overwritten by Gluten - xxx]
-      "groupBy", // [overwritten by Gluten - xxx]
-      "count", // [overwritten by Gluten - xxx]
-      "null count", // [overwritten by Gluten - xxx]
       "multiple column distinct count", // [not urgent, function with multiple params]
       "agg without groups and functions", // [not urgent]
-      "zero moments", // [not urgent]
-      "moments", // [not urgent]
-      GlutenTestConstants.GLUTEN_TEST + "variance", // [not urgent]
       "collect functions structs", // [not urgent]
       "SPARK-31500: collect_set() of BinaryType returns duplicate elements", // [not urgent]
       "SPARK-17641: collect functions should not collect null values", // [not urgent]
       "collect functions should be able to cast to array type with no null values", // [not urgent]
-      "SPARK-14664: Decimal sum/avg over window should work.", // [wishlist] support decimal
       "SQL decimal test (used for catching certain decimal " +
         "handling bugs in aggregates)", // [wishlist] support decimal
       "SPARK-17616: distinct aggregate combined with a non-partial aggregate", // [not urgent]
@@ -47,23 +40,19 @@ class ClickHouseTestSettings extends BackendTestSettings {
       " before using it", // [not urgent]
       "max_by", // [not urgent]
       "min_by", // [not urgent]
-      "count_if", // [not urgent]
       "aggregation with filter"
     )
     .excludeByPrefix(
       "SPARK-22951", // [not urgent] dropDuplicates
       "SPARK-26021", // [not urgent] behavior on NaN and -0.0 are different
-      "SPARK-31620", // [not urgent] sum_if
       "SPARK-32136", // [not urgent] struct type
       "SPARK-32344", // [not urgent] FIRST/LAST
       "SPARK-34713", // [not urgent] struct type
-      "SPARK-38221", // [not urgent] struct type
       "SPARK-34716", // [not urgent] interval
       "SPARK-34837", // [not urgent] interval
       "SPARK-35412", // [not urgent] interval
       "SPARK-36926", // [wishlist] support decimal
       "SPARK-38185", // [not urgent] empty agg
-      "SPARK-18952", // [not urgent]
       "SPARK-32038" // [not urgent]
     )