From bd43690eddbb612e11a524db5fcb632c9cf751fd Mon Sep 17 00:00:00 2001 From: exmy Date: Wed, 26 Apr 2023 15:45:38 +0800 Subject: [PATCH] [GLUTEN-1392][CH] Support new ExpandRel (#1432) What changes were proposed in this pull request? support new ExpandRel introduced by #1361 (Fixes: #1392) How was this patch tested? unit tests --- .gitignore | 4 + .../backendsapi/clickhouse/CHBackend.scala | 2 + .../GlutenClickHouseTPCHParquetSuite.scala | 46 +++++++ cpp-ch/local-engine/Operator/ExpandStep.cpp | 45 ++----- cpp-ch/local-engine/Operator/ExpandStep.h | 12 +- .../local-engine/Operator/ExpandTransform.cpp | 72 ++++++----- cpp-ch/local-engine/Operator/ExpandTransorm.h | 7 +- cpp-ch/local-engine/Parser/ExpandField.h | 42 +++++++ .../local-engine/Parser/ExpandRelParser.cpp | 112 +++++++++++------- cpp-ch/local-engine/Parser/ExpandRelParser.h | 4 +- .../Parser/SerializedPlanParser.cpp | 8 +- .../clickhouse/ClickHouseTestSettings.scala | 11 -- 12 files changed, 232 insertions(+), 133 deletions(-) create mode 100644 cpp-ch/local-engine/Parser/ExpandField.h diff --git a/.gitignore b/.gitignore index b39acbdf63dd..d89be1976c42 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,10 @@ # vscode config .vscode +# vscode scala +.bloop +.metals + # Mobile Tools for Java (J2ME) .mtj.tmp/ diff --git a/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHBackend.scala b/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHBackend.scala index 1973e4e54a67..469c5f1b8b82 100644 --- a/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHBackend.scala +++ b/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHBackend.scala @@ -135,6 +135,8 @@ object CHBackendSettings extends BackendSettings with Logging { override def supportExpandExec(): Boolean = true + override def supportNewExpandContract(): Boolean = true + override def excludeScanExecFromCollapsedStage(): Boolean = SQLConf.get .getConfString(GLUTEN_CLICKHOUSE_SEP_SCAN_RDD, GLUTEN_CLICKHOUSE_SEP_SCAN_RDD_DEFAULT) diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala index 787835daef75..18d6b0727682 100644 --- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala +++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala @@ -666,6 +666,52 @@ class GlutenClickHouseTPCHParquetSuite extends GlutenClickHouseTPCHAbstractSuite compareResultsAgainstVanillaSpark(sql, true, { _ => }) } + test("expand with nullable type not match") { + val sql = + """ + |select a, n_regionkey, n_nationkey from + |(select nvl(n_name, "aaaa") as a, n_regionkey, n_nationkey from nation) + |group by n_regionkey, n_nationkey + |grouping sets((a, n_regionkey, n_nationkey),(a, n_regionkey), (a)) + |order by a, n_regionkey, n_nationkey + |""".stripMargin + runQueryAndCompare(sql)(checkOperatorMatch[ExpandExecTransformer]) + } + + test("expand col result") { + val sql = + """ + |select n_regionkey, n_nationkey, count(1) as cnt from nation + |group by n_regionkey, n_nationkey with rollup + |order by n_regionkey, n_nationkey, cnt + |""".stripMargin + runQueryAndCompare(sql)(checkOperatorMatch[ExpandExecTransformer]) + } + + test("expand with not nullable") { + val sql = + """ + |select a,b, sum(c) from + |(select nvl(n_nationkey, 0) as c, nvl(n_name, '') as b, nvl(n_nationkey, 0) as a from nation) + |group by a,b with rollup + |""".stripMargin + runQueryAndCompare(sql)(checkOperatorMatch[ExpandExecTransformer]) + } + + test("expand with function expr") { + val sql = + """ + |select + | n_name, + | count(distinct n_regionkey) as col1, + | count(distinct concat(n_regionkey, n_nationkey)) as col2 + |from nation + |group by n_name + |order by n_name, col1, col2 + |""".stripMargin + runQueryAndCompare(sql)(checkOperatorMatch[ExpandExecTransformer]) + } + test("test 'position/locate'") { runQueryAndCompare( """ diff --git a/cpp-ch/local-engine/Operator/ExpandStep.cpp b/cpp-ch/local-engine/Operator/ExpandStep.cpp index d69ae30236d5..ce38233963ae 100644 --- a/cpp-ch/local-engine/Operator/ExpandStep.cpp +++ b/cpp-ch/local-engine/Operator/ExpandStep.cpp @@ -32,16 +32,12 @@ static DB::ITransformingStep::Traits getTraits() ExpandStep::ExpandStep( const DB::DataStream & input_stream_, - const std::vector & aggregating_expressions_columns_, - const std::vector> & grouping_sets_, - const std::string & grouping_id_name_) + const ExpandField & project_set_exprs_) : DB::ITransformingStep( input_stream_, - buildOutputHeader(input_stream_.header, aggregating_expressions_columns_, grouping_id_name_), + buildOutputHeader(input_stream_.header, project_set_exprs_), getTraits()) - , aggregating_expressions_columns(aggregating_expressions_columns_) - , grouping_sets(grouping_sets_) - , grouping_id_name(grouping_id_name_) + , project_set_exprs(project_set_exprs_) { header = input_stream_.header; output_header = getOutputStream().header; @@ -49,36 +45,21 @@ ExpandStep::ExpandStep( DB::Block ExpandStep::buildOutputHeader( const DB::Block & input_header, - const std::vector & aggregating_expressions_columns_, - const std::string & grouping_id_name_) + const ExpandField & project_set_exprs_) { DB::ColumnsWithTypeAndName cols; - std::set agg_cols; + const auto & types = project_set_exprs_.getTypes(); + const auto & names = project_set_exprs_.getNames(); - for (size_t i = 0; i < input_header.columns(); ++i) + for (size_t i = 0; i < project_set_exprs_.getExpandCols(); ++i) { - const auto & old_col = input_header.getByPosition(i); - if (i < aggregating_expressions_columns_.size()) - { - // do nothing with the aggregating columns. - cols.push_back(old_col); - continue; - } - if (old_col.type->isNullable()) - cols.push_back(old_col); + String col_name; + if (!names[i].empty()) + col_name = names[i]; else - { - auto null_map = DB::ColumnUInt8::create(0, 0); - auto null_col = DB::ColumnNullable::create(old_col.column, std::move(null_map)); - auto null_type = std::make_shared(old_col.type); - cols.push_back(DB::ColumnWithTypeAndName(null_col, null_type, old_col.name)); - } + col_name = "expand_" + std::to_string(i); + cols.push_back(DB::ColumnWithTypeAndName(types[i], col_name)); } - - // add group id column - auto grouping_id_col = DB::ColumnInt64::create(0, 0); - auto grouping_id_type = std::make_shared(); - cols.emplace_back(DB::ColumnWithTypeAndName(std::move(grouping_id_col), grouping_id_type, grouping_id_name_)); return DB::Block(cols); } @@ -89,7 +70,7 @@ void ExpandStep::transformPipeline(DB::QueryPipelineBuilder & pipeline, const DB DB::Processors new_processors; for (auto & output : outputs) { - auto expand_op = std::make_shared(header, output_header, aggregating_expressions_columns, grouping_sets); + auto expand_op = std::make_shared(header, output_header, project_set_exprs); new_processors.push_back(expand_op); DB::connect(*output, expand_op->getInputs().front()); } diff --git a/cpp-ch/local-engine/Operator/ExpandStep.h b/cpp-ch/local-engine/Operator/ExpandStep.h index 3b12432df9cd..1de3bc4aefab 100644 --- a/cpp-ch/local-engine/Operator/ExpandStep.h +++ b/cpp-ch/local-engine/Operator/ExpandStep.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace local_engine { @@ -12,9 +13,7 @@ class ExpandStep : public DB::ITransformingStep // The input stream should only contain grouping columns. explicit ExpandStep( const DB::DataStream & input_stream_, - const std::vector & aggregating_expressions_columns_, - const std::vector> & grouping_sets_, - const std::string & grouping_id_name_); + const ExpandField & project_set_exprs_); ~ExpandStep() override = default; String getName() const override { return "ExpandStep"; } @@ -22,9 +21,7 @@ class ExpandStep : public DB::ITransformingStep void transformPipeline(DB::QueryPipelineBuilder & pipeline, const DB::BuildQueryPipelineSettings & settings) override; void describePipeline(DB::IQueryPlanStep::FormatSettings & settings) const override; private: - std::vector aggregating_expressions_columns; - std::vector> grouping_sets; - std::string grouping_id_name; + ExpandField project_set_exprs; DB::Block header; DB::Block output_header; @@ -32,7 +29,6 @@ class ExpandStep : public DB::ITransformingStep static DB::Block buildOutputHeader( const DB::Block & header, - const std::vector & aggregating_expressions_columns_, - const std::string & grouping_id_name_); + const ExpandField & project_set_exprs_); }; } diff --git a/cpp-ch/local-engine/Operator/ExpandTransform.cpp b/cpp-ch/local-engine/Operator/ExpandTransform.cpp index c083c1afa552..dadd2164c477 100644 --- a/cpp-ch/local-engine/Operator/ExpandTransform.cpp +++ b/cpp-ch/local-engine/Operator/ExpandTransform.cpp @@ -4,8 +4,10 @@ #include #include #include +#include #include +#include "Common/Exception.h" #include #include @@ -14,11 +16,9 @@ namespace local_engine ExpandTransform::ExpandTransform( const DB::Block & input_, const DB::Block & output_, - const std::vector & aggregating_expressions_columns_, - const std::vector> & grouping_sets_) + const ExpandField & project_set_exprs_) : DB::IProcessor({input_}, {output_}) - , aggregating_expressions_columns(aggregating_expressions_columns_) - , grouping_sets(grouping_sets_) + , project_set_exprs(project_set_exprs_) {} ExpandTransform::Status ExpandTransform::prepare() @@ -68,43 +68,57 @@ ExpandTransform::Status ExpandTransform::prepare() void ExpandTransform::work() { assert(expanded_chunks.empty()); - size_t agg_cols_size = aggregating_expressions_columns.size(); - for (int set_id = 0; static_cast(set_id) < grouping_sets.size(); ++set_id) + const auto & original_cols = input_chunk.getColumns(); + size_t rows = input_chunk.getNumRows(); + + for (size_t i = 0; i < project_set_exprs.getExpandRows(); ++i) { - const auto & sets = grouping_sets[set_id]; DB::Columns cols; - const auto & original_cols = input_chunk.getColumns(); - for (size_t i = 0; i < original_cols.size(); ++i) + for (size_t j = 0; j < project_set_exprs.getExpandCols(); ++j) { - const auto & original_col = original_cols[i]; - size_t rows = original_col->size(); - if (i < agg_cols_size) - { - cols.push_back(original_col); - continue; - } - // the output columns should all be nullable. - if (!sets.contains(i)) - { - auto null_map = DB::ColumnUInt8::create(rows, 1); - auto col = DB::ColumnNullable::create(original_col, std::move(null_map)); - cols.push_back(std::move(col)); - } - else + const auto & type = project_set_exprs.getTypes()[j]; + const auto & kind = project_set_exprs.getKinds()[i][j]; + const auto & field = project_set_exprs.getFields()[i][j]; + + if (kind == EXPAND_FIELD_KIND_SELECTION) { - if (original_col->isNullable()) + const auto & original_col = original_cols[field.get()]; + if (type->isNullable() == original_col->isNullable()) + { cols.push_back(original_col); - else + } + else if (type->isNullable() && !original_col->isNullable()) { auto null_map = DB::ColumnUInt8::create(rows, 0); auto col = DB::ColumnNullable::create(original_col, std::move(null_map)); cols.push_back(std::move(col)); } + else + { + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, + "Miss match nullable, column {} is nullable, but type {} is not nullable", + original_col->getName(), type->getName()); + } + } + else + { + if (field.isNull()) + { + // Add null column + auto null_map = DB::ColumnUInt8::create(rows, 1); + auto nested_type = DB::removeNullable(type); + auto col = DB::ColumnNullable::create(nested_type->createColumn()->cloneResized(rows), std::move(null_map)); + cols.push_back(std::move(col)); + } + else + { + // Add constant column: gid, gpos, etc. + auto col = type->createColumnConst(rows, field); + cols.push_back(std::move(col->convertToFullColumnIfConst())); + } } } - auto id_col = DB::DataTypeInt64().createColumnConst(input_chunk.getNumRows(), set_id); - cols.push_back(std::move(id_col)); - expanded_chunks.push_back(DB::Chunk(cols, input_chunk.getNumRows())); + expanded_chunks.push_back(DB::Chunk(cols, rows)); } has_output = true; has_input = false; diff --git a/cpp-ch/local-engine/Operator/ExpandTransorm.h b/cpp-ch/local-engine/Operator/ExpandTransorm.h index b131c36cd809..e2579dddc39a 100644 --- a/cpp-ch/local-engine/Operator/ExpandTransorm.h +++ b/cpp-ch/local-engine/Operator/ExpandTransorm.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace local_engine { // For handling substrait expand node. @@ -21,16 +22,14 @@ class ExpandTransform : public DB::IProcessor ExpandTransform( const DB::Block & input_, const DB::Block & output_, - const std::vector & aggregating_expressions_columns_, - const std::vector> & grouping_sets_); + const ExpandField & project_set_exprs_); Status prepare() override; void work() override; DB::String getName() const override { return "ExpandTransform"; } private: - std::vector aggregating_expressions_columns; - std::vector> grouping_sets; + ExpandField project_set_exprs; bool has_input = false; bool has_output = false; diff --git a/cpp-ch/local-engine/Parser/ExpandField.h b/cpp-ch/local-engine/Parser/ExpandField.h new file mode 100644 index 000000000000..4c15f7f93c6e --- /dev/null +++ b/cpp-ch/local-engine/Parser/ExpandField.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include + +namespace local_engine +{ + +enum ExpandFieldKind +{ + EXPAND_FIELD_KIND_SELECTION, + EXPAND_FIELD_KIND_LITERAL, +}; + +class ExpandField +{ +public: + ExpandField() = default; + ExpandField( + const std::vector & names_, + const std::vector & types_, + const std::vector> & kinds_, + const std::vector> & fields_): + names(names_), types(types_), kinds(kinds_), fields(fields_) + {} + + const std::vector & getNames() const { return names; } + const std::vector & getTypes() const { return types; } + const std::vector> & getKinds() const { return kinds; } + const std::vector> & getFields() const { return fields; } + + size_t getExpandRows() const { return kinds.size(); } + size_t getExpandCols() const { return types.size(); } + +private: + std::vector names; + std::vector types; + std::vector> kinds; + std::vector> fields; +}; + +} diff --git a/cpp-ch/local-engine/Parser/ExpandRelParser.cpp b/cpp-ch/local-engine/Parser/ExpandRelParser.cpp index f5d086a75ae1..5e5c5712c2ba 100644 --- a/cpp-ch/local-engine/Parser/ExpandRelParser.cpp +++ b/cpp-ch/local-engine/Parser/ExpandRelParser.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include namespace DB { @@ -23,62 +25,88 @@ ExpandRelParser::ExpandRelParser(SerializedPlanParser * plan_parser_) : RelParser(plan_parser_) {} -DB::QueryPlanPtr -ExpandRelParser::parse(DB::QueryPlanPtr query_plan, const substrait::Rel & rel, std::list & rel_stack) +void updateType(DB::DataTypePtr & type, const DB::DataTypePtr & new_type) { - const auto & expand_rel = rel.group_id(); - std::vector aggregating_expressions_columns; - std::set agg_cols_ref; - const auto & header = query_plan->getCurrentDataStream().header; - for (int i = 0; i < expand_rel.aggregate_expressions_size(); ++i) + if (type == nullptr || (!type->isNullable() && new_type->isNullable())) { - const auto & expr = expand_rel.aggregate_expressions(i); - if (expr.has_selection()) - { - aggregating_expressions_columns.push_back(expr.selection().direct_reference().struct_field().field()); - agg_cols_ref.insert(expr.selection().direct_reference().struct_field().field()); - } - else - { - // FIXEME. see https://github.com/oap-project/gluten/pull/794 - throw DB::Exception( - DB::ErrorCodes::LOGICAL_ERROR, - "Unsupported aggregating expression in expand node. {}. input header:{}.", - expr.ShortDebugString(), - header.dumpNames()); - } + type = new_type; } - std::vector> grouping_sets; - buildGroupingSets(expand_rel, grouping_sets); - // The input header is : aggregating columns + grouping columns. - auto expand_step = std::make_unique( - query_plan->getCurrentDataStream(), aggregating_expressions_columns, grouping_sets, expand_rel.group_name()); - expand_step->setStepDescription("Expand step"); - query_plan->addStep(std::move(expand_step)); - return query_plan; } - -void ExpandRelParser::buildGroupingSets(const substrait::GroupIdRel & expand_rel, std::vector> & grouping_sets) +DB::QueryPlanPtr +ExpandRelParser::parse(DB::QueryPlanPtr query_plan, const substrait::Rel & rel, std::list & rel_stack) { - for (int i = 0; i < expand_rel.groupings_size(); ++i) + const auto & expand_rel = rel.expand(); + const auto & header = query_plan->getCurrentDataStream().header; + + std::vector> expand_kinds; + std::vector> expand_fields; + std::vector types; + std::vector names; + std::set distinct_names; + + expand_kinds.reserve(expand_rel.fields_size()); + expand_fields.reserve(expand_rel.fields_size()); + + for (const auto & projections: expand_rel.fields()) { - const auto grouping_set_pb = expand_rel.groupings(i); - std::set grouping_set; - for (int n = 0; n < grouping_set_pb.groupsets_expressions_size(); ++n) + auto expand_col_size = projections.switching_field().duplicates_size(); + + std::vector kinds; + std::vector fields; + + kinds.reserve(expand_col_size); + fields.reserve(expand_col_size); + + if (types.empty()) types.resize(expand_col_size, nullptr); + if (names.empty()) names.resize(expand_col_size); + + for (int i = 0; i < expand_col_size; ++i) { - const auto & expr = grouping_set_pb.groupsets_expressions(n); - if (expr.has_selection()) + const auto & project_expr = projections.switching_field().duplicates(i); + if (project_expr.has_selection()) { - grouping_set.insert(expr.selection().direct_reference().struct_field().field()); + auto field = project_expr.selection().direct_reference().struct_field().field(); + kinds.push_back(ExpandFieldKind::EXPAND_FIELD_KIND_SELECTION); + fields.push_back(field); + updateType(types[i], header.getByPosition(field).type); + const auto & name = header.getByPosition(field).name; + if (names[i].empty()) + { + if (distinct_names.contains(name)) + { + auto unique_name = getUniqueName(name); + distinct_names.emplace(unique_name); + names[i] = unique_name; + } + else + { + distinct_names.emplace(name); + names[i] = name; + } + } + } + else if (project_expr.has_literal()) + { + auto [type, field] = parseLiteral(project_expr.literal()); + kinds.push_back(ExpandFieldKind::EXPAND_FIELD_KIND_LITERAL); + fields.push_back(field); + updateType(types[i], type); } else { - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unsupported expression in grouping sets"); + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unsupported expression in projections"); } } - grouping_sets.emplace_back(std::move(grouping_set)); + expand_kinds.push_back(std::move(kinds)); + expand_fields.push_back(std::move(fields)); } + + ExpandField expand_field(names, types, expand_kinds, expand_fields); + auto expand_step = std::make_unique(query_plan->getCurrentDataStream(), std::move(expand_field)); + expand_step->setStepDescription("Expand Step"); + query_plan->addStep(std::move(expand_step)); + return query_plan; } void registerExpandRelParser(RelParserFactory & factory) @@ -87,6 +115,6 @@ void registerExpandRelParser(RelParserFactory & factory) { return std::make_shared(plan_parser); }; - factory.registerBuilder(substrait::Rel::RelTypeCase::kGroupId, builder); + factory.registerBuilder(substrait::Rel::RelTypeCase::kExpand, builder); } } diff --git a/cpp-ch/local-engine/Parser/ExpandRelParser.h b/cpp-ch/local-engine/Parser/ExpandRelParser.h index 02d0978c9f42..a29e349b8cd4 100644 --- a/cpp-ch/local-engine/Parser/ExpandRelParser.h +++ b/cpp-ch/local-engine/Parser/ExpandRelParser.h @@ -10,8 +10,6 @@ class ExpandRelParser : public RelParser explicit ExpandRelParser(SerializedPlanParser * plan_parser_); ~ExpandRelParser() override = default; DB::QueryPlanPtr - parse(DB::QueryPlanPtr query_plan, const substrait::Rel & sort_rel, std::list & rel_stack_) override; -private: - static void buildGroupingSets(const substrait::GroupIdRel & expand_rel, std::vector> & grouping_sets); + parse(DB::QueryPlanPtr query_plan, const substrait::Rel & rel, std::list & rel_stack_) override; }; } diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp index 0e179679dd4f..95612c98d69d 100644 --- a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp +++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp @@ -896,13 +896,13 @@ QueryPlanPtr SerializedPlanParser::parseOp(const substrait::Rel & rel, std::list query_plan = win_parser->parse(std::move(query_plan), rel, rel_stack); break; } - case substrait::Rel::RelTypeCase::kGroupId: { + case substrait::Rel::RelTypeCase::kExpand: { rel_stack.push_back(&rel); - const auto & expand_rel = rel.group_id(); + const auto & expand_rel = rel.expand(); query_plan = parseOp(expand_rel.input(), rel_stack); rel_stack.pop_back(); - auto epand_parser = RelParserFactory::instance().getBuilder(substrait::Rel::RelTypeCase::kGroupId)(this); - query_plan = epand_parser->parse(std::move(query_plan), rel, rel_stack); + auto expand_parser = RelParserFactory::instance().getBuilder(substrait::Rel::RelTypeCase::kExpand)(this); + query_plan = expand_parser->parse(std::move(query_plan), rel, rel_stack); break; } default: diff --git a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala index 70e9b9aca409..e44411b7a077 100644 --- a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala @@ -26,19 +26,12 @@ class ClickHouseTestSettings extends BackendTestSettings { enableSuite[GlutenDataFrameAggregateSuite] .exclude( "average", // [overwritten by Gluten - xxx] - "groupBy", // [overwritten by Gluten - xxx] - "count", // [overwritten by Gluten - xxx] - "null count", // [overwritten by Gluten - xxx] "multiple column distinct count", // [not urgent, function with multiple params] "agg without groups and functions", // [not urgent] - "zero moments", // [not urgent] - "moments", // [not urgent] - GlutenTestConstants.GLUTEN_TEST + "variance", // [not urgent] "collect functions structs", // [not urgent] "SPARK-31500: collect_set() of BinaryType returns duplicate elements", // [not urgent] "SPARK-17641: collect functions should not collect null values", // [not urgent] "collect functions should be able to cast to array type with no null values", // [not urgent] - "SPARK-14664: Decimal sum/avg over window should work.", // [wishlist] support decimal "SQL decimal test (used for catching certain decimal " + "handling bugs in aggregates)", // [wishlist] support decimal "SPARK-17616: distinct aggregate combined with a non-partial aggregate", // [not urgent] @@ -47,23 +40,19 @@ class ClickHouseTestSettings extends BackendTestSettings { " before using it", // [not urgent] "max_by", // [not urgent] "min_by", // [not urgent] - "count_if", // [not urgent] "aggregation with filter" ) .excludeByPrefix( "SPARK-22951", // [not urgent] dropDuplicates "SPARK-26021", // [not urgent] behavior on NaN and -0.0 are different - "SPARK-31620", // [not urgent] sum_if "SPARK-32136", // [not urgent] struct type "SPARK-32344", // [not urgent] FIRST/LAST "SPARK-34713", // [not urgent] struct type - "SPARK-38221", // [not urgent] struct type "SPARK-34716", // [not urgent] interval "SPARK-34837", // [not urgent] interval "SPARK-35412", // [not urgent] interval "SPARK-36926", // [wishlist] support decimal "SPARK-38185", // [not urgent] empty agg - "SPARK-18952", // [not urgent] "SPARK-32038" // [not urgent] )