-
Notifications
You must be signed in to change notification settings - Fork 442
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into gluten-1367
- Loading branch information
Showing
161 changed files
with
29,445 additions
and
89 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
cmake_minimum_required(VERSION 3.20)

# Location of the ClickHouse checkout that libch is built from. Defaults to a
# "ClickHouse" directory under the top-level source dir; override it with
# -DCH_SOURCE_DIR=/path/to/ClickHouse to build against an existing checkout.
set(CH_SOURCE_DIR ${CMAKE_SOURCE_DIR}/ClickHouse CACHE STRING "ClickHouse source dir")

project(libch LANGUAGES C CXX ASM)

# An empty glob result means the directory is missing or has no entries.
file(GLOB clickhouse_files "${CH_SOURCE_DIR}/*")

if (NOT clickhouse_files)
    if ("${CH_SOURCE_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}/ClickHouse")
        # Default location: fetch the sources automatically.
        execute_process(
            COMMAND git clone --recursive --depth 1 https://github.com/Kyligence/ClickHouse.git "${CH_SOURCE_DIR}"
            OUTPUT_VARIABLE download_ch
            RESULT_VARIABLE clone_ch_result)
        if (NOT clone_ch_result EQUAL 0)
            message(FATAL_ERROR "failed to clone ClickHouse into ${CH_SOURCE_DIR} (git exited with: ${clone_ch_result})")
        endif()

        # execute_process() does not run a shell, so "cd" and "&&" cannot be
        # used; each git step is its own call with WORKING_DIRECTORY instead.
        execute_process(
            COMMAND git pull
            WORKING_DIRECTORY "${CH_SOURCE_DIR}"
            OUTPUT_VARIABLE download_ch
            RESULT_VARIABLE pull_ch_result)
        if (NOT pull_ch_result EQUAL 0)
            message(WARNING "git pull in ${CH_SOURCE_DIR} failed (git exited with: ${pull_ch_result})")
        endif()

        execute_process(
            COMMAND git submodule update --init --recursive --force --depth 1
            WORKING_DIRECTORY "${CH_SOURCE_DIR}"
            OUTPUT_VARIABLE download_ch
            RESULT_VARIABLE submodule_ch_result)
        if (NOT submodule_ch_result EQUAL 0)
            message(WARNING "git submodule update in ${CH_SOURCE_DIR} failed (git exited with: ${submodule_ch_result})")
        endif()
    else()
        # A user-supplied checkout is never populated automatically; stop the
        # configure step and print the command that creates it.
        message(FATAL_ERROR
            "clickhouse ${CH_SOURCE_DIR} is missing or empty. to fix try run:\n"
            "    git clone --recursive --depth 1 https://github.com/Kyligence/ClickHouse.git ${CH_SOURCE_DIR}")
    endif()
endif()
|
||
# Link the substrait directory from gluten-core into local-engine/proto so both
# trees refer to the same files.
if (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/local-engine/proto/substrait")
    file(CREATE_LINK
        "${CMAKE_CURRENT_SOURCE_DIR}/../gluten-core/src/main/resources/substrait/proto/substrait"
        "${CMAKE_CURRENT_SOURCE_DIR}/local-engine/proto/substrait"
        RESULT substrait_link_result
        SYMBOLIC)
    if (NOT substrait_link_result EQUAL 0)
        message(FATAL_ERROR "failed to link local-engine/proto/substrait: ${substrait_link_result}")
    endif ()
endif ()

# Make local-engine visible inside the ClickHouse tree as
# utils/extern-local-engine.
if (NOT EXISTS "${CH_SOURCE_DIR}/utils/extern-local-engine/")
    file(CREATE_LINK
        "${CMAKE_CURRENT_SOURCE_DIR}/local-engine"
        "${CH_SOURCE_DIR}/utils/extern-local-engine"
        RESULT local_engine_link_result
        SYMBOLIC)
    if (NOT local_engine_link_result EQUAL 0)
        message(FATAL_ERROR "failed to link ${CH_SOURCE_DIR}/utils/extern-local-engine: ${local_engine_link_result}")
    endif ()
endif ()
|
||
# Build tree used for the nested ClickHouse configure/build.
set(CH_BINARY_DIR "${CMAKE_CURRENT_SOURCE_DIR}/build")

# Configure ClickHouse with the extern local engine enabled, then build its
# "ch" target. The two COMMANDs run in order. Nothing in this rule creates the
# declared OUTPUT file, so the rule is re-run on every build of build_ch.
# The commands are invoked directly (no "bash -c" wrapper) and VERBATIM keeps
# argument escaping identical across generators.
add_custom_command(
    OUTPUT _build_ch
    COMMAND ${CMAKE_COMMAND}
        "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
        "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
        "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
        -DENABLE_PROTOBUF=ON
        -DENABLE_TESTS=OFF
        -DENABLE_JEMALLOC=ON
        -DENABLE_MULTITARGET_CODE=ON
        -DENABLE_EXTERN_LOCAL_ENGINE=ON
        -S "${CH_SOURCE_DIR}" -G Ninja -B "${CH_BINARY_DIR}"
    COMMAND ${CMAKE_COMMAND} --build "${CH_BINARY_DIR}" --target ch
    USES_TERMINAL
    VERBATIM
)

add_custom_target(build_ch ALL DEPENDS _build_ch)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# How to build | ||
项目依赖ClickHouse,有两种选择,一种是自动下载ClickHouse源码到当前目录, | ||
另一种手动clone ClickHouse代码(https://github.com/Kyligence/ClickHouse.git)并通过cmake参数CH_SOURCE_DIR指定。 | ||
|
||
代码开发推荐使用外部ClickHouse源码,下面说明如何绑定外部ClickHouse项目进行编译 | ||
## 克隆ClickHouse | ||
```shell | ||
export CH_SOURCE_DIR=/tmp/ClickHouse #可以选择自己的目录 | ||
git clone https://github.com/Kyligence/ClickHouse.git ${CH_SOURCE_DIR} | ||
cd ${CH_SOURCE_DIR} | ||
# 更新submodule | ||
git submodule update --init --recursive --depth 1 | ||
``` | ||
|
||
## 编译CH依赖 | ||
构建target build_ch,生成所有的静态链接库依赖。并刷新cmake build目录。 | ||
```shell | ||
export GLUTEN_SOURCE=$(pwd) | ||
cmake -G Ninja -S ${GLUTEN_SOURCE}/cpp-ch -B ${GLUTEN_SOURCE}/cpp-ch/build_ch -DCH_SOURCE_DIR=${CH_SOURCE_DIR} -DCMAKE_C_COMPILER=clang-15 -DCMAKE_CXX_COMPILER=clang++-15 -DCMAKE_BUILD_TYPE=Release | ||
cmake --build ${GLUTEN_SOURCE}/cpp-ch/build_ch --target build_ch | ||
``` | ||
|
||
动态链接库位于`cpp-ch/build/utils/extern-local-engine/libch.so` | ||
|
||
# 模块拆分原理 | ||
local_engine目录与ClickHouse项目通过软链接的方式关联,cpp-ch下的cmake会在ClickHouse创建一个utils/extern-local-engine的软链接。 | ||
整体开发方式可以与之前保持一致,只是git提交需要通过gluten项目完成。 | ||
新增cmake option: | ||
* ENABLE_EXTERN_LOCAL_ENGINE=ON 在导入ClickHouse时指定启用extern-local-engine | ||
|
||
# 一些问题 | ||
启动时需要指定LD_PRELOAD={path of libch.so},目的是让libch.so内的jemalloc最先被加载。 | ||
|
||
spark-submit --conf spark.executorEnv.LD_PRELOAD=/path/to/your/library | ||
|
||
# 新的Jenkins CI | ||
https://cicd-aws.kyligence.com/job/Gluten/job/gluten-ci/ | ||
公共只读账号:gluten/hN2xX3uQ4m |
84 changes: 84 additions & 0 deletions
84
cpp-ch/local-engine/AggregateFunctions/AggregateFunctionPartialMerge.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
#include <AggregateFunctions/AggregateFunctionPartialMerge.h> | ||
#include <AggregateFunctions/AggregateFunctionCombinatorFactory.h> | ||
#include <DataTypes/DataTypeAggregateFunction.h> | ||
|
||
|
||
using namespace DB; | ||
|
||
namespace DB | ||
{ | ||
namespace ErrorCodes | ||
{ | ||
extern const int ILLEGAL_TYPE_OF_ARGUMENT; | ||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; | ||
} | ||
} | ||
|
||
namespace local_engine | ||
{ | ||
|
||
namespace | ||
{ | ||
|
||
class AggregateFunctionCombinatorPartialMerge final : public IAggregateFunctionCombinator | ||
{ | ||
public: | ||
String getName() const override { return "PartialMerge"; } | ||
|
||
DataTypes transformArguments(const DataTypes & arguments) const override | ||
{ | ||
if (arguments.size() != 1) | ||
throw Exception("Incorrect number of arguments for aggregate function with " + getName() + " suffix", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); | ||
|
||
const DataTypePtr & argument = arguments[0]; | ||
|
||
const DataTypeAggregateFunction * function = typeid_cast<const DataTypeAggregateFunction *>(argument.get()); | ||
if (!function) | ||
throw Exception("Illegal type " + argument->getName() + " of argument for aggregate function with " + getName() + " suffix" | ||
+ " must be AggregateFunction(...)", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); | ||
|
||
const DataTypeAggregateFunction * function2 = typeid_cast<const DataTypeAggregateFunction *>(function->getArgumentsDataTypes()[0].get()); | ||
if (function2) { | ||
return transformArguments(function->getArgumentsDataTypes()); | ||
} | ||
return function->getArgumentsDataTypes(); | ||
} | ||
|
||
AggregateFunctionPtr transformAggregateFunction( | ||
const AggregateFunctionPtr & nested_function, | ||
const AggregateFunctionProperties &, | ||
const DataTypes & arguments, | ||
const Array & params) const override | ||
{ | ||
DataTypePtr & argument = const_cast<DataTypePtr &>(arguments[0]); | ||
|
||
const DataTypeAggregateFunction * function = typeid_cast<const DataTypeAggregateFunction *>(argument.get()); | ||
if (!function) | ||
throw Exception("Illegal type " + argument->getName() + " of argument for aggregate function with " + getName() + " suffix" | ||
+ " must be AggregateFunction(...)", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); | ||
|
||
while (nested_function->getName() != function->getFunctionName()) { | ||
argument = function->getArgumentsDataTypes()[0]; | ||
function = typeid_cast<const DataTypeAggregateFunction *>(function->getArgumentsDataTypes()[0].get()); | ||
if (!function) | ||
throw Exception("Illegal type " + argument->getName() + " of argument for aggregate function with " + getName() + " suffix" | ||
+ " must be AggregateFunction(...)", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); | ||
} | ||
|
||
if (nested_function->getName() != function->getFunctionName()) | ||
throw Exception("Illegal type " + argument->getName() + " of argument for aggregate function with " + getName() + " suffix" | ||
+ ", because it corresponds to different aggregate function: " + function->getFunctionName() + " instead of " + nested_function->getName(), | ||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); | ||
|
||
return std::make_shared<AggregateFunctionPartialMerge>(nested_function, argument, params); | ||
} | ||
}; | ||
|
||
} | ||
|
||
/// Creates the PartialMerge combinator and hands it to `factory`.
void registerAggregateFunctionCombinatorPartialMerge(AggregateFunctionCombinatorFactory & factory)
{
    auto partial_merge_combinator = std::make_shared<AggregateFunctionCombinatorPartialMerge>();
    factory.registerCombinator(partial_merge_combinator);
}
|
||
} |
Oops, something went wrong.