From ba487e10fdc3aaf8d11fe90f1a2984ba0cd34d84 Mon Sep 17 00:00:00 2001 From: fanjia <1095948736@qq.com> Date: Wed, 15 Jun 2022 11:54:49 +0800 Subject: [PATCH 01/21] update sink template code --- .../seatunnel/clickhouse/config/Config.java | 2 + .../clickhouse/sink/ClickhouseSink.java | 100 ++++++++++++++++++ .../clickhouse/sink/ClickhouseSinkWriter.java | 55 ++++++++++ .../clickhouse/source/ClickhouseSource.java | 14 +-- .../clickhouse/state/CKAggCommitInfo.java | 23 ++++ .../clickhouse/state/CKCommitInfo.java | 23 ++++ .../clickhouse/state/ClickhouseSinkState.java | 23 ++++ .../clickhouse/util/ClickhouseUtil.java | 40 +++++++ 8 files changed, 269 insertions(+), 11 deletions(-) create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/ClickhouseSink.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/ClickhouseSinkWriter.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/state/CKAggCommitInfo.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/state/CKCommitInfo.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/state/ClickhouseSinkState.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/util/ClickhouseUtil.java diff --git 
a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/Config.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/Config.java index 65b7af7c6d3..ccc2dd9a7d2 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/Config.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/Config.java @@ -32,4 +32,6 @@ public class Config { public static final String PASSWORD = "password"; + public static final String TABLE = "table"; + } diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/ClickhouseSink.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/ClickhouseSink.java new file mode 100644 index 00000000000..4b47d9f7595 --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/ClickhouseSink.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink; + +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.DATABASE; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.NODE_ADDRESS; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.PASSWORD; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.TABLE; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.USERNAME; + +import org.apache.seatunnel.api.common.PrepareFailException; +import org.apache.seatunnel.api.common.SeaTunnelContext; +import org.apache.seatunnel.api.serialization.DefaultSerializer; +import org.apache.seatunnel.api.serialization.Serializer; +import org.apache.seatunnel.api.sink.SeaTunnelSink; +import org.apache.seatunnel.api.sink.SinkWriter; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.common.config.CheckConfigUtil; +import org.apache.seatunnel.common.config.CheckResult; +import org.apache.seatunnel.common.constants.PluginType; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKAggCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.ClickhouseSinkState; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.util.ClickhouseUtil; + +import org.apache.seatunnel.shade.com.typesafe.config.Config; + +import com.clickhouse.client.ClickHouseNode; +import 
com.google.auto.service.AutoService; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; + +@AutoService(SeaTunnelSink.class) +public class ClickhouseSink implements SeaTunnelSink { + + private SeaTunnelContext seaTunnelContext; + + private List servers; + + private String table; + + @Override + public String getPluginName() { + return "Clickhouse"; + } + + @Override + public void prepare(Config config) throws PrepareFailException { + CheckResult result = CheckConfigUtil.checkAllExists(config, NODE_ADDRESS, DATABASE, TABLE, USERNAME, PASSWORD); + if (!result.isSuccess()) { + throw new PrepareFailException(getPluginName(), PluginType.SINK, result.getMsg()); + } + servers = ClickhouseUtil.createNodes(config.getString(NODE_ADDRESS), config.getString(DATABASE), + config.getString(USERNAME), config.getString(PASSWORD)); + table = config.getString(TABLE); + + } + + @Override + public SinkWriter createWriter(SinkWriter.Context context) throws IOException { + return new ClickhouseSinkWriter(context); + } + + @Override + public SinkWriter restoreWriter(SinkWriter.Context context, List states) throws IOException { + return SeaTunnelSink.super.restoreWriter(context, states); + } + + @Override + public Optional> getWriterStateSerializer() { + return Optional.of(new DefaultSerializer<>()); + } + + @Override + public SeaTunnelContext getSeaTunnelContext() { + return seaTunnelContext; + } + + @Override + public void setSeaTunnelContext(SeaTunnelContext seaTunnelContext) { + this.seaTunnelContext = seaTunnelContext; + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/ClickhouseSinkWriter.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/ClickhouseSinkWriter.java new file mode 100644 index 
00000000000..d7208d4c161 --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/ClickhouseSinkWriter.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink; + +import org.apache.seatunnel.api.sink.SinkWriter; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.ClickhouseSinkState; + +import java.io.IOException; +import java.util.Optional; + +public class ClickhouseSinkWriter implements SinkWriter { + + private SinkWriter.Context context; + + ClickhouseSinkWriter(SinkWriter.Context context) { + this.context = context; + } + + @Override + public void write(SeaTunnelRow element) throws IOException { + + } + + @Override + public Optional prepareCommit() throws IOException { + return Optional.empty(); + } + + @Override + public void abortPrepare() { + + } + + @Override + public void close() throws IOException { + + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSource.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSource.java index 59e6b5cdba1..fef34c541c5 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSource.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSource.java @@ -38,22 +38,19 @@ import org.apache.seatunnel.common.config.CheckResult; import org.apache.seatunnel.common.constants.PluginType; import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.ClickhouseSourceState; +import 
org.apache.seatunnel.connectors.seatunnel.clickhouse.util.ClickhouseUtil; import org.apache.seatunnel.connectors.seatunnel.clickhouse.util.TypeConvertUtil; import org.apache.seatunnel.shade.com.typesafe.config.Config; import com.clickhouse.client.ClickHouseClient; -import com.clickhouse.client.ClickHouseCredentials; import com.clickhouse.client.ClickHouseException; import com.clickhouse.client.ClickHouseFormat; import com.clickhouse.client.ClickHouseNode; -import com.clickhouse.client.ClickHouseProtocol; import com.clickhouse.client.ClickHouseResponse; import com.google.auto.service.AutoService; -import java.util.Arrays; import java.util.List; -import java.util.stream.Collectors; @AutoService(SeaTunnelSource.class) public class ClickhouseSource implements SeaTunnelSource { @@ -74,13 +71,8 @@ public void prepare(Config config) throws PrepareFailException { if (!result.isSuccess()) { throw new PrepareFailException(getPluginName(), PluginType.SOURCE, result.getMsg()); } - servers = Arrays.stream(config.getString(NODE_ADDRESS).split(",")).map(address -> { - String[] nodeAndPort = address.split(":", 2); - return ClickHouseNode.builder().host(nodeAndPort[0]).port(ClickHouseProtocol.HTTP, - Integer.parseInt(nodeAndPort[1])).database(config.getString(DATABASE)) - .credentials(ClickHouseCredentials.fromUserAndPassword(config.getString(USERNAME), - config.getString(PASSWORD))).build(); - }).collect(Collectors.toList()); + servers = ClickhouseUtil.createNodes(config.getString(NODE_ADDRESS), config.getString(DATABASE), + config.getString(USERNAME), config.getString(PASSWORD)); sql = config.getString(SQL); try (ClickHouseClient client = ClickHouseClient.newInstance(servers.get(0).getProtocol()); diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/state/CKAggCommitInfo.java 
b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/state/CKAggCommitInfo.java new file mode 100644 index 00000000000..2de15ac9ca9 --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/state/CKAggCommitInfo.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.state; + +import java.io.Serializable; + +public class CKAggCommitInfo implements Serializable { +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/state/CKCommitInfo.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/state/CKCommitInfo.java new file mode 100644 index 00000000000..99464801ddd --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/state/CKCommitInfo.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.state; + +import java.io.Serializable; + +public class CKCommitInfo implements Serializable { +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/state/ClickhouseSinkState.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/state/ClickhouseSinkState.java new file mode 100644 index 00000000000..28d9dc2ed4f --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/state/ClickhouseSinkState.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.state; + +import java.io.Serializable; + +public class ClickhouseSinkState implements Serializable { +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/util/ClickhouseUtil.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/util/ClickhouseUtil.java new file mode 100644 index 00000000000..38c835831c7 --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/util/ClickhouseUtil.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.util; + +import com.clickhouse.client.ClickHouseCredentials; +import com.clickhouse.client.ClickHouseNode; +import com.clickhouse.client.ClickHouseProtocol; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +public class ClickhouseUtil { + + public static List createNodes(String nodeAddress, String database, String username, + String password) { + return Arrays.stream(nodeAddress.split(",")).map(address -> { + String[] nodeAndPort = address.split(":", 2); + return ClickHouseNode.builder().host(nodeAndPort[0]).port(ClickHouseProtocol.HTTP, + Integer.parseInt(nodeAndPort[1])).database(database) + .credentials(ClickHouseCredentials.fromUserAndPassword(username, password)).build(); + }).collect(Collectors.toList()); + } + +} From 45efe70ba2768c782aeb54e5ce37bde957adba4b Mon Sep 17 00:00:00 2001 From: fanjia <1095948736@qq.com> Date: Wed, 15 Jun 2022 14:43:55 +0800 Subject: [PATCH 02/21] fix plugin discovery maybe repeat --- .../seatunnel/plugin/discovery/AbstractPluginDiscovery.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java index cc8427d9d40..edbd788129d 100644 --- a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java +++ b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java @@ -62,14 +62,14 @@ public List getPluginJarPaths(List pluginIdentifiers) { return pluginIdentifiers.stream() .map(this::getPluginJarPath) .filter(Optional::isPresent) - .map(Optional::get) + .map(Optional::get).distinct() .collect(Collectors.toList()); } @Override public List getAllPlugins(List pluginIdentifiers) { return 
pluginIdentifiers.stream() - .map(this::getPluginInstance) + .map(this::getPluginInstance).distinct() .collect(Collectors.toList()); } From 784ddbf623952f767360c553c8c45df46b906767 Mon Sep 17 00:00:00 2001 From: fanjia <1095948736@qq.com> Date: Fri, 17 Jun 2022 14:21:12 +0800 Subject: [PATCH 03/21] add clickhouse client sink --- .../api/table/type/SeaTunnelRowType.java | 9 + .../pom.xml | 7 + .../seatunnel/clickhouse/config/Config.java | 86 ++++++- .../clickhouse/config/ReaderOption.java | 117 +++++++++ .../seatunnel/clickhouse/shard/Shard.java | 98 ++++++++ .../clickhouse/shard/ShardMetadata.java | 145 +++++++++++ .../clickhouse/sink/ClickhouseSink.java | 100 -------- .../clickhouse/sink/DistributedEngine.java | 58 +++++ .../sink/client/ClickhouseBatchStatement.java | 51 ++++ .../sink/client/ClickhouseProxy.java | 186 ++++++++++++++ .../sink/client/ClickhouseSink.java | 166 +++++++++++++ .../sink/client/ClickhouseSinkWriter.java | 231 ++++++++++++++++++ .../clickhouse/sink/client/ShardRouter.java | 97 ++++++++ .../clickhouse/sink/file/ClickhouseTable.java | 117 +++++++++ .../ArrayInjectFunction.java} | 37 +-- .../sink/inject/BigDecimalInjectFunction.java | 34 +++ .../inject/ClickhouseFieldInjectFunction.java | 46 ++++ .../sink/inject/DateInjectFunction.java | 39 +++ .../sink/inject/DateTimeInjectFunction.java | 39 +++ .../sink/inject/DoubleInjectFunction.java | 42 ++++ .../sink/inject/FloatInjectFunction.java | 39 +++ .../sink/inject/IntInjectFunction.java | 39 +++ .../sink/inject/LongInjectFunction.java | 37 +++ .../sink/inject/StringInjectFunction.java | 38 +++ .../seatunnel/clickhouse/tool/IntHolder.java | 35 +++ 25 files changed, 1760 insertions(+), 133 deletions(-) create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/ReaderOption.java create mode 100644 
seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/shard/Shard.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/shard/ShardMetadata.java delete mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/ClickhouseSink.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/DistributedEngine.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseBatchStatement.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseProxy.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ShardRouter.java create mode 100644 
seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseTable.java rename seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/{ClickhouseSinkWriter.java => inject/ArrayInjectFunction.java} (51%) create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/BigDecimalInjectFunction.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ClickhouseFieldInjectFunction.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DateInjectFunction.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DateTimeInjectFunction.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DoubleInjectFunction.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/FloatInjectFunction.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/IntInjectFunction.java create mode 100644 
seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/LongInjectFunction.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/StringInjectFunction.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/tool/IntHolder.java diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRowType.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRowType.java index b08d172f359..a1c20355260 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRowType.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRowType.java @@ -66,4 +66,13 @@ public String getFieldName(int index) { public SeaTunnelDataType getFieldType(int index) { return fieldTypes[index]; } + + public int indexOf(String fieldName) { + for (int i = 0; i < fieldNames.length; i++) { + if (fieldNames[i].equals(fieldName)) { + return i; + } + } + throw new IllegalArgumentException(String.format("can't find field %s", fieldName)); + } } diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/pom.xml b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/pom.xml index e3fb148aa00..3a1b9f9353c 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/pom.xml +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/pom.xml @@ -42,6 +42,13 @@ clickhouse-http-client 0.3.2-patch9 + + + com.clickhouse + clickhouse-jdbc + 0.3.2-patch9 + + \ No newline 
at end of file diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/Config.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/Config.java index ccc2dd9a7d2..f20f47071ba 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/Config.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/Config.java @@ -17,21 +17,95 @@ package org.apache.seatunnel.connectors.seatunnel.clickhouse.config; -/** - * The config of clickhouse - */ public class Config { - public static final String NODE_ADDRESS = "node_address"; + /** + * Bulk size of clickhouse jdbc + */ + public static final String BULK_SIZE = "bulk_size"; - public static final String DATABASE = "database"; + /** + * Clickhouse jdbc retry time + */ + public static final String RETRY = "retry"; + + /** + * Clickhouse fields + */ + public static final String FIELDS = "fields"; public static final String SQL = "sql"; + /** + * Clickhouse server host + */ + public static final String HOST = "host"; + + /** + * Clickhouse table name + */ + public static final String TABLE = "table"; + + /** + * Clickhouse database name + */ + public static final String DATABASE = "database"; + + /** + * Clickhouse server username + */ public static final String USERNAME = "username"; + /** + * Clickhouse server password + */ public static final String PASSWORD = "password"; - public static final String TABLE = "table"; + /** + * Split mode when table is distributed engine + */ + public static final String SPLIT_MODE = "split_mode"; + + /** + * When 
split_mode is true, the sharding_key is used for splitting + */ + public static final String SHARDING_KEY = "sharding_key"; + + /** + * The retry codes when using clickhouse jdbc + */ + public static final String RETRY_CODES = "retry_codes"; + + /** + * Path of the clickhouse-local program used by the ClickhouseFile sink connector + */ + public static final String CLICKHOUSE_LOCAL_PATH = "clickhouse_local_path"; + + /** + * The method used to copy Clickhouse files + */ + public static final String COPY_METHOD = "copy_method"; + + /** + * The size of each batch when reading temporary data into a local file. + */ + public static final String TMP_BATCH_CACHE_LINE = "tmp_batch_cache_line"; + + /** + * Clickhouse server node is password-free. + */ + public static final String NODE_FREE_PASSWORD = "node_free_password"; + + /** + * The password of Clickhouse server node + */ + public static final String NODE_PASS = "node_pass"; + + /** + * The address of Clickhouse server node + */ + public static final String NODE_ADDRESS = "node_address"; + + public static final String CLICKHOUSE_PREFIX = "clickhouse."; } diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/ReaderOption.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/ReaderOption.java new file mode 100644 index 00000000000..a7f453dffc1 --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/ReaderOption.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.config; + +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.shard.ShardMetadata; + +import java.io.Serializable; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +public class ReaderOption implements Serializable { + + private ShardMetadata shardMetadata; + private List retryCodes; + + private List fields; + + private Map tableSchema; + private SeaTunnelRowType seaTunnelRowType; + private Properties properties; + private int retry; + private int bulkSize; + + public ReaderOption(ShardMetadata shardMetadata, SeaTunnelRowType seaTunnelRowType, + Properties properties, List fields, + List retryCodes, Map tableSchema, int retry, int bulkSize) { + this.shardMetadata = shardMetadata; + this.properties = properties; + this.seaTunnelRowType = seaTunnelRowType; + this.fields = fields; + this.retryCodes = retryCodes; + this.tableSchema = tableSchema; + this.retry = retry; + this.bulkSize = bulkSize; + } + + public Properties getProperties() { + return properties; + } + + public void setProperties(Properties properties) { + this.properties = properties; + } + + public ShardMetadata getShardMetadata() { + return shardMetadata; + } + + public void setShardMetadata(ShardMetadata shardMetadata) { + this.shardMetadata = shardMetadata; + 
} + + public SeaTunnelRowType getSeaTunnelRowType() { + return seaTunnelRowType; + } + + public void setSeaTunnelRowType(SeaTunnelRowType seaTunnelRowType) { + this.seaTunnelRowType = seaTunnelRowType; + } + + public Map getTableSchema() { + return tableSchema; + } + + public void setTableSchema(Map tableSchema) { + this.tableSchema = tableSchema; + } + + public List getFields() { + return fields; + } + + public void setFields(List fields) { + this.fields = fields; + } + + public List getRetryCodes() { + return retryCodes; + } + + public void setRetryCodes(List retryCodes) { + this.retryCodes = retryCodes; + } + + public int getRetry() { + return retry; + } + + public void setRetry(int retry) { + this.retry = retry; + } + + public int getBulkSize() { + return bulkSize; + } + + public void setBulkSize(int bulkSize) { + this.bulkSize = bulkSize; + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/shard/Shard.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/shard/Shard.java new file mode 100644 index 00000000000..d99c5968d57 --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/shard/Shard.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.shard; + +import com.clickhouse.client.ClickHouseCredentials; +import com.clickhouse.client.ClickHouseNode; + +import java.io.Serializable; +import java.net.InetSocketAddress; +import java.util.Objects; + +public class Shard implements Serializable { + private static final long serialVersionUID = -1L; + + private final int shardNum; + private final int replicaNum; + + private final ClickHouseNode node; + + // cache the hash code + private int hashCode = -1; + + public Shard(int shardNum, + int shardWeight, + int replicaNum, + String hostname, + String hostAddress, + int port, + String database, + String username, + String password) { + this.shardNum = shardNum; + this.replicaNum = replicaNum; + this.node = ClickHouseNode.builder().host(hostname).address(InetSocketAddress.createUnresolved(hostAddress, + port)).database(database).weight(shardWeight).credentials(ClickHouseCredentials.fromUserAndPassword(username, password)).build(); + } + + public Shard(int shardNum, int replicaNum, ClickHouseNode node) { + this.shardNum = shardNum; + this.replicaNum = replicaNum; + this.node = node; + } + + public int getShardNum() { + return shardNum; + } + + public int getReplicaNum() { + return replicaNum; + } + + public ClickHouseNode getNode() { + return node; + } + + public String getJdbcUrl() { + return "jdbc:clickhouse://" + node.getAddress().getAddress().getHostAddress() + + ":" + node.getAddress().getPort() + "/" + node.getDatabase().get(); + } + + @Override + public boolean equals(Object o) { 
+ if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Shard shard = (Shard) o; + return shardNum == shard.shardNum + && replicaNum == shard.replicaNum + && hashCode == shard.hashCode + && Objects.equals(node, shard.node); + } + + @Override + public int hashCode() { + if (hashCode == -1) { + hashCode = Objects.hash(shardNum, replicaNum, node, hashCode); + } + return hashCode; + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/shard/ShardMetadata.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/shard/ShardMetadata.java new file mode 100644 index 00000000000..3c01922f1be --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/shard/ShardMetadata.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.shard; + +import java.io.Serializable; +import java.util.Objects; + +public class ShardMetadata implements Serializable { + + private static final long serialVersionUID = -1L; + + private String shardKey; + private String shardKeyType; + private String database; + private String table; + private boolean splitMode; + private Shard defaultShard; + private String username; + private String password; + + public ShardMetadata(String shardKey, + String shardKeyType, + String database, + String table, + boolean splitMode, + Shard defaultShard, + String username, + String password) { + this.shardKey = shardKey; + this.shardKeyType = shardKeyType; + this.database = database; + this.table = table; + this.splitMode = splitMode; + this.defaultShard = defaultShard; + this.username = username; + this.password = password; + } + + public String getShardKey() { + return shardKey; + } + + public void setShardKey(String shardKey) { + this.shardKey = shardKey; + } + + public String getShardKeyType() { + return shardKeyType; + } + + public void setShardKeyType(String shardKeyType) { + this.shardKeyType = shardKeyType; + } + + public String getDatabase() { + return database; + } + + public void setDatabase(String database) { + this.database = database; + } + + public String getTable() { + return table; + } + + public void setTable(String table) { + this.table = table; + } + + public boolean getSplitMode() { + return splitMode; + } + + public void setSplitMode(boolean splitMode) { + this.splitMode = splitMode; + } + + public Shard getDefaultShard() { + return defaultShard; + } + + public void setDefaultShard(Shard defaultShard) { + this.defaultShard = defaultShard; + } + + public boolean isSplitMode() { + return splitMode; + } + + public String getUsername() { + return username; + } + + public void setUsername(String username) { + this.username = username; + } + + public String getPassword() { + return password; + } + + 
public void setPassword(String password) { + this.password = password; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ShardMetadata that = (ShardMetadata) o; + return splitMode == that.splitMode + && Objects.equals(shardKey, that.shardKey) + && Objects.equals(shardKeyType, that.shardKeyType) + && Objects.equals(database, that.database) + && Objects.equals(table, that.table) + && Objects.equals(defaultShard, that.defaultShard) + && Objects.equals(username, that.username) + && Objects.equals(password, that.password); + } + + @Override + public int hashCode() { + return Objects.hash(shardKey, shardKeyType, database, table, splitMode, defaultShard, username, password); + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/ClickhouseSink.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/ClickhouseSink.java deleted file mode 100644 index 4b47d9f7595..00000000000 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/ClickhouseSink.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink; - -import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.DATABASE; -import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.NODE_ADDRESS; -import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.PASSWORD; -import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.TABLE; -import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.USERNAME; - -import org.apache.seatunnel.api.common.PrepareFailException; -import org.apache.seatunnel.api.common.SeaTunnelContext; -import org.apache.seatunnel.api.serialization.DefaultSerializer; -import org.apache.seatunnel.api.serialization.Serializer; -import org.apache.seatunnel.api.sink.SeaTunnelSink; -import org.apache.seatunnel.api.sink.SinkWriter; -import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.common.config.CheckConfigUtil; -import org.apache.seatunnel.common.config.CheckResult; -import org.apache.seatunnel.common.constants.PluginType; -import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKAggCommitInfo; -import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKCommitInfo; -import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.ClickhouseSinkState; -import org.apache.seatunnel.connectors.seatunnel.clickhouse.util.ClickhouseUtil; - -import org.apache.seatunnel.shade.com.typesafe.config.Config; - -import com.clickhouse.client.ClickHouseNode; -import 
com.google.auto.service.AutoService; - -import java.io.IOException; -import java.util.List; -import java.util.Optional; - -@AutoService(SeaTunnelSink.class) -public class ClickhouseSink implements SeaTunnelSink { - - private SeaTunnelContext seaTunnelContext; - - private List servers; - - private String table; - - @Override - public String getPluginName() { - return "Clickhouse"; - } - - @Override - public void prepare(Config config) throws PrepareFailException { - CheckResult result = CheckConfigUtil.checkAllExists(config, NODE_ADDRESS, DATABASE, TABLE, USERNAME, PASSWORD); - if (!result.isSuccess()) { - throw new PrepareFailException(getPluginName(), PluginType.SINK, result.getMsg()); - } - servers = ClickhouseUtil.createNodes(config.getString(NODE_ADDRESS), config.getString(DATABASE), - config.getString(USERNAME), config.getString(PASSWORD)); - table = config.getString(TABLE); - - } - - @Override - public SinkWriter createWriter(SinkWriter.Context context) throws IOException { - return new ClickhouseSinkWriter(context); - } - - @Override - public SinkWriter restoreWriter(SinkWriter.Context context, List states) throws IOException { - return SeaTunnelSink.super.restoreWriter(context, states); - } - - @Override - public Optional> getWriterStateSerializer() { - return Optional.of(new DefaultSerializer<>()); - } - - @Override - public SeaTunnelContext getSeaTunnelContext() { - return seaTunnelContext; - } - - @Override - public void setSeaTunnelContext(SeaTunnelContext seaTunnelContext) { - this.seaTunnelContext = seaTunnelContext; - } -} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/DistributedEngine.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/DistributedEngine.java new file mode 100644 index 
00000000000..6a15d591977 --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/DistributedEngine.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink; + +import java.io.Serializable; + +public class DistributedEngine implements Serializable { + + private static final long serialVersionUID = -1L; + private String clusterName; + private String database; + private String table; + + public DistributedEngine(String clusterName, String database, String table) { + this.clusterName = clusterName; + this.database = database; + this.table = table; + } + + public String getClusterName() { + return clusterName; + } + + public void setClusterName(String clusterName) { + this.clusterName = clusterName; + } + + public String getDatabase() { + return database; + } + + public void setDatabase(String database) { + this.database = database; + } + + public String getTable() { + return table; + } + + public void setTable(String table) { + this.table = table; + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseBatchStatement.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseBatchStatement.java new file mode 100644 index 00000000000..d0574444351 --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseBatchStatement.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.client; + +import org.apache.seatunnel.connectors.seatunnel.clickhouse.tool.IntHolder; + +import ru.yandex.clickhouse.ClickHouseConnectionImpl; +import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; + +public class ClickhouseBatchStatement { + + private final ClickHouseConnectionImpl clickHouseConnection; + private final ClickHousePreparedStatementImpl preparedStatement; + private final IntHolder intHolder; + + public ClickhouseBatchStatement(ClickHouseConnectionImpl clickHouseConnection, + ClickHousePreparedStatementImpl preparedStatement, + IntHolder intHolder) { + this.clickHouseConnection = clickHouseConnection; + this.preparedStatement = preparedStatement; + this.intHolder = intHolder; + } + + public ClickHouseConnectionImpl getClickHouseConnection() { + return clickHouseConnection; + } + + public ClickHousePreparedStatementImpl getPreparedStatement() { + return preparedStatement; + } + + public IntHolder getIntHolder() { + return intHolder; + } + +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseProxy.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseProxy.java new file mode 100644 index 00000000000..73c3770184f --- /dev/null +++ 
b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseProxy.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.client; + +import org.apache.seatunnel.common.utils.JsonUtils; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.shard.Shard; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.DistributedEngine; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.file.ClickhouseTable; + +import com.clickhouse.client.ClickHouseClient; +import com.clickhouse.client.ClickHouseException; +import com.clickhouse.client.ClickHouseFormat; +import com.clickhouse.client.ClickHouseNode; +import com.clickhouse.client.ClickHouseRecord; +import com.clickhouse.client.ClickHouseRequest; +import com.clickhouse.client.ClickHouseResponse; +import com.fasterxml.jackson.core.type.TypeReference; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; + +@SuppressWarnings("magicnumber") +public class ClickhouseProxy { + + private final ClickHouseRequest clickhouseRequest; + private final ClickHouseClient client; + + private Map shardToDataSource = new ConcurrentHashMap<>(16); + + public ClickhouseProxy(ClickHouseNode node) { + this.client = ClickHouseClient.newInstance(node.getProtocol()); + this.clickhouseRequest = + client.connect(node).format(ClickHouseFormat.RowBinaryWithNamesAndTypes); + + } + + public ClickHouseRequest getClickhouseConnection() { + return this.clickhouseRequest; + } + + public ClickHouseRequest getClickhouseConnection(Shard shard) { + ClickHouseClient c = shardToDataSource.computeIfAbsent(shard, + s -> ClickHouseClient.newInstance(s.getNode().getProtocol())); + return c.connect(shard.getNode()).format(ClickHouseFormat.RowBinaryWithNamesAndTypes); + } + + public DistributedEngine getClickhouseDistributedTable(String database, String table) { + ClickHouseRequest request = getClickhouseConnection(); + return 
getClickhouseDistributedTable(request, database, table); + } + + public DistributedEngine getClickhouseDistributedTable(ClickHouseRequest connection, String database, + String table) { + String sql = String.format("select engine_full from system.tables where database = '%s' and name = '%s' and engine = 'Distributed'", database, table); + try (ClickHouseResponse response = connection.query(sql).executeAndWait()) { + ClickHouseRecord record = response.firstRecord(); + if (record != null) { + // engineFull field will be like : Distributed(cluster, database, table[, sharding_key[, policy_name]]) + String engineFull = record.getValue(1).asString(); + List infos = Arrays.stream(engineFull.substring(12).split(",")) + .map(s -> s.replace("'", "").trim()).collect(Collectors.toList()); + return new DistributedEngine(infos.get(0), infos.get(1), infos.get(2).replace("\\)", "").trim()); + } + throw new RuntimeException("Cannot get distributed table from clickhouse, resultSet is empty"); + } catch (ClickHouseException e) { + throw new RuntimeException("Cannot get distributed table from clickhouse", e); + } + } + + /** + * Get ClickHouse table schema, the key is fileName, value is value type. + * + * @param table table name. + * @return schema map. + */ + public Map getClickhouseTableSchema(String table) { + ClickHouseRequest request = getClickhouseConnection(); + return getClickhouseTableSchema(request, table); + } + + public Map getClickhouseTableSchema(ClickHouseRequest request, String table) { + String sql = "desc " + table; + Map schema = new LinkedHashMap<>(); + try (ClickHouseResponse response = request.query(sql).executeAndWait()) { + response.records().forEach(r -> schema.put(r.getValue(1).asString(), r.getValue(2).asString())); + } catch (ClickHouseException e) { + throw new RuntimeException("Cannot get table schema from clickhouse", e); + } + return schema; + } + + /** + * Get the shard of the given cluster. + * + * @param connection clickhouse connection. 
+ * @param clusterName cluster name. + * @param database database of the shard. + * @param port port of the shard. + * @return shard list. + */ + public List getClusterShardList(ClickHouseRequest connection, String clusterName, + String database, int port, String username, String password) { + String sql = "select shard_num,shard_weight,replica_num,host_name,host_address,port from system.clusters where cluster = '" + clusterName + "'"; + List shardList = new ArrayList<>(); + try (ClickHouseResponse response = connection.query(sql).executeAndWait()) { + response.records().forEach(r -> { + shardList.add(new Shard( + r.getValue(1).asInteger(), + r.getValue(2).asInteger(), + r.getValue(3).asInteger(), + r.getValue(4).asString(), + r.getValue(5).asString(), + port, database, username, password)); + }); + return shardList; + } catch (ClickHouseException e) { + throw new RuntimeException("Cannot get cluster shard list from clickhouse", e); + } + } + + /** + * Get ClickHouse table info. + * + * @param database database of the table. + * @param table table name of the table. + * @return clickhouse table info. 
+ */ + public ClickhouseTable getClickhouseTable(String database, String table) { + String sql = String.format("select engine,create_table_query,engine_full,data_paths from system.tables where database = '%s' and name = '%s'", database, table); + try (ClickHouseResponse response = clickhouseRequest.query(sql).executeAndWait()) { + if (!response.stream().findAny().isPresent()) { + throw new RuntimeException("Cannot get table from clickhouse, resultSet is empty"); + } + ClickHouseRecord record = response.firstRecord(); + String engine = record.getValue(1).asString(); + String createTableDDL = record.getValue(2).asString(); + String engineFull = record.getValue(3).asString(); + List dataPaths = JsonUtils.parseObject(record.getValue(4).asString().replaceAll("'", "\""), + new TypeReference>() { + }); + DistributedEngine distributedEngine = null; + if ("Distributed".equals(engine)) { + distributedEngine = getClickhouseDistributedTable(clickhouseRequest, database, table); + } + return new ClickhouseTable( + database, + table, + distributedEngine, + engine, + createTableDDL, + engineFull, + dataPaths, + getClickhouseTableSchema(clickhouseRequest, table)); + } catch (ClickHouseException e) { + throw new RuntimeException("Cannot get clickhouse table", e); + } + + } + + public void close() { + if (this.client != null) { + this.client.close(); + } + shardToDataSource.values().forEach(ClickHouseClient::close); + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java new file mode 100644 index 00000000000..730488d0739 --- /dev/null +++ 
b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.client; + +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.BULK_SIZE; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.CLICKHOUSE_PREFIX; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.DATABASE; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.FIELDS; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.NODE_ADDRESS; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.PASSWORD; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.RETRY; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.RETRY_CODES; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.SHARDING_KEY; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.SPLIT_MODE; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.TABLE; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.USERNAME; + +import org.apache.seatunnel.api.common.PrepareFailException; +import org.apache.seatunnel.api.common.SeaTunnelContext; +import org.apache.seatunnel.api.serialization.DefaultSerializer; +import org.apache.seatunnel.api.serialization.Serializer; +import org.apache.seatunnel.api.sink.SeaTunnelSink; +import org.apache.seatunnel.api.sink.SinkWriter; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.common.config.CheckConfigUtil; +import org.apache.seatunnel.common.config.CheckResult; +import org.apache.seatunnel.common.config.TypesafeConfigUtils; +import org.apache.seatunnel.common.constants.PluginType; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ReaderOption; 
+import org.apache.seatunnel.connectors.seatunnel.clickhouse.shard.Shard; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.shard.ShardMetadata; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKAggCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.ClickhouseSinkState; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.util.ClickhouseUtil; + +import org.apache.seatunnel.shade.com.typesafe.config.Config; +import org.apache.seatunnel.shade.com.typesafe.config.ConfigFactory; + +import com.clickhouse.client.ClickHouseNode; +import com.clickhouse.client.ClickHouseRequest; +import com.google.auto.service.AutoService; +import com.google.common.collect.ImmutableMap; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Properties; + +@AutoService(SeaTunnelSink.class) +public class ClickhouseSink implements SeaTunnelSink { + + private SeaTunnelContext seaTunnelContext; + private SeaTunnelRowType seaTunnelRowType; + private ReaderOption option; + + @Override + public String getPluginName() { + return "Clickhouse"; + } + + @SuppressWarnings("checkstyle:MagicNumber") + @Override + public void prepare(Config config) throws PrepareFailException { + CheckResult result = CheckConfigUtil.checkAllExists(config, NODE_ADDRESS, DATABASE, TABLE, USERNAME, PASSWORD); + if (!result.isSuccess()) { + throw new PrepareFailException(getPluginName(), PluginType.SINK, result.getMsg()); + } + Map defaultConfig = ImmutableMap.builder() + .put(BULK_SIZE, 20_000) + .put(RETRY_CODES, new ArrayList<>()) + .put(RETRY, 1) + .put(SPLIT_MODE, false) + .build(); + + config = config.withFallback(ConfigFactory.parseMap(defaultConfig)); + + List nodes = ClickhouseUtil.createNodes(config.getString(NODE_ADDRESS), + config.getString(DATABASE), 
config.getString(USERNAME), config.getString(PASSWORD)); + + Properties clickhouseProperties = new Properties(); + if (TypesafeConfigUtils.hasSubConfig(config, CLICKHOUSE_PREFIX)) { + TypesafeConfigUtils.extractSubConfig(config, CLICKHOUSE_PREFIX, false).entrySet().forEach(e -> { + clickhouseProperties.put(e.getKey(), String.valueOf(e.getValue().unwrapped())); + }); + } + clickhouseProperties.put("user", config.getString(USERNAME)); + clickhouseProperties.put("password", config.getString(PASSWORD)); + + ClickhouseProxy proxy = new ClickhouseProxy(nodes.get(0)); + ClickHouseRequest request = proxy.getClickhouseConnection(); + Map tableSchema = proxy.getClickhouseTableSchema(request, config.getString(TABLE)); + String shardKey = TypesafeConfigUtils.getConfig(config, SHARDING_KEY, ""); + String shardKeyType = tableSchema.get(shardKey); + + ShardMetadata metadata = new ShardMetadata( + shardKey, + shardKeyType, + config.getString(DATABASE), + config.getString(TABLE), + config.getBoolean(SPLIT_MODE), + new Shard(1, 1, nodes.get(0)), config.getString(USERNAME), config.getString(PASSWORD)); + List fields = new ArrayList<>(); + if (config.hasPath(FIELDS)) { + fields.addAll(config.getStringList(FIELDS)); + // check if the fields exist in schema + for (String field : fields) { + if (!tableSchema.containsKey(field)) { + throw new RuntimeException("Field " + field + " does not exist in table " + config.getString(TABLE)); + } + } + } else { + fields.addAll(tableSchema.keySet()); + } + this.option = new ReaderOption(metadata, seaTunnelRowType, clickhouseProperties, fields, + config.getIntList(RETRY_CODES), tableSchema, config.getInt(RETRY), config.getInt(BULK_SIZE)); + } + + @Override + public SinkWriter createWriter(SinkWriter.Context context) throws IOException { + return new ClickhouseSinkWriter(option, context); + } + + @Override + public SinkWriter restoreWriter(SinkWriter.Context context, List states) throws IOException { + return SeaTunnelSink.super.restoreWriter(context, 
states); + } + + @Override + public Optional> getWriterStateSerializer() { + return Optional.of(new DefaultSerializer<>()); + } + + @Override + public void setTypeInfo(SeaTunnelRowType seaTunnelRowType) { + this.seaTunnelRowType = seaTunnelRowType; + } + + @Override + public SeaTunnelContext getSeaTunnelContext() { + return seaTunnelContext; + } + + @Override + public void setSeaTunnelContext(SeaTunnelContext seaTunnelContext) { + this.seaTunnelContext = seaTunnelContext; + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java new file mode 100644 index 00000000000..dda07255d90 --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java @@ -0,0 +1,231 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.client; + +import org.apache.seatunnel.api.sink.SinkWriter; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.common.config.Common; +import org.apache.seatunnel.common.utils.RetryUtils; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ReaderOption; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.shard.Shard; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject.ArrayInjectFunction; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject.BigDecimalInjectFunction; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject.ClickhouseFieldInjectFunction; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject.DateInjectFunction; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject.DateTimeInjectFunction; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject.DoubleInjectFunction; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject.FloatInjectFunction; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject.IntInjectFunction; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject.LongInjectFunction; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject.StringInjectFunction; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.ClickhouseSinkState; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.tool.IntHolder; + +import com.google.common.collect.Lists; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import ru.yandex.clickhouse.BalancedClickhouseDataSource; +import ru.yandex.clickhouse.ClickHouseConnectionImpl; +import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; +import 
ru.yandex.clickhouse.ClickHouseStatement; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +public class ClickhouseSinkWriter implements SinkWriter { + + private static final Logger LOGGER = LoggerFactory.getLogger(ClickhouseSinkWriter.class); + + private final SinkWriter.Context context; + private final ReaderOption option; + private transient RetryUtils.RetryMaterial retryMaterial; + private final ShardRouter shardRouter; + private final ClickhouseProxy proxy; + private final String prepareSql; + private final Map statementMap; + private final Map fieldInjectFunctionMap; + private static final ClickhouseFieldInjectFunction DEFAULT_INJECT_FUNCTION = new StringInjectFunction(); + + ClickhouseSinkWriter(ReaderOption option, SinkWriter.Context context) { + this.option = option; + this.context = context; + + retryMaterial = new RetryUtils.RetryMaterial(option.getRetry(), true, exception -> { + if (exception instanceof SQLException) { + SQLException sqlException = (SQLException) exception; + return option.getRetryCodes().contains(sqlException.getErrorCode()); + } + return false; + }); + + this.proxy = new ClickhouseProxy(option.getShardMetadata().getDefaultShard().getNode()); + this.fieldInjectFunctionMap = initFieldInjectFunctionMap(); + this.shardRouter = new ShardRouter(proxy, option.getShardMetadata()); + this.prepareSql = initPrepareSQL(); + this.statementMap = initStatementMap(); + } + + @Override + public void write(SeaTunnelRow element) throws IOException { + + Object shardKey = null; + if (StringUtils.isNotEmpty(this.option.getShardMetadata().getShardKey())) { + int i = this.option.getSeaTunnelRowType().indexOf(this.option.getShardMetadata().getShardKey()); + shardKey = element.getField(i); + } + ClickhouseBatchStatement statement = statementMap.get(shardRouter.getShard(shardKey)); + ClickHousePreparedStatementImpl 
clickHouseStatement = statement.getPreparedStatement(); + IntHolder sizeHolder = statement.getIntHolder(); + // add into batch + addIntoBatch(element, clickHouseStatement); + sizeHolder.setValue(sizeHolder.getValue() + 1); + // flush batch + if (sizeHolder.getValue() >= option.getBulkSize()) { + flush(clickHouseStatement); + sizeHolder.setValue(0); + } + } + + @Override + public Optional prepareCommit() throws IOException { + return Optional.empty(); + } + + @Override + public void abortPrepare() { + + } + + @Override + public void close() throws IOException { + this.proxy.close(); + for (ClickhouseBatchStatement batchStatement : statementMap.values()) { + try (ClickHouseConnectionImpl needClosedConnection = batchStatement.getClickHouseConnection(); + ClickHousePreparedStatementImpl needClosedStatement = batchStatement.getPreparedStatement()) { + IntHolder intHolder = batchStatement.getIntHolder(); + if (intHolder.getValue() > 0) { + flush(needClosedStatement); + intHolder.setValue(0); + } + } catch (SQLException e) { + throw new RuntimeException("Failed to close prepared statement.", e); + } + } + } + + private void addIntoBatch(SeaTunnelRow row, ClickHousePreparedStatementImpl clickHouseStatement) { + try { + for (int i = 0; i < option.getFields().size(); i++) { + String fieldName = option.getFields().get(i); + Object fieldValue = row.getField(option.getSeaTunnelRowType().indexOf(fieldName)); + if (fieldValue == null) { + // field does not exist in row + // todo: do we need to transform to default value of each type + clickHouseStatement.setObject(i + 1, null); + continue; + } + String fieldType = option.getTableSchema().get(fieldName); + fieldInjectFunctionMap + .getOrDefault(fieldType, DEFAULT_INJECT_FUNCTION) + .injectFields(clickHouseStatement, i + 1, fieldValue); + } + clickHouseStatement.addBatch(); + } catch (SQLException e) { + throw new RuntimeException("Add row data into batch error", e); + } + } + + private void flush(ClickHouseStatement 
clickHouseStatement) { + RetryUtils.Execution execution = () -> { + clickHouseStatement.executeBatch(); + return null; + }; + try { + RetryUtils.retryWithException(execution, retryMaterial); + } catch (Exception e) { + throw new RuntimeException("Clickhouse execute batch statement error", e); + } + } + + private Map initStatementMap() { + Map result = new HashMap<>(Common.COLLECTION_SIZE); + shardRouter.getShards().forEach((weight, s) -> { + try { + ClickHouseConnectionImpl clickhouseConnection = + (ClickHouseConnectionImpl) new BalancedClickhouseDataSource(s.getJdbcUrl(), + this.option.getProperties()).getConnection(); + ClickHousePreparedStatementImpl preparedStatement = + (ClickHousePreparedStatementImpl) clickhouseConnection.prepareStatement(prepareSql); + IntHolder intHolder = new IntHolder(); + ClickhouseBatchStatement batchStatement = + new ClickhouseBatchStatement(clickhouseConnection, preparedStatement, intHolder); + result.put(s, batchStatement); + } catch (SQLException e) { + throw new RuntimeException("Clickhouse prepare statement error", e); + } + }); + return result; + } + + private String initPrepareSQL() { + String[] placeholder = new String[option.getFields().size()]; + Arrays.fill(placeholder, "?"); + + return String.format("INSERT INTO %s (%s) VALUES (%s)", + shardRouter.getShardTable(), + String.join(",", option.getFields()), + String.join(",", placeholder)); + } + + private Map initFieldInjectFunctionMap() { + Map result = new HashMap<>(Common.COLLECTION_SIZE); + List clickhouseFieldInjectFunctions = Lists.newArrayList( + new ArrayInjectFunction(), + new BigDecimalInjectFunction(), + new DateInjectFunction(), + new DateTimeInjectFunction(), + new DoubleInjectFunction(), + new FloatInjectFunction(), + new IntInjectFunction(), + new LongInjectFunction(), + new StringInjectFunction() + ); + ClickhouseFieldInjectFunction defaultFunction = new StringInjectFunction(); + // get field type + for (String field : this.option.getFields()) { + 
ClickhouseFieldInjectFunction function = defaultFunction; + String fieldType = this.option.getTableSchema().get(field); + for (ClickhouseFieldInjectFunction clickhouseFieldInjectFunction : clickhouseFieldInjectFunctions) { + if (clickhouseFieldInjectFunction.isCurrentFieldType(fieldType)) { + function = clickhouseFieldInjectFunction; + break; + } + } + result.put(field, function); + } + return result; + } + +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ShardRouter.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ShardRouter.java new file mode 100644 index 00000000000..ee4681d1774 --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ShardRouter.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.client; + +import org.apache.seatunnel.connectors.seatunnel.clickhouse.shard.Shard; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.shard.ShardMetadata; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.DistributedEngine; + +import com.clickhouse.client.ClickHouseRequest; +import net.jpountz.xxhash.XXHash64; +import net.jpountz.xxhash.XXHashFactory; +import org.apache.commons.lang3.StringUtils; + +import java.io.Serializable; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.TreeMap; +import java.util.concurrent.ThreadLocalRandom; + +public class ShardRouter implements Serializable { + + private static final long serialVersionUID = -1L; + + private String shardTable; + private final String table; + private int shardWeightCount; + private final TreeMap shards; + private final String shardKey; + private final String shardKeyType; + private final boolean splitMode; + + private final XXHash64 hashInstance = XXHashFactory.fastestInstance().hash64(); + private final ThreadLocalRandom threadLocalRandom = ThreadLocalRandom.current(); + + public ShardRouter(ClickhouseProxy proxy, ShardMetadata shardMetadata) { + this.shards = new TreeMap<>(); + this.shardKey = shardMetadata.getShardKey(); + this.shardKeyType = shardMetadata.getShardKeyType(); + this.splitMode = shardMetadata.getSplitMode(); + this.table = shardMetadata.getTable(); + if (StringUtils.isNotEmpty(shardKey) && StringUtils.isEmpty(shardKeyType)) { + throw new IllegalArgumentException("Shard key " + shardKey + " not found in table " + table); + } + ClickHouseRequest connection = proxy.getClickhouseConnection(); + if (splitMode) { + DistributedEngine localTable = proxy.getClickhouseDistributedTable(connection, shardMetadata.getDatabase(), table); + this.shardTable = localTable.getTable(); + List shardList = proxy.getClusterShardList(connection, 
localTable.getClusterName(), + localTable.getDatabase(), shardMetadata.getDefaultShard().getNode().getPort(), + shardMetadata.getUsername(), shardMetadata.getPassword()); + int weight = 0; + for (Shard shard : shardList) { + shards.put(weight, shard); + weight += shard.getNode().getWeight(); + } + shardWeightCount = weight; + } else { + shards.put(0, shardMetadata.getDefaultShard()); + } + } + + public String getShardTable() { + return splitMode ? shardTable : table; + } + + public Shard getShard(Object shardValue) { + if (!splitMode) { + return shards.firstEntry().getValue(); + } + if (StringUtils.isEmpty(shardKey) || shardValue == null) { + return shards.lowerEntry(threadLocalRandom.nextInt(shardWeightCount + 1)).getValue(); + } + int offset = (int) (hashInstance.hash(ByteBuffer.wrap(shardValue.toString().getBytes(StandardCharsets.UTF_8)), + 0) & Long.MAX_VALUE % shardWeightCount); + return shards.lowerEntry(offset + 1).getValue(); + } + + public TreeMap getShards() { + return shards; + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseTable.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseTable.java new file mode 100644 index 00000000000..a6c8e0fe015 --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseTable.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.file; + +import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.DistributedEngine; + +import java.util.List; +import java.util.Map; + +public class ClickhouseTable { + + private String database; + private String tableName; + private String engine; + private String engineFull; + private String createTableDDL; + private List dataPaths; + private final DistributedEngine distributedEngine; + private Map tableSchema; + + public ClickhouseTable(String database, + String tableName, + DistributedEngine distributedEngine, + String engine, + String createTableDDL, + String engineFull, + List dataPaths, + Map tableSchema) { + this.database = database; + this.tableName = tableName; + this.distributedEngine = distributedEngine; + this.engine = engine; + this.engineFull = engineFull; + this.createTableDDL = createTableDDL; + this.dataPaths = dataPaths; + this.tableSchema = tableSchema; + } + + public String getTableName() { + return tableName; + } + + public void setTableName(String tableName) { + this.tableName = tableName; + } + + public String getDatabase() { + return database; + } + + public void setDatabase(String database) { + this.database = database; + } + + public String getEngine() { + return engine; + } + + public void setEngine(String engine) { + this.engine = engine; + } + + public String getEngineFull() { + 
return engineFull; + } + + public void setEngineFull(String engineFull) { + this.engineFull = engineFull; + } + + public String getCreateTableDDL() { + return createTableDDL; + } + + public void setCreateTableDDL(String createTableDDL) { + this.createTableDDL = createTableDDL; + } + + public List getDataPaths() { + return dataPaths; + } + + public void setDataPaths(List dataPaths) { + this.dataPaths = dataPaths; + } + + public Map getTableSchema() { + return tableSchema; + } + + public void setTableSchema(Map tableSchema) { + this.tableSchema = tableSchema; + } + + public String getLocalTableName() { + if (distributedEngine != null) { + return distributedEngine.getTable(); + } else { + return tableName; + } + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/ClickhouseSinkWriter.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ArrayInjectFunction.java similarity index 51% rename from seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/ClickhouseSinkWriter.java rename to seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ArrayInjectFunction.java index d7208d4c161..63648b3d039 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/ClickhouseSinkWriter.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ArrayInjectFunction.java @@ 
-15,41 +15,24 @@ * limitations under the License. */ -package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink; +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; -import org.apache.seatunnel.api.sink.SinkWriter; -import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKCommitInfo; -import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.ClickhouseSinkState; +import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; -import java.io.IOException; -import java.util.Optional; +import java.sql.SQLException; +import java.util.regex.Pattern; -public class ClickhouseSinkWriter implements SinkWriter { +public class ArrayInjectFunction implements ClickhouseFieldInjectFunction { - private SinkWriter.Context context; - - ClickhouseSinkWriter(SinkWriter.Context context) { - this.context = context; - } - - @Override - public void write(SeaTunnelRow element) throws IOException { - - } + private static final Pattern PATTERN = Pattern.compile("(Array.*)"); @Override - public Optional prepareCommit() throws IOException { - return Optional.empty(); + public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { + statement.setArray(index, (java.sql.Array) value); } @Override - public void abortPrepare() { - - } - - @Override - public void close() throws IOException { - + public boolean isCurrentFieldType(String fieldType) { + return PATTERN.matcher(fieldType).matches(); } } diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/BigDecimalInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/BigDecimalInjectFunction.java new file mode 100644 index 
00000000000..1ae0cb69f1e --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/BigDecimalInjectFunction.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; + +import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; + +import java.sql.SQLException; + +public class BigDecimalInjectFunction implements ClickhouseFieldInjectFunction { + @Override + public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { + statement.setBigDecimal(index, (java.math.BigDecimal) value); + } + + @Override + public boolean isCurrentFieldType(String fieldType) { + return "Decimal".equals(fieldType); + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ClickhouseFieldInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ClickhouseFieldInjectFunction.java new file mode 100644 index 00000000000..ae4625cd22f --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ClickhouseFieldInjectFunction.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/**
 * Binds a single field value to a ClickHouse prepared statement, converting the Java value to
 * the form expected for the column's ClickHouse type.
 */
public interface ClickhouseFieldInjectFunction {

    /**
     * Injects the value into the statement.
     *
     * @param statement statement to inject into
     * @param index     index of the parameter in the statement
     * @param value     value to inject
     * @throws SQLException if the value cannot be bound to the statement
     */
    void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException;

    /**
     * Tells whether this function handles the given field type.
     *
     * @param fieldType ClickHouse type name of the field
     * @return true if fields of this type should be injected by this function
     */
    boolean isCurrentFieldType(String fieldType);

}
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; + +import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; + +import java.sql.Date; +import java.sql.SQLException; + +public class DateInjectFunction implements ClickhouseFieldInjectFunction { + @Override + public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { + if (value instanceof Date) { + statement.setDate(index, (Date) value); + } else { + statement.setDate(index, Date.valueOf(value.toString())); + } + } + + @Override + public boolean isCurrentFieldType(String fieldType) { + return "Date".equals(fieldType); + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DateTimeInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DateTimeInjectFunction.java new file mode 100644 index 00000000000..76acd7b6306 --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DateTimeInjectFunction.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; + +import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; + +import java.sql.SQLException; +import java.sql.Timestamp; + +public class DateTimeInjectFunction implements ClickhouseFieldInjectFunction { + @Override + public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { + if (value instanceof Timestamp) { + statement.setTimestamp(index, (Timestamp) value); + } else { + statement.setTimestamp(index, Timestamp.valueOf(value.toString())); + } + } + + @Override + public boolean isCurrentFieldType(String fieldType) { + return "DateTime".equals(fieldType); + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DoubleInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DoubleInjectFunction.java new file mode 100644 index 00000000000..99e82971fde --- /dev/null +++ 
b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DoubleInjectFunction.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; + +import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; + +import java.math.BigDecimal; +import java.sql.SQLException; + +public class DoubleInjectFunction implements ClickhouseFieldInjectFunction { + @Override + public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { + if (value instanceof BigDecimal) { + statement.setDouble(index, ((BigDecimal) value).doubleValue()); + } else { + statement.setDouble(index, (Double) value); + } + } + + @Override + public boolean isCurrentFieldType(String fieldType) { + return "UInt32".equals(fieldType) + || "UInt64".equals(fieldType) + || "Int64".equals(fieldType) + || "Float64".equals(fieldType); + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/FloatInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/FloatInjectFunction.java new file mode 100644 index 00000000000..884620d9fae --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/FloatInjectFunction.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; + +import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; + +import java.math.BigDecimal; +import java.sql.SQLException; + +public class FloatInjectFunction implements ClickhouseFieldInjectFunction { + @Override + public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { + if (value instanceof BigDecimal) { + statement.setFloat(index, ((BigDecimal) value).floatValue()); + } else { + statement.setFloat(index, (Float) value); + } + } + + @Override + public boolean isCurrentFieldType(String fieldType) { + return "Float32".equals(fieldType); + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/IntInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/IntInjectFunction.java new file mode 100644 index 00000000000..8a75ec554d7 --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/IntInjectFunction.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; + +import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; + +import java.sql.SQLException; + +public class IntInjectFunction implements ClickhouseFieldInjectFunction { + @Override + public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { + statement.setInt(index, (int) value); + } + + @Override + public boolean isCurrentFieldType(String fieldType) { + return "Int8".equals(fieldType) + || "UInt8".equals(fieldType) + || "Int16".equals(fieldType) + || "UInt16".equals(fieldType) + || "Int32".equals(fieldType); + } + +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/LongInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/LongInjectFunction.java new file mode 100644 index 00000000000..116fb592eb5 --- /dev/null +++ 
b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/LongInjectFunction.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; + +import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; + +import java.sql.SQLException; + +public class LongInjectFunction implements ClickhouseFieldInjectFunction { + + @Override + public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { + statement.setLong(index, (Long) value); + } + + @Override + public boolean isCurrentFieldType(String fieldType) { + return "UInt32".equals(fieldType) + || "UInt64".equals(fieldType) + || "Int64".equals(fieldType); + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/StringInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/StringInjectFunction.java new file mode 100644 index 00000000000..bddd679d7d9 --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/StringInjectFunction.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; + +import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; + +import java.sql.SQLException; +import java.util.regex.Pattern; + +public class StringInjectFunction implements ClickhouseFieldInjectFunction { + + private static final Pattern LOW_CARDINALITY_PATTERN = Pattern.compile("LowCardinality\\((.*)\\)"); + + @Override + public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { + statement.setString(index, value.toString()); + } + + @Override + public boolean isCurrentFieldType(String fieldType) { + return "String".equals(fieldType) || LOW_CARDINALITY_PATTERN.matcher(fieldType).matches(); + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/tool/IntHolder.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/tool/IntHolder.java new file mode 100644 index 00000000000..02e7be5966d --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/tool/IntHolder.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
/**
 * Serializable, mutable box around a single {@code int}; the initial value is {@code 0}.
 */
public class IntHolder implements Serializable {

    private static final long serialVersionUID = -1L;

    /** The boxed value. */
    private int value;

    /**
     * @return the value currently held
     */
    public int getValue() {
        return this.value;
    }

    /**
     * @param value the new value to hold
     */
    public void setValue(int value) {
        this.value = value;
    }
}
seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/FileTransfer.java create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ScpFileTransfer.java diff --git a/seatunnel-connectors/plugin-mapping.properties b/seatunnel-connectors/plugin-mapping.properties index e92ddbaa300..e8e7c4cd522 100644 --- a/seatunnel-connectors/plugin-mapping.properties +++ b/seatunnel-connectors/plugin-mapping.properties @@ -91,3 +91,5 @@ seatunnel.source.Kafka = seatunnel-connector-seatunnel-kafka seatunnel.sink.Kafka = seatunnel-connector-seatunnel-kafka seatunnel.source.Socket = seatunnel-connector-seatunnel-socket seatunnel.sink.Hive = seatunnel-connector-seatunnel-hive +seatunnel.sink.Clickhouse = seatunnel-connector-seatunnel-clickhouse +seatunnel.sink.ClickhouseFile = seatunnel-connector-seatunnel-clickhouse diff --git a/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-clickhouse/src/main/java/org/apache/seatunnel/flink/clickhouse/sink/ClickhouseBatchSink.java b/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-clickhouse/src/main/java/org/apache/seatunnel/flink/clickhouse/sink/ClickhouseBatchSink.java index 620e921e11f..36ab1a3d59f 100644 --- 
a/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-clickhouse/src/main/java/org/apache/seatunnel/flink/clickhouse/sink/ClickhouseBatchSink.java +++ b/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-clickhouse/src/main/java/org/apache/seatunnel/flink/clickhouse/sink/ClickhouseBatchSink.java @@ -46,8 +46,6 @@ import org.apache.flink.types.Row; import ru.yandex.clickhouse.ClickHouseConnection; -import javax.annotation.Nullable; - import java.sql.SQLException; import java.util.ArrayList; import java.util.HashMap; @@ -73,7 +71,6 @@ public Config getConfig() { return config; } - @Nullable @Override public void outputBatch(FlinkEnvironment env, DataSet dataSet) { ClickhouseOutputFormat clickhouseOutputFormat = new ClickhouseOutputFormat(config, shardMetadata, fields, tableSchema); diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/pom.xml b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/pom.xml index 3a1b9f9353c..2a8e42fe5a7 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/pom.xml +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/pom.xml @@ -36,6 +36,11 @@ ${project.version} + + org.apache.sshd + sshd-scp + + com.clickhouse diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/ClickhouseFileCopyMethod.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/ClickhouseFileCopyMethod.java new file mode 100644 index 00000000000..cec1f48bb73 --- /dev/null +++ 
b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/ClickhouseFileCopyMethod.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Methods that can be used to copy generated ClickHouse files to a server,
 * each identified by a lower-case configuration name.
 */
public enum ClickhouseFileCopyMethod {
    SCP("scp"),
    RSYNC("rsync");

    /** Name used to select this method in configuration. */
    private final String name;

    ClickhouseFileCopyMethod(String name) {
        this.name = name;
    }

    /**
     * @return the configuration name of this copy method
     */
    public String getName() {
        return name;
    }

    /**
     * Looks up a copy method by its configuration name, ignoring case.
     *
     * @param name configuration name, e.g. {@code "scp"}
     * @return the matching copy method
     * @throws IllegalArgumentException if no method has that name (including {@code null})
     */
    public static ClickhouseFileCopyMethod from(String name) {
        final ClickhouseFileCopyMethod[] candidates = values();
        for (int i = 0; i < candidates.length; i++) {
            final ClickhouseFileCopyMethod candidate = candidates[i];
            if (candidate.name.equalsIgnoreCase(name)) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("Unknown ClickhouseFileCopyMethod: " + name);
    }
}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.file; + +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.CLICKHOUSE_LOCAL_PATH; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.COPY_METHOD; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.DATABASE; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.HOST; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.PASSWORD; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.TABLE; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.USERNAME; + +import org.apache.seatunnel.api.common.PrepareFailException; +import org.apache.seatunnel.api.common.SeaTunnelContext; +import org.apache.seatunnel.api.sink.SeaTunnelSink; +import org.apache.seatunnel.api.sink.SinkWriter; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.common.config.CheckConfigUtil; +import org.apache.seatunnel.common.config.CheckResult; +import org.apache.seatunnel.common.constants.PluginType; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ClickhouseFileCopyMethod; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKAggCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.ClickhouseSinkState; + +import 
org.apache.seatunnel.shade.com.typesafe.config.Config; +import org.apache.seatunnel.shade.com.typesafe.config.ConfigFactory; + +import com.google.common.collect.ImmutableMap; + +import java.io.IOException; +import java.util.Map; + +public class ClickhouseFileSink implements SeaTunnelSink { + + @Override + public String getPluginName() { + return "ClickhouseFile"; + } + + @Override + public void prepare(Config config) throws PrepareFailException { + CheckResult checkResult = CheckConfigUtil.checkAllExists(config, HOST, TABLE, DATABASE, USERNAME, PASSWORD, CLICKHOUSE_LOCAL_PATH); + if (!checkResult.isSuccess()) { + throw new PrepareFailException(getPluginName(), PluginType.SINK, checkResult.getMsg()); + } + Map defaultConfigs = ImmutableMap.builder() + .put(COPY_METHOD, ClickhouseFileCopyMethod.SCP.getName()) + .build(); + + config = config.withFallback(ConfigFactory.parseMap(defaultConfigs)); + } + + @Override + public SinkWriter createWriter(SinkWriter.Context context) throws IOException { + return new ClickhouseFileSinkWriter(); + } + + @Override + public SeaTunnelContext getSeaTunnelContext() { + return null; + } + + @Override + public void setSeaTunnelContext(SeaTunnelContext seaTunnelContext) { + + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java new file mode 100644 index 00000000000..cd1958c1882 --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one 
or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.file; + +import org.apache.seatunnel.api.sink.SinkWriter; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.ClickhouseSinkState; + +import java.io.IOException; +import java.util.Optional; + +public class ClickhouseFileSinkWriter implements SinkWriter { + + ClickhouseFileSinkWriter() { + + } + + @Override + public void write(SeaTunnelRow element) throws IOException { + + } + + @Override + public Optional prepareCommit() throws IOException { + return Optional.empty(); + } + + @Override + public void abortPrepare() { + + } + + @Override + public void close() throws IOException { + + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/FileTransfer.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/FileTransfer.java new file mode 100644 index 
00000000000..ca581f0ea0e --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/FileTransfer.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Transfers local files to a remote target and adjusts the ownership of the transferred files.
 *
 * <p>Expected usage is {@link #init()}, one or more {@code transferAndChown} calls, then
 * {@link #close()}.
 */
public interface FileTransfer {

    /**
     * Prepares the transfer for use, e.g. by establishing a connection.
     */
    void init();

    /**
     * Transfers a single path and changes the owner of the transferred files.
     *
     * @param sourcePath local path to transfer
     * @param targetPath remote destination path
     */
    void transferAndChown(String sourcePath, String targetPath);

    /**
     * Transfers several paths to the same destination and changes the owner of the transferred files.
     *
     * @param sourcePath local paths to transfer
     * @param targetPath remote destination path
     */
    void transferAndChown(List<String> sourcePath, String targetPath);

    /**
     * Releases the resources acquired by {@link #init()}.
     */
    void close();
}
+ */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.file; + +import org.apache.sshd.client.SshClient; +import org.apache.sshd.client.session.ClientSession; +import org.apache.sshd.scp.client.ScpClient; +import org.apache.sshd.scp.client.ScpClientCreator; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class ScpFileTransfer implements FileTransfer { + + private static final int SCP_PORT = 22; + + private final String host; + private final String password; + + private ScpClient scpClient; + private ClientSession clientSession; + private SshClient sshClient; + + public ScpFileTransfer(String host, String password) { + this.host = host; + this.password = password; + } + + @Override + public void init() { + try { + sshClient = SshClient.setUpDefaultClient(); + sshClient.start(); + clientSession = sshClient.connect("root", host, SCP_PORT).verify().getSession(); + if (password != null) { + clientSession.addPasswordIdentity(password); + } + if (!clientSession.auth().verify().isSuccess()) { + throw new IOException("ssh host " + host + "authentication failed"); + } + scpClient = ScpClientCreator.instance().createScpClient(clientSession); + } catch (IOException e) { + throw new RuntimeException("Failed to connect to host: " + host + " by user: root on port 22", e); + } + } + + @Override + public void transferAndChown(String sourcePath, String targetPath) { + try { + scpClient.upload( + sourcePath, + targetPath, + ScpClient.Option.Recursive, + ScpClient.Option.TargetIsDirectory, + ScpClient.Option.PreserveAttributes); + } catch (IOException e) { + throw new RuntimeException("Scp failed to transfer file: " + sourcePath + " to: " + targetPath, e); + } + // remote exec command to change file owner. Only file owner equal with server's clickhouse user can + // make ATTACH command work. 
+ List command = new ArrayList<>(); + command.add("ls"); + command.add("-l"); + command.add(targetPath.substring(0, targetPath.lastIndexOf("/"))); + command.add("/ | tail -n 1 | awk '{print $3}' | xargs -t -i chown -R {}:{} " + targetPath); + try { + clientSession.executeRemoteCommand(String.join(" ", command)); + } catch (IOException e) { + throw new RuntimeException("Failed to execute remote command: " + command, e); + } + } + + @Override + public void transferAndChown(List sourcePaths, String targetPath) { + if (sourcePaths == null) { + throw new IllegalArgumentException("sourcePath is null"); + } + sourcePaths.forEach(sourcePath -> transferAndChown(sourcePath, targetPath)); + } + + @Override + public void close() { + if (clientSession != null && clientSession.isOpen()) { + try { + clientSession.close(); + } catch (IOException e) { + throw new RuntimeException("Failed to close ssh session", e); + } + } + if (sshClient != null && sshClient.isOpen()) { + sshClient.stop(); + try { + sshClient.close(); + } catch (IOException e) { + throw new RuntimeException("Failed to close ssh client", e); + } + } + } +} From ba6be12dbc1e043c773edf7b29d16523d333cee6 Mon Sep 17 00:00:00 2001 From: Hisoka Date: Mon, 20 Jun 2022 18:12:32 +0800 Subject: [PATCH 05/21] add clickhouse file sink --- .../sink/client/ClickhouseProxy.java | 2 +- .../sink/file/ClickhouseFileSink.java | 50 ++++++++++++++++++- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseProxy.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseProxy.java index 73c3770184f..6760a802f67 100644 --- 
a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseProxy.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseProxy.java @@ -45,7 +45,7 @@ public class ClickhouseProxy { private final ClickHouseRequest clickhouseRequest; private final ClickHouseClient client; - private Map shardToDataSource = new ConcurrentHashMap<>(16); + private final Map shardToDataSource = new ConcurrentHashMap<>(16); public ClickhouseProxy(ClickHouseNode node) { this.client = ClickHouseClient.newInstance(node.getProtocol()); diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java index 6cdadfa254e..0bfb55b4f7d 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java @@ -20,8 +20,11 @@ import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.CLICKHOUSE_LOCAL_PATH; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.COPY_METHOD; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.DATABASE; +import static 
org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.FIELDS; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.HOST; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.NODE_ADDRESS; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.PASSWORD; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.SHARDING_KEY; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.TABLE; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.USERNAME; @@ -32,22 +35,38 @@ import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.common.config.CheckConfigUtil; import org.apache.seatunnel.common.config.CheckResult; +import org.apache.seatunnel.common.config.TypesafeConfigUtils; import org.apache.seatunnel.common.constants.PluginType; import org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ClickhouseFileCopyMethod; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.shard.Shard; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.shard.ShardMetadata; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.client.ClickhouseProxy; import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKAggCommitInfo; import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKCommitInfo; import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.ClickhouseSinkState; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.util.ClickhouseUtil; import org.apache.seatunnel.shade.com.typesafe.config.Config; import org.apache.seatunnel.shade.com.typesafe.config.ConfigFactory; +import com.clickhouse.client.ClickHouseNode; +import com.clickhouse.client.ClickHouseRequest; import com.google.common.collect.ImmutableMap; import java.io.IOException; +import java.sql.SQLException; +import java.util.HashMap; +import java.util.List; import 
java.util.Map; public class ClickhouseFileSink implements SeaTunnelSink { + private SeaTunnelContext seaTunnelContext; + private Config config; + private ShardMetadata shardMetadata; + private Map tableSchema = new HashMap<>(); + private List fields; + @Override public String getPluginName() { return "ClickhouseFile"; @@ -64,6 +83,33 @@ public void prepare(Config config) throws PrepareFailException { .build(); config = config.withFallback(ConfigFactory.parseMap(defaultConfigs)); + List nodes = ClickhouseUtil.createNodes(config.getString(NODE_ADDRESS), + config.getString(DATABASE), config.getString(USERNAME), config.getString(PASSWORD)); + + ClickhouseProxy clickhouseClient = new ClickhouseProxy(nodes.get(0)); + ClickHouseRequest connection = clickhouseClient.getClickhouseConnection(); + tableSchema = clickhouseClient.getClickhouseTableSchema(connection, config.getString(TABLE)); + String shardKey = TypesafeConfigUtils.getConfig(this.config, SHARDING_KEY, ""); + String shardKeyType = tableSchema.get(shardKey); + shardMetadata = new ShardMetadata( + shardKey, + shardKeyType, + config.getString(DATABASE), + config.getString(TABLE), + false, // we don't need to set splitMode in clickhouse file mode. 
+ new Shard(1, 1, nodes.get(0)), config.getString(USERNAME), config.getString(PASSWORD)); + + if (this.config.hasPath(FIELDS)) { + fields = this.config.getStringList(FIELDS); + // check if the fields exist in schema + for (String field : fields) { + if (!tableSchema.containsKey(field)) { + throw new RuntimeException("Field " + field + " does not exist in table " + config.getString(TABLE)); + } + } + } else { + fields.addAll(tableSchema.keySet()); + } } @Override @@ -73,11 +119,11 @@ public SinkWriter createWriter( @Override public SeaTunnelContext getSeaTunnelContext() { - return null; + return seaTunnelContext; } @Override public void setSeaTunnelContext(SeaTunnelContext seaTunnelContext) { - + this.seaTunnelContext = seaTunnelContext; } } From d4c937563a0eb2d7dce22ef43764ca60c2cf961a Mon Sep 17 00:00:00 2001 From: Hisoka Date: Wed, 22 Jun 2022 14:58:34 +0800 Subject: [PATCH 06/21] support clickhouse file sink --- pom.xml | 7 + .../pom.xml | 10 + .../clickhouse/config/FileReaderOption.java | 116 +++++++++++ .../sink/client/ClickhouseSink.java | 14 +- .../sink/file/ClickhouseFileSink.java | 59 ++++-- .../sink/file/ClickhouseFileSinkWriter.java | 194 +++++++++++++++++- 6 files changed, 374 insertions(+), 26 deletions(-) create mode 100644 seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/FileReaderOption.java diff --git a/pom.xml b/pom.xml index 504b8cc480c..1d4b472f602 100644 --- a/pom.xml +++ b/pom.xml @@ -140,6 +140,7 @@ 2.2.0 2.6.0 3.4 + 2.8.0 3.3.0 provided provided @@ -434,6 +435,12 @@ ${commons-lang3.version} + + commons-io + commons-io + ${commons-io.version} + + org.apache.flink flink-csv diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/pom.xml b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/pom.xml index 2a8e42fe5a7..6f3ebbb598f 100644 --- 
a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/pom.xml +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/pom.xml @@ -41,6 +41,16 @@ sshd-scp + + org.apache.commons + commons-lang3 + + + + commons-io + commons-io + + com.clickhouse diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/FileReaderOption.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/FileReaderOption.java new file mode 100644 index 00000000000..3a2f175699a --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/FileReaderOption.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.clickhouse.config; + +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.shard.ShardMetadata; + +import java.io.Serializable; +import java.util.List; +import java.util.Map; + +public class FileReaderOption implements Serializable { + + private ShardMetadata shardMetadata; + private Map tableSchema; + private List fields; + private String clickhouseLocalPath; + private ClickhouseFileCopyMethod copyMethod; + private boolean nodeFreePass; + private Map nodePassword; + private SeaTunnelRowType seaTunnelRowType; + + public FileReaderOption(ShardMetadata shardMetadata, Map tableSchema, + List fields, String clickhouseLocalPath, + ClickhouseFileCopyMethod copyMethod, boolean nodeFreePass, + Map nodePassword, + SeaTunnelRowType seaTunnelRowType) { + this.shardMetadata = shardMetadata; + this.tableSchema = tableSchema; + this.fields = fields; + this.clickhouseLocalPath = clickhouseLocalPath; + this.copyMethod = copyMethod; + this.nodeFreePass = nodeFreePass; + this.nodePassword = nodePassword; + this.seaTunnelRowType = seaTunnelRowType; + } + + public SeaTunnelRowType getSeaTunnelRowType() { + return seaTunnelRowType; + } + + public void setSeaTunnelRowType(SeaTunnelRowType seaTunnelRowType) { + this.seaTunnelRowType = seaTunnelRowType; + } + + public boolean isNodeFreePass() { + return nodeFreePass; + } + + public void setNodeFreePass(boolean nodeFreePass) { + this.nodeFreePass = nodeFreePass; + } + + public String getClickhouseLocalPath() { + return clickhouseLocalPath; + } + + public void setClickhouseLocalPath(String clickhouseLocalPath) { + this.clickhouseLocalPath = clickhouseLocalPath; + } + + public ClickhouseFileCopyMethod getCopyMethod() { + return copyMethod; + } + + public void setCopyMethod(ClickhouseFileCopyMethod copyMethod) { + this.copyMethod = copyMethod; + } + + public Map getNodePassword() { + return nodePassword; + } + + 
public void setNodePassword(Map nodePassword) { + this.nodePassword = nodePassword; + } + + public ShardMetadata getShardMetadata() { + return shardMetadata; + } + + public void setShardMetadata(ShardMetadata shardMetadata) { + this.shardMetadata = shardMetadata; + } + + public Map getTableSchema() { + return tableSchema; + } + + public void setTableSchema(Map tableSchema) { + this.tableSchema = tableSchema; + } + + public List getFields() { + return fields; + } + + public void setFields(List fields) { + this.fields = fields; + } +} diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java index 730488d0739..a3e8c89437d 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java @@ -54,7 +54,6 @@ import org.apache.seatunnel.shade.com.typesafe.config.ConfigFactory; import com.clickhouse.client.ClickHouseNode; -import com.clickhouse.client.ClickHouseRequest; import com.google.auto.service.AutoService; import com.google.common.collect.ImmutableMap; @@ -106,11 +105,13 @@ public void prepare(Config config) throws PrepareFailException { clickhouseProperties.put("password", config.getString(PASSWORD)); ClickhouseProxy proxy = new ClickhouseProxy(nodes.get(0)); - ClickHouseRequest request = proxy.getClickhouseConnection(); - Map tableSchema = proxy.getClickhouseTableSchema(request, 
config.getString(TABLE)); - String shardKey = TypesafeConfigUtils.getConfig(config, SHARDING_KEY, ""); - String shardKeyType = tableSchema.get(shardKey); - + Map tableSchema = proxy.getClickhouseTableSchema(config.getString(TABLE)); + String shardKey = null; + String shardKeyType = null; + if (config.hasPath(SHARDING_KEY)) { + shardKey = config.getString(SHARDING_KEY); + shardKeyType = tableSchema.get(shardKey); + } ShardMetadata metadata = new ShardMetadata( shardKey, shardKeyType, @@ -130,6 +131,7 @@ public void prepare(Config config) throws PrepareFailException { } else { fields.addAll(tableSchema.keySet()); } + proxy.close(); this.option = new ReaderOption(metadata, seaTunnelRowType, clickhouseProperties, fields, config.getIntList(RETRY_CODES), tableSchema, config.getInt(RETRY), config.getInt(BULK_SIZE)); } diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java index 0bfb55b4f7d..dca68d79ff9 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java @@ -23,6 +23,8 @@ import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.FIELDS; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.HOST; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.NODE_ADDRESS; +import static 
org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.NODE_FREE_PASSWORD; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.NODE_PASS; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.PASSWORD; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.SHARDING_KEY; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.TABLE; @@ -33,39 +35,39 @@ import org.apache.seatunnel.api.sink.SeaTunnelSink; import org.apache.seatunnel.api.sink.SinkWriter; import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.common.config.CheckConfigUtil; import org.apache.seatunnel.common.config.CheckResult; import org.apache.seatunnel.common.config.TypesafeConfigUtils; import org.apache.seatunnel.common.constants.PluginType; import org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ClickhouseFileCopyMethod; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.config.FileReaderOption; import org.apache.seatunnel.connectors.seatunnel.clickhouse.shard.Shard; import org.apache.seatunnel.connectors.seatunnel.clickhouse.shard.ShardMetadata; import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.client.ClickhouseProxy; import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKAggCommitInfo; import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKCommitInfo; import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.ClickhouseSinkState; - import org.apache.seatunnel.connectors.seatunnel.clickhouse.util.ClickhouseUtil; + import org.apache.seatunnel.shade.com.typesafe.config.Config; import org.apache.seatunnel.shade.com.typesafe.config.ConfigFactory; import com.clickhouse.client.ClickHouseNode; -import com.clickhouse.client.ClickHouseRequest; import com.google.common.collect.ImmutableMap; import java.io.IOException; -import 
java.sql.SQLException; -import java.util.HashMap; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; public class ClickhouseFileSink implements SeaTunnelSink { private SeaTunnelContext seaTunnelContext; - private Config config; - private ShardMetadata shardMetadata; - private Map tableSchema = new HashMap<>(); - private List fields; + private SeaTunnelRowType seaTunnelRowType; + private FileReaderOption readerOption; @Override public String getPluginName() { @@ -86,21 +88,24 @@ public void prepare(Config config) throws PrepareFailException { List nodes = ClickhouseUtil.createNodes(config.getString(NODE_ADDRESS), config.getString(DATABASE), config.getString(USERNAME), config.getString(PASSWORD)); - ClickhouseProxy clickhouseClient = new ClickhouseProxy(nodes.get(0)); - ClickHouseRequest connection = clickhouseClient.getClickhouseConnection(); - tableSchema = clickhouseClient.getClickhouseTableSchema(connection, config.getString(TABLE)); - String shardKey = TypesafeConfigUtils.getConfig(this.config, SHARDING_KEY, ""); - String shardKeyType = tableSchema.get(shardKey); - shardMetadata = new ShardMetadata( + ClickhouseProxy proxy = new ClickhouseProxy(nodes.get(0)); + Map tableSchema = proxy.getClickhouseTableSchema(config.getString(TABLE)); + String shardKey = null; + String shardKeyType = null; + if (config.hasPath(SHARDING_KEY)) { + shardKey = config.getString(SHARDING_KEY); + shardKeyType = tableSchema.get(shardKey); + } + ShardMetadata shardMetadata = new ShardMetadata( shardKey, shardKeyType, config.getString(DATABASE), config.getString(TABLE), false, // we don't need to set splitMode in clickhouse file mode. 
new Shard(1, 1, nodes.get(0)), config.getString(USERNAME), config.getString(PASSWORD)); - - if (this.config.hasPath(FIELDS)) { - fields = this.config.getStringList(FIELDS); + List fields; + if (config.hasPath(FIELDS)) { + fields = config.getStringList(FIELDS); // check if the fields exist in schema for (String field : fields) { if (!tableSchema.containsKey(field)) { @@ -108,13 +113,29 @@ public void prepare(Config config) throws PrepareFailException { } } } else { - fields.addAll(tableSchema.keySet()); + fields = new ArrayList<>(tableSchema.keySet()); } + Map nodePassword = Collections.emptyMap(); + if (!TypesafeConfigUtils.getConfig(config, NODE_FREE_PASSWORD, true)) { + nodePassword = config.getObjectList(NODE_PASS).stream() + .collect(Collectors.toMap( + configObject -> configObject.toConfig().getString(NODE_ADDRESS), + configObject -> configObject.toConfig().getString(PASSWORD))); + } + proxy.close(); + this.readerOption = new FileReaderOption(shardMetadata, tableSchema, fields, config.getString(CLICKHOUSE_LOCAL_PATH), + ClickhouseFileCopyMethod.from(config.getString(COPY_METHOD)), + TypesafeConfigUtils.getConfig(config, NODE_FREE_PASSWORD, true), nodePassword, this.seaTunnelRowType); + } + + @Override + public void setTypeInfo(SeaTunnelRowType seaTunnelRowType) { + this.seaTunnelRowType = seaTunnelRowType; } @Override public SinkWriter createWriter(SinkWriter.Context context) throws IOException { - return new ClickhouseFileSinkWriter(); + return new ClickhouseFileSinkWriter(readerOption, context); } @Override diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java index cd1958c1882..bf0575dfa3a 100644 --- 
a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java @@ -19,21 +19,89 @@ import org.apache.seatunnel.api.sink.SinkWriter; import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.common.config.Common; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ClickhouseFileCopyMethod; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.config.FileReaderOption; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.shard.Shard; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.client.ClickhouseProxy; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.client.ShardRouter; import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKCommitInfo; import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.ClickhouseSinkState; +import com.clickhouse.client.ClickHouseException; +import com.clickhouse.client.ClickHouseRequest; +import org.apache.commons.io.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.File; import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.Optional; +import java.util.UUID; +import java.util.function.Function; +import 
java.util.stream.Collectors; public class ClickhouseFileSinkWriter implements SinkWriter { + private static final Logger LOGGER = LoggerFactory.getLogger(ClickhouseFileSinkWriter.class); + private static final String CLICKHOUSE_LOCAL_FILE_PREFIX = "/tmp/clickhouse-local/flink-file"; + private static final int UUID_LENGTH = 10; + private final FileReaderOption readerOption; + private final ShardRouter shardRouter; + private final ClickhouseProxy proxy; + private final ClickhouseTable clickhouseTable; + private final Map> shardLocalDataPaths; + private final Map> rowCache; + + public ClickhouseFileSinkWriter(FileReaderOption readerOption, Context context) { + this.readerOption = readerOption; + proxy = new ClickhouseProxy(this.readerOption.getShardMetadata().getDefaultShard().getNode()); + shardRouter = new ShardRouter(proxy, this.readerOption.getShardMetadata()); + clickhouseTable = proxy.getClickhouseTable(this.readerOption.getShardMetadata().getDatabase(), + this.readerOption.getShardMetadata().getTable()); + rowCache = new HashMap<>(Common.COLLECTION_SIZE); - ClickhouseFileSinkWriter() { + nodePasswordCheck(); + // find file local save path of each node + shardLocalDataPaths = shardRouter.getShards().values().stream() + .collect(Collectors.toMap( + Function.identity(), + shard -> { + ClickhouseTable shardTable = proxy.getClickhouseTable(shard.getNode().getDatabase().get(), + clickhouseTable.getLocalTableName()); + return shardTable.getDataPaths(); + })); } @Override public void write(SeaTunnelRow element) throws IOException { + Shard shard = shardRouter.getShard(element); + rowCache.computeIfAbsent(shard, k -> new ArrayList<>()).add(element); + } + private void nodePasswordCheck() { + if (!this.readerOption.isNodeFreePass()) { + shardRouter.getShards().values().forEach(shard -> { + if (!this.readerOption.getNodePassword().containsKey(shard.getNode().getAddress().getAddress().getHostAddress()) + && 
!this.readerOption.getNodePassword().containsKey(shard.getNode().getAddress().getHostName())) { + throw new RuntimeException("Cannot find password of shard " + shard.getNode().getAddress().getAddress().getHostAddress()); + } + }); + } } @Override @@ -48,6 +116,130 @@ public void abortPrepare() { @Override public void close() throws IOException { + for (Map.Entry> entry : rowCache.entrySet()) { + Shard shard = entry.getKey(); + List rows = entry.getValue(); + flush(shard, rows); + rows.clear(); + } + } + + private void flush(Shard shard, List rows) { + try { + // generate clickhouse local file + List clickhouseLocalFiles = generateClickhouseLocalFiles(shard, rows); + // move file to server + attachClickhouseLocalFileToServer(shard, clickhouseLocalFiles); + // clear local file + clearLocalFileDirectory(clickhouseLocalFiles); + } catch (Exception e) { + throw new RuntimeException("Flush data into clickhouse file error", e); + } + } + + private List generateClickhouseLocalFiles(Shard shard, List rows) throws IOException, + InterruptedException { + if (rows.isEmpty()) { + return Collections.emptyList(); + } + String uuid = UUID.randomUUID().toString().substring(0, UUID_LENGTH).replaceAll("-", "_"); + String clickhouseLocalFile = String.format("%s/%s", CLICKHOUSE_LOCAL_FILE_PREFIX, uuid); + FileUtils.forceMkdir(new File(clickhouseLocalFile)); + String clickhouseLocalFileTmpFile = clickhouseLocalFile + "/local_data.log"; + FileChannel fileChannel = FileChannel.open(Paths.get(clickhouseLocalFileTmpFile), StandardOpenOption.WRITE, + StandardOpenOption.READ, StandardOpenOption.CREATE_NEW); + String data = rows.stream() + .map(row -> this.readerOption.getFields().stream().map(field -> row.getField(this.readerOption.getSeaTunnelRowType().indexOf(field)).toString()) + .collect(Collectors.joining("\t"))) + .collect(Collectors.joining("\n")); + MappedByteBuffer buffer = fileChannel.map(FileChannel.MapMode.READ_WRITE, fileChannel.size(), + 
data.getBytes(StandardCharsets.UTF_8).length); + buffer.put(data.getBytes(StandardCharsets.UTF_8)); + List command = new ArrayList<>(); + command.add("cat"); + command.add(clickhouseLocalFileTmpFile); + command.add("|"); + + command.addAll(Arrays.stream(this.readerOption.getClickhouseLocalPath().trim().split(" ")).collect(Collectors.toList())); + command.add("local"); + command.add("-S"); + command.add("\"" + this.readerOption.getFields().stream().map(field -> field + " " + readerOption.getTableSchema().get(field)).collect(Collectors.joining(",")) + "\""); + command.add("-N"); + command.add("\"" + "temp_table" + uuid + "\""); + command.add("-q"); + command.add(String.format( + "\"%s; INSERT INTO TABLE %s SELECT %s FROM temp_table%s;\"", + clickhouseTable.getCreateTableDDL().replace(clickhouseTable.getDatabase() + ".", "").replaceAll("`", ""), + clickhouseTable.getLocalTableName(), + readerOption.getTableSchema().entrySet().stream().map(entry -> { + if (readerOption.getFields().contains(entry.getKey())) { + return entry.getKey(); + } else { + return "NULL"; + } + }).collect(Collectors.joining(",")), + uuid)); + command.add("--path"); + command.add("\"" + clickhouseLocalFile + "\""); + LOGGER.info("Generate clickhouse local file command: {}", String.join(" ", command)); + ProcessBuilder processBuilder = new ProcessBuilder("bash", "-c", String.join(" ", command)); + Process start = processBuilder.start(); + // we just wait for the process to finish + try (InputStream inputStream = start.getInputStream(); + InputStreamReader inputStreamReader = new InputStreamReader(inputStream); + BufferedReader bufferedReader = new BufferedReader(inputStreamReader)) { + String line; + while ((line = bufferedReader.readLine()) != null) { + LOGGER.info(line); + } + } + start.waitFor(); + File file = new File(clickhouseLocalFile + "/data/_local/" + clickhouseTable.getLocalTableName()); + if (!file.exists()) { + throw new RuntimeException("clickhouse local file not exists"); + } + File[] 
files = file.listFiles(); + if (files == null) { + throw new RuntimeException("clickhouse local file not exists"); + } + return Arrays.stream(files) + .filter(File::isDirectory) + .filter(f -> !"detached".equals(f.getName())) + .map(File::getAbsolutePath).collect(Collectors.toList()); + } + + private void attachClickhouseLocalFileToServer(Shard shard, List clickhouseLocalFiles) throws ClickHouseException { + if (ClickhouseFileCopyMethod.SCP.equals(this.readerOption.getCopyMethod())) { + String hostAddress = shard.getNode().getAddress().getAddress().getHostAddress(); + String password = readerOption.getNodePassword().getOrDefault(hostAddress, null); + FileTransfer fileTransfer = new ScpFileTransfer(hostAddress, password); + fileTransfer.init(); + fileTransfer.transferAndChown(clickhouseLocalFiles, shardLocalDataPaths.get(shard).get(0) + "detached/"); + fileTransfer.close(); + } else { + throw new RuntimeException("unsupported clickhouse file copy method " + readerOption.getCopyMethod()); + } + + ClickHouseRequest request = proxy.getClickhouseConnection(shard); + for (String clickhouseLocalFile : clickhouseLocalFiles) { + request.query(String.format("ALTER TABLE %s ATTACH PART '%s'", + clickhouseTable.getLocalTableName(), + clickhouseLocalFile.substring(clickhouseLocalFile.lastIndexOf("/") + 1))).executeAndWait(); + } } + + private void clearLocalFileDirectory(List clickhouseLocalFiles) { + String clickhouseLocalFile = clickhouseLocalFiles.get(0); + String localFileDir = clickhouseLocalFile.substring(0, CLICKHOUSE_LOCAL_FILE_PREFIX.length() + UUID_LENGTH + 1); + try { + File file = new File(localFileDir); + if (file.exists()) { + FileUtils.deleteDirectory(new File(localFileDir)); + } + } catch (IOException e) { + throw new RuntimeException("Unable to delete directory " + localFileDir, e); + } + } + } From 36a97732b68345149c5767fbbe3285cbd164ae56 Mon Sep 17 00:00:00 2001 From: Hisoka Date: Thu, 23 Jun 2022 10:21:48 +0800 Subject: [PATCH 07/21] fix clickhouse client 
sink bug and fix SinkWriter should be serialized bug. --- .../apache/seatunnel/api/sink/SinkWriter.java | 2 +- .../sink/ClickhouseOutputFormat.java | 2 +- .../seatunnel/clickhouse/config/Config.java | 10 ---- .../clickhouse/config/FileReaderOption.java | 4 +- .../clickhouse/config/ReaderOption.java | 27 +-------- .../seatunnel/clickhouse/shard/Shard.java | 2 +- .../sink/client/ClickhouseProxy.java | 58 ++++++++++++++----- .../sink/client/ClickhouseSink.java | 25 ++++---- .../sink/client/ClickhouseSinkWriter.java | 39 +++++++------ .../clickhouse/sink/client/ShardRouter.java | 4 +- .../sink/file/ClickhouseFileSink.java | 7 ++- .../sink/file/ClickhouseFileSinkWriter.java | 10 ++-- .../inject/ClickhouseFieldInjectFunction.java | 3 +- .../sink/inject/StringInjectFunction.java | 5 +- .../sink/AbstractSparkWriterConverter.java | 9 ++- .../spark/sink/SparkDataSourceWriter.java | 22 +++---- .../sink/SparkDataSourceWriterConverter.java | 8 +-- .../spark/sink/SparkDataWriter.java | 21 ++++--- .../spark/sink/SparkDataWriterFactory.java | 19 +++--- .../translation/spark/sink/SparkSink.java | 16 ++--- .../spark/sink/SparkStreamWriter.java | 8 +-- .../sink/SparkStreamWriterConverter.java | 8 +-- 22 files changed, 157 insertions(+), 152 deletions(-) diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkWriter.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkWriter.java index 56f97bac931..268d3d40e6a 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkWriter.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkWriter.java @@ -74,7 +74,7 @@ default List snapshotState(long checkpointId) throws IOException { */ void close() throws IOException; - interface Context { + interface Context extends Serializable{ /** * Gets the configuration with which Job was started. 
diff --git a/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-clickhouse/src/main/java/org/apache/seatunnel/flink/clickhouse/sink/ClickhouseOutputFormat.java b/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-clickhouse/src/main/java/org/apache/seatunnel/flink/clickhouse/sink/ClickhouseOutputFormat.java index cb392b9a0b2..efd4eb7d504 100644 --- a/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-clickhouse/src/main/java/org/apache/seatunnel/flink/clickhouse/sink/ClickhouseOutputFormat.java +++ b/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-clickhouse/src/main/java/org/apache/seatunnel/flink/clickhouse/sink/ClickhouseOutputFormat.java @@ -224,7 +224,7 @@ private Map initFieldInjectFunctionMap() break; } } - result.put(field, function); + result.put(fieldType, function); } return result; } diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/Config.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/Config.java index f20f47071ba..4b219c100e5 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/Config.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/Config.java @@ -24,11 +24,6 @@ public class Config { */ public static final String BULK_SIZE = "bulk_size"; - /** - * Clickhouse jdbc retry time - */ - public static final String RETRY = "retry"; - /** * Clickhouse fields */ @@ -71,11 +66,6 @@ public class Config { */ public static final String SHARDING_KEY = "sharding_key"; - 
/** - * The retry code when use clickhouse jdbc - */ - public static final String RETRY_CODES = "retry_codes"; - /** * ClickhouseFile sink connector used clickhouse-local program's path */ diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/FileReaderOption.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/FileReaderOption.java index 3a2f175699a..16e58e44617 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/FileReaderOption.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/FileReaderOption.java @@ -38,8 +38,7 @@ public class FileReaderOption implements Serializable { public FileReaderOption(ShardMetadata shardMetadata, Map tableSchema, List fields, String clickhouseLocalPath, ClickhouseFileCopyMethod copyMethod, boolean nodeFreePass, - Map nodePassword, - SeaTunnelRowType seaTunnelRowType) { + Map nodePassword) { this.shardMetadata = shardMetadata; this.tableSchema = tableSchema; this.fields = fields; @@ -47,7 +46,6 @@ public FileReaderOption(ShardMetadata shardMetadata, Map tableSc this.copyMethod = copyMethod; this.nodeFreePass = nodeFreePass; this.nodePassword = nodePassword; - this.seaTunnelRowType = seaTunnelRowType; } public SeaTunnelRowType getSeaTunnelRowType() { diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/ReaderOption.java 
b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/ReaderOption.java index a7f453dffc1..084f54bcc98 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/ReaderOption.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/ReaderOption.java @@ -28,26 +28,19 @@ public class ReaderOption implements Serializable { private ShardMetadata shardMetadata; - private List retryCodes; - private List fields; private Map tableSchema; private SeaTunnelRowType seaTunnelRowType; private Properties properties; - private int retry; private int bulkSize; - public ReaderOption(ShardMetadata shardMetadata, SeaTunnelRowType seaTunnelRowType, - Properties properties, List fields, - List retryCodes, Map tableSchema, int retry, int bulkSize) { + public ReaderOption(ShardMetadata shardMetadata, + Properties properties, List fields, Map tableSchema, int bulkSize) { this.shardMetadata = shardMetadata; this.properties = properties; - this.seaTunnelRowType = seaTunnelRowType; this.fields = fields; - this.retryCodes = retryCodes; this.tableSchema = tableSchema; - this.retry = retry; this.bulkSize = bulkSize; } @@ -91,22 +84,6 @@ public void setFields(List fields) { this.fields = fields; } - public List getRetryCodes() { - return retryCodes; - } - - public void setRetryCodes(List retryCodes) { - this.retryCodes = retryCodes; - } - - public int getRetry() { - return retry; - } - - public void setRetry(int retry) { - this.retry = retry; - } - public int getBulkSize() { return bulkSize; } diff --git 
a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/shard/Shard.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/shard/Shard.java index d99c5968d57..2fe72632540 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/shard/Shard.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/shard/Shard.java @@ -69,7 +69,7 @@ public ClickHouseNode getNode() { } public String getJdbcUrl() { - return "jdbc:clickhouse://" + node.getAddress().getAddress().getHostAddress() + return "jdbc:clickhouse://" + node.getAddress().getHostName() + ":" + node.getAddress().getPort() + "/" + node.getDatabase().get(); } diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseProxy.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseProxy.java index 6760a802f67..ac8b5cf9aed 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseProxy.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseProxy.java @@ -17,7 +17,6 @@ package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.client; -import 
org.apache.seatunnel.common.utils.JsonUtils; import org.apache.seatunnel.connectors.seatunnel.clickhouse.shard.Shard; import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.DistributedEngine; import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.file.ClickhouseTable; @@ -29,7 +28,6 @@ import com.clickhouse.client.ClickHouseRecord; import com.clickhouse.client.ClickHouseRequest; import com.clickhouse.client.ClickHouseResponse; -import com.fasterxml.jackson.core.type.TypeReference; import java.util.ArrayList; import java.util.Arrays; @@ -73,10 +71,11 @@ public DistributedEngine getClickhouseDistributedTable(ClickHouseRequest conn String table) { String sql = String.format("select engine_full from system.tables where database = '%s' and name = '%s' and engine = 'Distributed'", database, table); try (ClickHouseResponse response = connection.query(sql).executeAndWait()) { - ClickHouseRecord record = response.firstRecord(); - if (record != null) { + List records = response.stream().collect(Collectors.toList()); + if (!records.isEmpty()) { + ClickHouseRecord record = records.get(0); // engineFull field will be like : Distributed(cluster, database, table[, sharding_key[, policy_name]]) - String engineFull = record.getValue(1).asString(); + String engineFull = record.getValue(0).asString(); List infos = Arrays.stream(engineFull.substring(12).split(",")) .map(s -> s.replace("'", "").trim()).collect(Collectors.toList()); return new DistributedEngine(infos.get(0), infos.get(1), infos.get(2).replace("\\)", "").trim()); @@ -102,7 +101,7 @@ public Map getClickhouseTableSchema(ClickHouseRequest request String sql = "desc " + table; Map schema = new LinkedHashMap<>(); try (ClickHouseResponse response = request.query(sql).executeAndWait()) { - response.records().forEach(r -> schema.put(r.getValue(1).asString(), r.getValue(2).asString())); + response.records().forEach(r -> schema.put(r.getValue(0).asString(), r.getValue(1).asString())); } catch 
(ClickHouseException e) { throw new RuntimeException("Cannot get table schema from clickhouse", e); } @@ -125,11 +124,11 @@ public List getClusterShardList(ClickHouseRequest connection, String c try (ClickHouseResponse response = connection.query(sql).executeAndWait()) { response.records().forEach(r -> { shardList.add(new Shard( + r.getValue(0).asInteger(), r.getValue(1).asInteger(), r.getValue(2).asInteger(), - r.getValue(3).asInteger(), + r.getValue(3).asString(), r.getValue(4).asString(), - r.getValue(5).asString(), port, database, username, password)); }); return shardList; @@ -148,19 +147,29 @@ public List getClusterShardList(ClickHouseRequest connection, String c public ClickhouseTable getClickhouseTable(String database, String table) { String sql = String.format("select engine,create_table_query,engine_full,data_paths from system.tables where database = '%s' and name = '%s'", database, table); try (ClickHouseResponse response = clickhouseRequest.query(sql).executeAndWait()) { - if (!response.stream().findAny().isPresent()) { + List records = response.stream().collect(Collectors.toList()); + if (records.isEmpty()) { throw new RuntimeException("Cannot get table from clickhouse, resultSet is empty"); } - ClickHouseRecord record = response.firstRecord(); - String engine = record.getValue(1).asString(); - String createTableDDL = record.getValue(2).asString(); - String engineFull = record.getValue(3).asString(); - List dataPaths = JsonUtils.parseObject(record.getValue(4).asString().replaceAll("'", "\""), - new TypeReference>() { - }); + ClickHouseRecord record = records.get(0); + String engine = record.getValue(0).asString(); + String createTableDDL = record.getValue(1).asString(); + String engineFull = record.getValue(2).asString(); + List dataPaths = record.getValue(3).asTuple().stream().map(Object::toString).collect(Collectors.toList()); DistributedEngine distributedEngine = null; if ("Distributed".equals(engine)) { distributedEngine = 
getClickhouseDistributedTable(clickhouseRequest, database, table); + String localTableSQL = String.format("select engine,create_table_query from system.tables where database = '%s' and name = '%s'", + distributedEngine.getDatabase(), distributedEngine.getTable()); + try (ClickHouseResponse rs = clickhouseRequest.query(localTableSQL).executeAndWait()) { + List localTableRecords = rs.stream().collect(Collectors.toList()); + if (localTableRecords.isEmpty()) { + throw new RuntimeException("Cannot get table from clickhouse, resultSet is empty"); + } + String localEngine = localTableRecords.get(0).getValue(0).asString(); + String createLocalTableDDL = localTableRecords.get(0).getValue(1).asString(); + createTableDDL = localizationEngine(localEngine, createLocalTableDDL); + } } return new ClickhouseTable( database, @@ -177,6 +186,23 @@ public ClickhouseTable getClickhouseTable(String database, String table) { } + /** + * Localization the engine in clickhouse local table's createTableDDL to support specific engine. + * For example: change ReplicatedMergeTree to MergeTree. 
+ * + * @param engine original engine of clickhouse local table + * @param ddl createTableDDL of clickhouse local table + * @return createTableDDL of clickhouse local table which can support specific engine + * TODO: support more engine + */ + public String localizationEngine(String engine, String ddl) { + if ("ReplicatedMergeTree".equalsIgnoreCase(engine)) { + return ddl.replaceAll("ReplicatedMergeTree(\\([^\\)]*\\))", "MergeTree()"); + } else { + return ddl; + } + } + public void close() { if (this.client != null) { this.client.close(); diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java index a3e8c89437d..30c0e55c4a7 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java @@ -23,8 +23,6 @@ import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.FIELDS; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.NODE_ADDRESS; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.PASSWORD; -import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.RETRY; -import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.RETRY_CODES; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.SHARDING_KEY; import static 
org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.SPLIT_MODE; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.TABLE; @@ -45,6 +43,7 @@ import org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ReaderOption; import org.apache.seatunnel.connectors.seatunnel.clickhouse.shard.Shard; import org.apache.seatunnel.connectors.seatunnel.clickhouse.shard.ShardMetadata; +import org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.file.ClickhouseTable; import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKAggCommitInfo; import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.CKCommitInfo; import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.ClickhouseSinkState; @@ -68,7 +67,6 @@ public class ClickhouseSink implements SeaTunnelSink { private SeaTunnelContext seaTunnelContext; - private SeaTunnelRowType seaTunnelRowType; private ReaderOption option; @Override @@ -85,8 +83,6 @@ public void prepare(Config config) throws PrepareFailException { } Map defaultConfig = ImmutableMap.builder() .put(BULK_SIZE, 20_000) - .put(RETRY_CODES, new ArrayList<>()) - .put(RETRY, 1) .put(SPLIT_MODE, false) .build(); @@ -108,9 +104,17 @@ public void prepare(Config config) throws PrepareFailException { Map tableSchema = proxy.getClickhouseTableSchema(config.getString(TABLE)); String shardKey = null; String shardKeyType = null; - if (config.hasPath(SHARDING_KEY)) { - shardKey = config.getString(SHARDING_KEY); - shardKeyType = tableSchema.get(shardKey); + if (config.getBoolean(SPLIT_MODE)) { + ClickhouseTable table = proxy.getClickhouseTable(config.getString(DATABASE), + config.getString(TABLE)); + if (!"Distributed".equals(table.getEngine())) { + throw new IllegalArgumentException("split mode only support table which engine is " + + "'Distributed' engine at now"); + } + if (config.hasPath(SHARDING_KEY)) { + shardKey = config.getString(SHARDING_KEY); + shardKeyType = tableSchema.get(shardKey); + } } 
ShardMetadata metadata = new ShardMetadata( shardKey, @@ -132,8 +136,7 @@ public void prepare(Config config) throws PrepareFailException { fields.addAll(tableSchema.keySet()); } proxy.close(); - this.option = new ReaderOption(metadata, seaTunnelRowType, clickhouseProperties, fields, - config.getIntList(RETRY_CODES), tableSchema, config.getInt(RETRY), config.getInt(BULK_SIZE)); + this.option = new ReaderOption(metadata, clickhouseProperties, fields, tableSchema, config.getInt(BULK_SIZE)); } @Override @@ -153,7 +156,7 @@ public Optional> getWriterStateSerializer() { @Override public void setTypeInfo(SeaTunnelRowType seaTunnelRowType) { - this.seaTunnelRowType = seaTunnelRowType; + this.option.setSeaTunnelRowType(seaTunnelRowType); } @Override diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java index dda07255d90..74e2b84e999 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java @@ -20,7 +20,6 @@ import org.apache.seatunnel.api.sink.SinkWriter; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.common.config.Common; -import org.apache.seatunnel.common.utils.RetryUtils; import org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ReaderOption; import org.apache.seatunnel.connectors.seatunnel.clickhouse.shard.Shard; import 
org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject.ArrayInjectFunction; @@ -53,6 +52,8 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public class ClickhouseSinkWriter implements SinkWriter { @@ -60,26 +61,20 @@ public class ClickhouseSinkWriter implements SinkWriter statementMap; private final Map fieldInjectFunctionMap; private static final ClickhouseFieldInjectFunction DEFAULT_INJECT_FUNCTION = new StringInjectFunction(); + private static final Pattern NULLABLE = Pattern.compile("Nullable\\((.*)\\)"); + private static final Pattern LOW_CARDINALITY = Pattern.compile("LowCardinality\\((.*)\\)"); + ClickhouseSinkWriter(ReaderOption option, SinkWriter.Context context) { this.option = option; this.context = context; - retryMaterial = new RetryUtils.RetryMaterial(option.getRetry(), true, exception -> { - if (exception instanceof SQLException) { - SQLException sqlException = (SQLException) exception; - return option.getRetryCodes().contains(sqlException.getErrorCode()); - } - return false; - }); - this.proxy = new ClickhouseProxy(option.getShardMetadata().getDefaultShard().getNode()); this.fieldInjectFunctionMap = initFieldInjectFunctionMap(); this.shardRouter = new ShardRouter(proxy, option.getShardMetadata()); @@ -158,12 +153,8 @@ private void addIntoBatch(SeaTunnelRow row, ClickHousePreparedStatementImpl clic } private void flush(ClickHouseStatement clickHouseStatement) { - RetryUtils.Execution execution = () -> { - clickHouseStatement.executeBatch(); - return null; - }; try { - RetryUtils.retryWithException(execution, retryMaterial); + clickHouseStatement.executeBatch(); } catch (Exception e) { throw new RuntimeException("Clickhouse execute batch statement error", e); } @@ -218,14 +209,26 @@ private Map initFieldInjectFunctionMap() ClickhouseFieldInjectFunction function = defaultFunction; String fieldType = this.option.getTableSchema().get(field); for 
(ClickhouseFieldInjectFunction clickhouseFieldInjectFunction : clickhouseFieldInjectFunctions) { - if (clickhouseFieldInjectFunction.isCurrentFieldType(fieldType)) { + if (clickhouseFieldInjectFunction.isCurrentFieldType(unwrapCommonPrefix(fieldType))) { function = clickhouseFieldInjectFunction; break; } } - result.put(field, function); + result.put(fieldType, function); } return result; } + private String unwrapCommonPrefix(String fieldType) { + Matcher nullMatcher = NULLABLE.matcher(fieldType); + Matcher lowMatcher = LOW_CARDINALITY.matcher(fieldType); + if (nullMatcher.matches()) { + return nullMatcher.group(1); + } else if (lowMatcher.matches()) { + return lowMatcher.group(1); + } else { + return fieldType; + } + } + } diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ShardRouter.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ShardRouter.java index ee4681d1774..4471f8157d6 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ShardRouter.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ShardRouter.java @@ -45,7 +45,7 @@ public class ShardRouter implements Serializable { private final String shardKeyType; private final boolean splitMode; - private final XXHash64 hashInstance = XXHashFactory.fastestInstance().hash64(); + private static final XXHash64 HASH_INSTANCE = XXHashFactory.fastestInstance().hash64(); private final ThreadLocalRandom threadLocalRandom = ThreadLocalRandom.current(); public ShardRouter(ClickhouseProxy proxy, ShardMetadata 
shardMetadata) { @@ -86,7 +86,7 @@ public Shard getShard(Object shardValue) { if (StringUtils.isEmpty(shardKey) || shardValue == null) { return shards.lowerEntry(threadLocalRandom.nextInt(shardWeightCount + 1)).getValue(); } - int offset = (int) (hashInstance.hash(ByteBuffer.wrap(shardValue.toString().getBytes(StandardCharsets.UTF_8)), + int offset = (int) (HASH_INSTANCE.hash(ByteBuffer.wrap(shardValue.toString().getBytes(StandardCharsets.UTF_8)), 0) & Long.MAX_VALUE % shardWeightCount); return shards.lowerEntry(offset + 1).getValue(); } diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java index dca68d79ff9..0b37c7ae162 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java @@ -54,6 +54,7 @@ import org.apache.seatunnel.shade.com.typesafe.config.ConfigFactory; import com.clickhouse.client.ClickHouseNode; +import com.google.auto.service.AutoService; import com.google.common.collect.ImmutableMap; import java.io.IOException; @@ -63,10 +64,10 @@ import java.util.Map; import java.util.stream.Collectors; +@AutoService(SeaTunnelSink.class) public class ClickhouseFileSink implements SeaTunnelSink { private SeaTunnelContext seaTunnelContext; - private SeaTunnelRowType seaTunnelRowType; private FileReaderOption readerOption; @Override @@ -125,12 +126,12 @@ public void prepare(Config 
config) throws PrepareFailException { proxy.close(); this.readerOption = new FileReaderOption(shardMetadata, tableSchema, fields, config.getString(CLICKHOUSE_LOCAL_PATH), ClickhouseFileCopyMethod.from(config.getString(COPY_METHOD)), - TypesafeConfigUtils.getConfig(config, NODE_FREE_PASSWORD, true), nodePassword, this.seaTunnelRowType); + TypesafeConfigUtils.getConfig(config, NODE_FREE_PASSWORD, true), nodePassword); } @Override public void setTypeInfo(SeaTunnelRowType seaTunnelRowType) { - this.seaTunnelRowType = seaTunnelRowType; + this.readerOption.setSeaTunnelRowType(seaTunnelRowType); } @Override diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java index bf0575dfa3a..e89e9a27c18 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java @@ -96,9 +96,9 @@ public void write(SeaTunnelRow element) throws IOException { private void nodePasswordCheck() { if (!this.readerOption.isNodeFreePass()) { shardRouter.getShards().values().forEach(shard -> { - if (!this.readerOption.getNodePassword().containsKey(shard.getNode().getAddress().getAddress().getHostAddress()) - && !this.readerOption.getNodePassword().containsKey(shard.getNode().getAddress().getHostName())) { - throw new RuntimeException("Cannot find password of shard " + 
shard.getNode().getAddress().getAddress().getHostAddress()); + if (!this.readerOption.getNodePassword().containsKey(shard.getNode().getAddress().getHostName()) + && !this.readerOption.getNodePassword().containsKey(shard.getNode().getHost())) { + throw new RuntimeException("Cannot find password of shard " + shard.getNode().getAddress().getHostName()); } }); } @@ -127,7 +127,7 @@ public void close() throws IOException { private void flush(Shard shard, List rows) { try { // generate clickhouse local file - List clickhouseLocalFiles = generateClickhouseLocalFiles(shard, rows); + List clickhouseLocalFiles = generateClickhouseLocalFiles(rows); // move file to server attachClickhouseLocalFileToServer(shard, clickhouseLocalFiles); // clear local file @@ -137,7 +137,7 @@ private void flush(Shard shard, List rows) { } } - private List generateClickhouseLocalFiles(Shard shard, List rows) throws IOException, + private List generateClickhouseLocalFiles(List rows) throws IOException, InterruptedException { if (rows.isEmpty()) { return Collections.emptyList(); diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ClickhouseFieldInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ClickhouseFieldInjectFunction.java index ae4625cd22f..1ee8c76afa8 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ClickhouseFieldInjectFunction.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ClickhouseFieldInjectFunction.java @@ -19,12 +19,13 @@ import 
ru.yandex.clickhouse.ClickHousePreparedStatementImpl; +import java.io.Serializable; import java.sql.SQLException; /** * Injects a field into a ClickHouse statement, used to transform a java type into a ClickHouse type. */ -public interface ClickhouseFieldInjectFunction { +public interface ClickhouseFieldInjectFunction extends Serializable { /** * Inject the value into the statement. diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/StringInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/StringInjectFunction.java index bddd679d7d9..0e9cdf3b8a0 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/StringInjectFunction.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/StringInjectFunction.java @@ -20,12 +20,9 @@ import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; import java.sql.SQLException; -import java.util.regex.Pattern; public class StringInjectFunction implements ClickhouseFieldInjectFunction { - private static final Pattern LOW_CARDINALITY_PATTERN = Pattern.compile("LowCardinality\\((.*)\\)"); - @Override public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { statement.setString(index, value.toString()); @@ -33,6 +30,6 @@ public void injectFields(ClickHousePreparedStatementImpl statement, int index, O @Override public boolean isCurrentFieldType(String fieldType) { - return "String".equals(fieldType) || LOW_CARDINALITY_PATTERN.matcher(fieldType).matches(); + 
return "String".equals(fieldType); } } diff --git a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/AbstractSparkWriterConverter.java b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/AbstractSparkWriterConverter.java index ed50d2bf2be..97a3d15ac4e 100644 --- a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/AbstractSparkWriterConverter.java +++ b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/AbstractSparkWriterConverter.java @@ -19,6 +19,7 @@ import org.apache.seatunnel.api.sink.SinkAggregatedCommitter; import org.apache.seatunnel.api.sink.SinkCommitter; +import org.apache.seatunnel.api.sink.SinkWriter; import org.apache.spark.sql.types.StructType; @@ -26,15 +27,19 @@ public abstract class AbstractSparkWriterConverter { + protected final SinkWriter.Context context; protected final SinkCommitter sinkCommitter; protected final SinkAggregatedCommitter sinkAggregatedCommitter; protected final StructType schema; + protected final String sinkString; - AbstractSparkWriterConverter(@Nullable SinkCommitter sinkCommitter, + AbstractSparkWriterConverter(SinkWriter.Context context, SinkCommitter sinkCommitter, @Nullable SinkAggregatedCommitter sinkAggregatedCommitter, - StructType schema) { + StructType schema, String sinkString) { + this.context = context; this.sinkCommitter = sinkCommitter; this.sinkAggregatedCommitter = sinkAggregatedCommitter; this.schema = schema; + this.sinkString = sinkString; } } diff --git a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataSourceWriter.java b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataSourceWriter.java index 59360bdf573..5aa7bf62420 100644 --- 
a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataSourceWriter.java +++ b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataSourceWriter.java @@ -20,7 +20,6 @@ import org.apache.seatunnel.api.sink.SinkAggregatedCommitter; import org.apache.seatunnel.api.sink.SinkCommitter; import org.apache.seatunnel.api.sink.SinkWriter; -import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.sources.v2.writer.DataSourceWriter; @@ -40,26 +39,27 @@ public class SparkDataSourceWriter implements DataSourceWriter { - private final SinkWriter sinkWriter; + private final SinkWriter.Context context; @Nullable private final SinkCommitter sinkCommitter; @Nullable private final SinkAggregatedCommitter sinkAggregatedCommitter; private final StructType schema; + private final String sinkString; - SparkDataSourceWriter(SinkWriter sinkWriter, - @Nullable SinkCommitter sinkCommitter, + SparkDataSourceWriter(SinkWriter.Context context, @Nullable SinkCommitter sinkCommitter, @Nullable SinkAggregatedCommitter sinkAggregatedCommitter, - StructType schema) { - this.sinkWriter = sinkWriter; + StructType schema, String sinkString) { + this.context = context; this.sinkCommitter = sinkCommitter; this.sinkAggregatedCommitter = sinkAggregatedCommitter; + this.sinkString = sinkString; this.schema = schema; } @Override public DataWriterFactory createWriterFactory() { - return new SparkDataWriterFactory<>(sinkWriter, sinkCommitter, schema); + return new SparkDataWriterFactory(context, schema, sinkString); } @Override @@ -93,10 +93,10 @@ public void abort(WriterCommitMessage[] messages) { } private @Nonnull List extractCommitInfo(WriterCommitMessage[] messages) { - return Arrays.stream(messages) - .map(m -> ((SparkWriterCommitMessage) m).getMessage()) - .filter(Objects::nonNull) - 
.collect(Collectors.toList()); + return Arrays.stream(messages).filter(Objects::nonNull) + .map(m -> ((SparkWriterCommitMessage) m).getMessage()) + .filter(Objects::nonNull) + .collect(Collectors.toList()); } private @Nonnull List combineCommitMessage(List commitInfos) { diff --git a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataSourceWriterConverter.java b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataSourceWriterConverter.java index 83e09b58011..802de53b803 100644 --- a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataSourceWriterConverter.java +++ b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataSourceWriterConverter.java @@ -30,14 +30,14 @@ public class SparkDataSourceWriterConverter extends AbstractSparkWriterConverter implements SinkWriterConverter { - SparkDataSourceWriterConverter(@Nullable SinkCommitter sinkCommitter, + SparkDataSourceWriterConverter(SinkWriter.Context context, @Nullable SinkCommitter sinkCommitter, @Nullable SinkAggregatedCommitter sinkAggregatedCommitter, - StructType schema) { - super(sinkCommitter, sinkAggregatedCommitter, schema); + StructType schema, String sinkString) { + super(context, sinkCommitter, sinkAggregatedCommitter, schema, sinkString); } @Override public DataSourceWriter convert(SinkWriter sinkWriter) { - return new SparkDataSourceWriter(sinkWriter, sinkCommitter, sinkAggregatedCommitter, schema); + return new SparkDataSourceWriter(context, sinkCommitter, sinkAggregatedCommitter, schema, sinkString); } } diff --git a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataWriter.java 
b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataWriter.java index 4bf0bdb0324..9016c46f2fb 100644 --- a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataWriter.java +++ b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataWriter.java @@ -17,9 +17,11 @@ package org.apache.seatunnel.translation.spark.sink; +import org.apache.seatunnel.api.sink.SeaTunnelSink; import org.apache.seatunnel.api.sink.SinkCommitter; import org.apache.seatunnel.api.sink.SinkWriter; import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.common.utils.SerializationUtils; import org.apache.seatunnel.translation.serialization.RowSerialization; import org.apache.seatunnel.translation.spark.serialization.InternalRowSerialization; @@ -36,19 +38,24 @@ public class SparkDataWriter implements DataWriter { - private final SinkWriter sinkWriter; + private SinkWriter sinkWriter; @Nullable - private final SinkCommitter sinkCommitter; + private SinkCommitter sinkCommitter = null; private final RowSerialization rowSerialization; private CommitInfoT latestCommitInfoT; private long epochId; - SparkDataWriter(SinkWriter sinkWriter, - @Nullable SinkCommitter sinkCommitter, - StructType schema, long epochId) { - this.sinkWriter = sinkWriter; - this.sinkCommitter = sinkCommitter; + SparkDataWriter(SinkWriter.Context context, + StructType schema, long epochId, String sinkString) { + try { + SeaTunnelSink sink = SerializationUtils.stringToObject(sinkString); + this.sinkWriter = sink.createWriter(context); + Optional> optionalSinkCommitter = sink.createCommitter(); + optionalSinkCommitter.ifPresent(commitInfoTSinkCommitter -> this.sinkCommitter = commitInfoTSinkCommitter); + } catch (Exception e) { + throw new RuntimeException("failed create SparkDataWriter", e); + } 
this.rowSerialization = new InternalRowSerialization(schema); this.epochId = epochId == 0 ? 1 : epochId; } diff --git a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataWriterFactory.java b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataWriterFactory.java index a3654ce6331..9cf703c2d66 100644 --- a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataWriterFactory.java +++ b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataWriterFactory.java @@ -17,9 +17,7 @@ package org.apache.seatunnel.translation.spark.sink; -import org.apache.seatunnel.api.sink.SinkCommitter; import org.apache.seatunnel.api.sink.SinkWriter; -import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.sources.v2.writer.DataWriter; @@ -28,24 +26,23 @@ import javax.annotation.Nullable; -public class SparkDataWriterFactory implements DataWriterFactory { +public class SparkDataWriterFactory implements DataWriterFactory { - private final SinkWriter sinkWriter; @Nullable - private final SinkCommitter sinkCommitter; + private final SinkWriter.Context context; private final StructType schema; + private final String sinkString; - SparkDataWriterFactory(SinkWriter sinkWriter, - @Nullable SinkCommitter sinkCommitter, - StructType schema) { - this.sinkWriter = sinkWriter; - this.sinkCommitter = sinkCommitter; + SparkDataWriterFactory(@Nullable SinkWriter.Context context, + StructType schema, String sinkString) { + this.context = context; this.schema = schema; + this.sinkString = sinkString; } @Override public DataWriter createDataWriter(int partitionId, long taskId, long epochId) { // TODO use partitionID, taskId information. 
- return new SparkDataWriter<>(sinkWriter, sinkCommitter, schema, epochId); + return new SparkDataWriter<>(context, schema, epochId, sinkString); } } diff --git a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkSink.java b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkSink.java index e2250fd91dd..8de8040ea3c 100644 --- a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkSink.java +++ b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkSink.java @@ -39,13 +39,15 @@ public class SparkSink imple StreamWriteSupport, DataSourceV2 { private volatile SeaTunnelSink sink; + + private String sinkString; private Map configuration; private void init(DataSourceOptions options) { if (sink == null) { - this.sink = SerializationUtils.stringToObject( - options.get("sink").orElseThrow(() -> new IllegalArgumentException("can not find sink " + - "class string in DataSourceOptions"))); + sinkString = options.get("sink").orElseThrow(() -> new IllegalArgumentException("can not find " + + "sink class string in DataSourceOptions")); + this.sink = SerializationUtils.stringToObject(sinkString); this.configuration = SerializationUtils.stringToObject( options.get("configuration").orElseThrow(() -> new IllegalArgumentException("can not " + "find configuration class string in DataSourceOptions"))); @@ -61,8 +63,8 @@ public StreamWriter createStreamWriter(String queryId, StructType schema, Output new DefaultSinkWriterContext(configuration, 0, 0); try { - return new SparkStreamWriterConverter(sink.createCommitter().orElse(null), - sink.createAggregatedCommitter().orElse(null), schema).convert(sink.createWriter(stContext)); + return new SparkStreamWriterConverter(stContext, sink.createCommitter().orElse(null), + 
sink.createAggregatedCommitter().orElse(null), schema, sinkString).convert(sink.createWriter(stContext)); } catch (IOException e) { throw new RuntimeException("find error when createStreamWriter", e); } @@ -77,8 +79,8 @@ public Optional createWriter(String writeUUID, StructType sche new DefaultSinkWriterContext(configuration, 0, 0); try { - return Optional.of(new SparkDataSourceWriterConverter(sink.createCommitter().orElse(null), - sink.createAggregatedCommitter().orElse(null), schema).convert(sink.createWriter(stContext))); + return Optional.of(new SparkDataSourceWriterConverter(stContext, sink.createCommitter().orElse(null), + sink.createAggregatedCommitter().orElse(null), schema, sinkString).convert(sink.createWriter(stContext))); } catch (IOException e) { throw new RuntimeException("find error when createStreamWriter", e); } diff --git a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkStreamWriter.java b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkStreamWriter.java index 42e3c8e16fc..e7d1ae61711 100644 --- a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkStreamWriter.java +++ b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkStreamWriter.java @@ -20,7 +20,6 @@ import org.apache.seatunnel.api.sink.SinkAggregatedCommitter; import org.apache.seatunnel.api.sink.SinkCommitter; import org.apache.seatunnel.api.sink.SinkWriter; -import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.sources.v2.writer.DataWriterFactory; @@ -33,11 +32,10 @@ public class SparkStreamWriter extends SparkDataSourceWriter implements StreamWriter { - SparkStreamWriter(SinkWriter sinkWriter, - @Nullable SinkCommitter sinkCommitter, + 
SparkStreamWriter(SinkWriter.Context context, @Nullable SinkCommitter sinkCommitter, @Nullable SinkAggregatedCommitter sinkAggregatedCommitter, - StructType schema) { - super(sinkWriter, sinkCommitter, sinkAggregatedCommitter, schema); + StructType schema, String sinkString) { + super(context, sinkCommitter, sinkAggregatedCommitter, schema, sinkString); } @Override diff --git a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkStreamWriterConverter.java b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkStreamWriterConverter.java index 7b7c4ed2929..8a820b672ac 100644 --- a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkStreamWriterConverter.java +++ b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkStreamWriterConverter.java @@ -30,14 +30,14 @@ public class SparkStreamWriterConverter extends AbstractSparkWriterConverter implements SinkWriterConverter { - SparkStreamWriterConverter(@Nullable SinkCommitter sinkCommitter, + SparkStreamWriterConverter(SinkWriter.Context context, @Nullable SinkCommitter sinkCommitter, @Nullable SinkAggregatedCommitter sinkAggregatedCommitter, - StructType schema) { - super(sinkCommitter, sinkAggregatedCommitter, schema); + StructType schema, String sinkString) { + super(context, sinkCommitter, sinkAggregatedCommitter, schema, sinkString); } @Override public StreamWriter convert(SinkWriter sinkWriter) { - return new SparkStreamWriter(sinkWriter, sinkCommitter, sinkAggregatedCommitter, schema); + return new SparkStreamWriter(context, sinkCommitter, sinkAggregatedCommitter, schema, sinkString); } } From 17290548b4d2107e5cb7a0248bdefd05eee92660 Mon Sep 17 00:00:00 2001 From: Hisoka Date: Thu, 23 Jun 2022 15:55:31 +0800 Subject: [PATCH 08/21] fix clickhouse file sink bug and not call 
seatunnel writer close method on spark --- .../seatunnel/clickhouse/config/Config.java | 5 -- .../clickhouse/config/FileReaderOption.java | 3 +- .../sink/client/ClickhouseBatchStatement.java | 11 ++-- .../sink/client/ClickhouseSink.java | 6 +- .../sink/client/ClickhouseSinkWriter.java | 22 +++----- .../sink/file/ClickhouseFileSink.java | 20 +++---- .../sink/file/ClickhouseFileSinkWriter.java | 56 +++++++++---------- .../clickhouse/sink/file/ScpFileTransfer.java | 1 + .../sink/inject/ArrayInjectFunction.java | 5 +- .../sink/inject/BigDecimalInjectFunction.java | 11 ++-- .../inject/ClickhouseFieldInjectFunction.java | 5 +- .../sink/inject/DateInjectFunction.java | 5 +- .../sink/inject/DateTimeInjectFunction.java | 5 +- .../sink/inject/DoubleInjectFunction.java | 5 +- .../sink/inject/FloatInjectFunction.java | 5 +- .../sink/inject/IntInjectFunction.java | 24 +++++--- .../sink/inject/LongInjectFunction.java | 5 +- .../sink/inject/StringInjectFunction.java | 5 +- .../spark/sink/SparkDataWriter.java | 1 + 19 files changed, 95 insertions(+), 105 deletions(-) diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/Config.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/Config.java index 4b219c100e5..6563274ba1c 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/Config.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/Config.java @@ -81,11 +81,6 @@ public class Config { */ public static final String TMP_BATCH_CACHE_LINE = "tmp_batch_cache_line"; - /** - * Clickhouse server node 
is free-password. - */ - public static final String NODE_FREE_PASSWORD = "node_free_password"; - /** * The password of Clickhouse server node */ diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/FileReaderOption.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/FileReaderOption.java index 16e58e44617..72081078973 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/FileReaderOption.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/FileReaderOption.java @@ -37,14 +37,13 @@ public class FileReaderOption implements Serializable { public FileReaderOption(ShardMetadata shardMetadata, Map tableSchema, List fields, String clickhouseLocalPath, - ClickhouseFileCopyMethod copyMethod, boolean nodeFreePass, + ClickhouseFileCopyMethod copyMethod, Map nodePassword) { this.shardMetadata = shardMetadata; this.tableSchema = tableSchema; this.fields = fields; this.clickhouseLocalPath = clickhouseLocalPath; this.copyMethod = copyMethod; - this.nodeFreePass = nodeFreePass; this.nodePassword = nodePassword; } diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseBatchStatement.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseBatchStatement.java index d0574444351..ae525acee8f 100644 --- 
a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseBatchStatement.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseBatchStatement.java @@ -19,17 +19,18 @@ import org.apache.seatunnel.connectors.seatunnel.clickhouse.tool.IntHolder; -import ru.yandex.clickhouse.ClickHouseConnectionImpl; -import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; +import com.clickhouse.jdbc.internal.ClickHouseConnectionImpl; + +import java.sql.PreparedStatement; public class ClickhouseBatchStatement { private final ClickHouseConnectionImpl clickHouseConnection; - private final ClickHousePreparedStatementImpl preparedStatement; + private final PreparedStatement preparedStatement; private final IntHolder intHolder; public ClickhouseBatchStatement(ClickHouseConnectionImpl clickHouseConnection, - ClickHousePreparedStatementImpl preparedStatement, + PreparedStatement preparedStatement, IntHolder intHolder) { this.clickHouseConnection = clickHouseConnection; this.preparedStatement = preparedStatement; @@ -40,7 +41,7 @@ public ClickHouseConnectionImpl getClickHouseConnection() { return clickHouseConnection; } - public ClickHousePreparedStatementImpl getPreparedStatement() { + public PreparedStatement getPreparedStatement() { return preparedStatement; } diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java index 30c0e55c4a7..c3e6a5d9e1f 100644 --- 
a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java @@ -21,7 +21,7 @@ import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.CLICKHOUSE_PREFIX; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.DATABASE; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.FIELDS; -import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.NODE_ADDRESS; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.HOST; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.PASSWORD; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.SHARDING_KEY; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.SPLIT_MODE; @@ -77,7 +77,7 @@ public String getPluginName() { @SuppressWarnings("checkstyle:MagicNumber") @Override public void prepare(Config config) throws PrepareFailException { - CheckResult result = CheckConfigUtil.checkAllExists(config, NODE_ADDRESS, DATABASE, TABLE, USERNAME, PASSWORD); + CheckResult result = CheckConfigUtil.checkAllExists(config, HOST, DATABASE, TABLE, USERNAME, PASSWORD); if (!result.isSuccess()) { throw new PrepareFailException(getPluginName(), PluginType.SINK, result.getMsg()); } @@ -88,7 +88,7 @@ public void prepare(Config config) throws PrepareFailException { config = config.withFallback(ConfigFactory.parseMap(defaultConfig)); - List nodes = ClickhouseUtil.createNodes(config.getString(NODE_ADDRESS), + List nodes = ClickhouseUtil.createNodes(config.getString(HOST), config.getString(DATABASE), 
config.getString(USERNAME), config.getString(PASSWORD)); Properties clickhouseProperties = new Properties(); diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java index 74e2b84e999..ce2c7034272 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java @@ -36,16 +36,14 @@ import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.ClickhouseSinkState; import org.apache.seatunnel.connectors.seatunnel.clickhouse.tool.IntHolder; +import com.clickhouse.jdbc.internal.ClickHouseConnectionImpl; import com.google.common.collect.Lists; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import ru.yandex.clickhouse.BalancedClickhouseDataSource; -import ru.yandex.clickhouse.ClickHouseConnectionImpl; -import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; -import ru.yandex.clickhouse.ClickHouseStatement; import java.io.IOException; +import java.sql.PreparedStatement; import java.sql.SQLException; import java.util.Arrays; import java.util.HashMap; @@ -91,7 +89,7 @@ public void write(SeaTunnelRow element) throws IOException { shardKey = element.getField(i); } ClickhouseBatchStatement statement = statementMap.get(shardRouter.getShard(shardKey)); - ClickHousePreparedStatementImpl clickHouseStatement = 
statement.getPreparedStatement(); + PreparedStatement clickHouseStatement = statement.getPreparedStatement(); IntHolder sizeHolder = statement.getIntHolder(); // add into batch addIntoBatch(element, clickHouseStatement); @@ -118,7 +116,7 @@ public void close() throws IOException { this.proxy.close(); for (ClickhouseBatchStatement batchStatement : statementMap.values()) { try (ClickHouseConnectionImpl needClosedConnection = batchStatement.getClickHouseConnection(); - ClickHousePreparedStatementImpl needClosedStatement = batchStatement.getPreparedStatement()) { + PreparedStatement needClosedStatement = batchStatement.getPreparedStatement()) { IntHolder intHolder = batchStatement.getIntHolder(); if (intHolder.getValue() > 0) { flush(needClosedStatement); @@ -130,7 +128,7 @@ public void close() throws IOException { } } - private void addIntoBatch(SeaTunnelRow row, ClickHousePreparedStatementImpl clickHouseStatement) { + private void addIntoBatch(SeaTunnelRow row, PreparedStatement clickHouseStatement) { try { for (int i = 0; i < option.getFields().size(); i++) { String fieldName = option.getFields().get(i); @@ -152,7 +150,7 @@ private void addIntoBatch(SeaTunnelRow row, ClickHousePreparedStatementImpl clic } } - private void flush(ClickHouseStatement clickHouseStatement) { + private void flush(PreparedStatement clickHouseStatement) { try { clickHouseStatement.executeBatch(); } catch (Exception e) { @@ -164,11 +162,9 @@ private Map initStatementMap() { Map result = new HashMap<>(Common.COLLECTION_SIZE); shardRouter.getShards().forEach((weight, s) -> { try { - ClickHouseConnectionImpl clickhouseConnection = - (ClickHouseConnectionImpl) new BalancedClickhouseDataSource(s.getJdbcUrl(), - this.option.getProperties()).getConnection(); - ClickHousePreparedStatementImpl preparedStatement = - (ClickHousePreparedStatementImpl) clickhouseConnection.prepareStatement(prepareSql); + ClickHouseConnectionImpl clickhouseConnection = new ClickHouseConnectionImpl(s.getJdbcUrl(), + 
this.option.getProperties()); + PreparedStatement preparedStatement = clickhouseConnection.prepareStatement(prepareSql); IntHolder intHolder = new IntHolder(); ClickhouseBatchStatement batchStatement = new ClickhouseBatchStatement(clickhouseConnection, preparedStatement, intHolder); diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java index 0b37c7ae162..58e418b90c1 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java @@ -23,7 +23,6 @@ import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.FIELDS; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.HOST; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.NODE_ADDRESS; -import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.NODE_FREE_PASSWORD; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.NODE_PASS; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.PASSWORD; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.SHARDING_KEY; @@ -38,7 +37,6 @@ import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.common.config.CheckConfigUtil; import org.apache.seatunnel.common.config.CheckResult; -import 
org.apache.seatunnel.common.config.TypesafeConfigUtils; import org.apache.seatunnel.common.constants.PluginType; import org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ClickhouseFileCopyMethod; import org.apache.seatunnel.connectors.seatunnel.clickhouse.config.FileReaderOption; @@ -59,7 +57,6 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -86,7 +83,7 @@ public void prepare(Config config) throws PrepareFailException { .build(); config = config.withFallback(ConfigFactory.parseMap(defaultConfigs)); - List nodes = ClickhouseUtil.createNodes(config.getString(NODE_ADDRESS), + List nodes = ClickhouseUtil.createNodes(config.getString(HOST), config.getString(DATABASE), config.getString(USERNAME), config.getString(PASSWORD)); ClickhouseProxy proxy = new ClickhouseProxy(nodes.get(0)); @@ -116,17 +113,14 @@ public void prepare(Config config) throws PrepareFailException { } else { fields = new ArrayList<>(tableSchema.keySet()); } - Map nodePassword = Collections.emptyMap(); - if (!TypesafeConfigUtils.getConfig(config, NODE_FREE_PASSWORD, true)) { - nodePassword = config.getObjectList(NODE_PASS).stream() - .collect(Collectors.toMap( - configObject -> configObject.toConfig().getString(NODE_ADDRESS), - configObject -> configObject.toConfig().getString(PASSWORD))); - } + Map nodePassword = config.getObjectList(NODE_PASS).stream() + .collect(Collectors.toMap( + configObject -> configObject.toConfig().getString(NODE_ADDRESS), + configObject -> configObject.toConfig().getString(PASSWORD))); + proxy.close(); this.readerOption = new FileReaderOption(shardMetadata, tableSchema, fields, config.getString(CLICKHOUSE_LOCAL_PATH), - ClickhouseFileCopyMethod.from(config.getString(COPY_METHOD)), - TypesafeConfigUtils.getConfig(config, NODE_FREE_PASSWORD, true), nodePassword); + ClickhouseFileCopyMethod.from(config.getString(COPY_METHOD)), nodePassword); } 
@Override diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java index e89e9a27c18..0b6de7f4520 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java @@ -30,6 +30,7 @@ import com.clickhouse.client.ClickHouseException; import com.clickhouse.client.ClickHouseRequest; +import com.clickhouse.client.ClickHouseResponse; import org.apache.commons.io.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,7 +58,7 @@ public class ClickhouseFileSinkWriter implements SinkWriter { private static final Logger LOGGER = LoggerFactory.getLogger(ClickhouseFileSinkWriter.class); - private static final String CLICKHOUSE_LOCAL_FILE_PREFIX = "/tmp/clickhouse-local/flink-file"; + private static final String CLICKHOUSE_LOCAL_FILE_PREFIX = "/tmp/clickhouse-local/seatunnel-file"; private static final int UUID_LENGTH = 10; private final FileReaderOption readerOption; private final ShardRouter shardRouter; @@ -116,17 +117,13 @@ public void abortPrepare() { @Override public void close() throws IOException { - for (Map.Entry> entry : rowCache.entrySet()) { - Shard shard = entry.getKey(); - List rows = entry.getValue(); - flush(shard, rows); - rows.clear(); - } + rowCache.forEach(this::flush); } private void flush(Shard shard, List rows) { try { // generate 
clickhouse local file + // TODO generate file by sub rows to save memory List clickhouseLocalFiles = generateClickhouseLocalFiles(rows); // move file to server attachClickhouseLocalFileToServer(shard, clickhouseLocalFiles); @@ -146,23 +143,25 @@ private List generateClickhouseLocalFiles(List rows) throw String clickhouseLocalFile = String.format("%s/%s", CLICKHOUSE_LOCAL_FILE_PREFIX, uuid); FileUtils.forceMkdir(new File(clickhouseLocalFile)); String clickhouseLocalFileTmpFile = clickhouseLocalFile + "/local_data.log"; - FileChannel fileChannel = FileChannel.open(Paths.get(clickhouseLocalFileTmpFile), StandardOpenOption.WRITE, - StandardOpenOption.READ, StandardOpenOption.CREATE_NEW); - String data = rows.stream() - .map(row -> this.readerOption.getFields().stream().map(field -> row.getField(this.readerOption.getSeaTunnelRowType().indexOf(field)).toString()) - .collect(Collectors.joining("\t"))) - .collect(Collectors.joining("\n")); - MappedByteBuffer buffer = fileChannel.map(FileChannel.MapMode.READ_WRITE, fileChannel.size(), - data.getBytes(StandardCharsets.UTF_8).length); - buffer.put(data.getBytes(StandardCharsets.UTF_8)); - - List command = new ArrayList<>(); - command.add("cat"); - command.add(clickhouseLocalFileTmpFile); - command.add("|"); + try (FileChannel fileChannel = FileChannel.open(Paths.get(clickhouseLocalFileTmpFile), StandardOpenOption.WRITE, + StandardOpenOption.READ, StandardOpenOption.CREATE_NEW)) { + String data = rows.stream() + .map(row -> this.readerOption.getFields().stream().map(field -> row.getField(this.readerOption.getSeaTunnelRowType().indexOf(field)).toString()) + .collect(Collectors.joining("\t"))) + .collect(Collectors.joining("\n")); + MappedByteBuffer buffer = fileChannel.map(FileChannel.MapMode.READ_WRITE, fileChannel.size(), + data.getBytes(StandardCharsets.UTF_8).length); + buffer.put(data.getBytes(StandardCharsets.UTF_8)); + } - command.addAll(Arrays.stream(this.readerOption.getClickhouseLocalPath().trim().split(" 
")).collect(Collectors.toList())); - command.add("local"); + List localPaths = Arrays.stream(this.readerOption.getClickhouseLocalPath().trim().split(" ")) + .collect(Collectors.toList()); + List command = new ArrayList<>(localPaths); + if (localPaths.size() == 1) { + command.add("local"); + } + command.add("--file"); + command.add(clickhouseLocalFileTmpFile); command.add("-S"); command.add("\"" + this.readerOption.getFields().stream().map(field -> field + " " + readerOption.getTableSchema().get(field)).collect(Collectors.joining(",")) + "\""); command.add("-N"); @@ -172,9 +171,9 @@ private List generateClickhouseLocalFiles(List rows) throw "\"%s; INSERT INTO TABLE %s SELECT %s FROM temp_table%s;\"", clickhouseTable.getCreateTableDDL().replace(clickhouseTable.getDatabase() + ".", "").replaceAll("`", ""), clickhouseTable.getLocalTableName(), - readerOption.getTableSchema().entrySet().stream().map(entry -> { - if (readerOption.getFields().contains(entry.getKey())) { - return entry.getKey(); + readerOption.getTableSchema().keySet().stream().map(s -> { + if (readerOption.getFields().contains(s)) { + return s; } else { return "NULL"; } @@ -211,7 +210,7 @@ private List generateClickhouseLocalFiles(List rows) throw private void attachClickhouseLocalFileToServer(Shard shard, List clickhouseLocalFiles) throws ClickHouseException { if (ClickhouseFileCopyMethod.SCP.equals(this.readerOption.getCopyMethod())) { - String hostAddress = shard.getNode().getAddress().getAddress().getHostAddress(); + String hostAddress = shard.getNode().getAddress().getHostName(); String password = readerOption.getNodePassword().getOrDefault(hostAddress, null); FileTransfer fileTransfer = new ScpFileTransfer(hostAddress, password); fileTransfer.init(); @@ -223,9 +222,10 @@ private void attachClickhouseLocalFileToServer(Shard shard, List clickho ClickHouseRequest request = proxy.getClickhouseConnection(shard); for (String clickhouseLocalFile : clickhouseLocalFiles) { - 
request.query(String.format("ALTER TABLE %s ATTACH PART '%s'", + ClickHouseResponse response = request.query(String.format("ALTER TABLE %s ATTACH PART '%s'", clickhouseTable.getLocalTableName(), clickhouseLocalFile.substring(clickhouseLocalFile.lastIndexOf("/") + 1))).executeAndWait(); + response.close(); } } diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ScpFileTransfer.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ScpFileTransfer.java index c45af141205..e6faf2d81ef 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ScpFileTransfer.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ScpFileTransfer.java @@ -51,6 +51,7 @@ public void init() { if (password != null) { clientSession.addPasswordIdentity(password); } + // TODO support add publicKey to identity if (!clientSession.auth().verify().isSuccess()) { throw new IOException("ssh host " + host + "authentication failed"); } diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ArrayInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ArrayInjectFunction.java index 63648b3d039..c564e5501d1 100644 --- 
a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ArrayInjectFunction.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ArrayInjectFunction.java @@ -17,8 +17,7 @@ package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; -import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; - +import java.sql.PreparedStatement; import java.sql.SQLException; import java.util.regex.Pattern; @@ -27,7 +26,7 @@ public class ArrayInjectFunction implements ClickhouseFieldInjectFunction { private static final Pattern PATTERN = Pattern.compile("(Array.*)"); @Override - public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { + public void injectFields(PreparedStatement statement, int index, Object value) throws SQLException { statement.setArray(index, (java.sql.Array) value); } diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/BigDecimalInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/BigDecimalInjectFunction.java index 1ae0cb69f1e..25c73ab3f36 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/BigDecimalInjectFunction.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/BigDecimalInjectFunction.java @@ -17,18 +17,21 @@ 
package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; -import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; - +import java.sql.PreparedStatement; import java.sql.SQLException; +import java.util.regex.Pattern; public class BigDecimalInjectFunction implements ClickhouseFieldInjectFunction { + + private static final Pattern PATTERN = Pattern.compile("(Decimal.*)"); + @Override - public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { + public void injectFields(PreparedStatement statement, int index, Object value) throws SQLException { statement.setBigDecimal(index, (java.math.BigDecimal) value); } @Override public boolean isCurrentFieldType(String fieldType) { - return "Decimal".equals(fieldType); + return PATTERN.matcher(fieldType).matches(); } } diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ClickhouseFieldInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ClickhouseFieldInjectFunction.java index 1ee8c76afa8..3e27a634396 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ClickhouseFieldInjectFunction.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/ClickhouseFieldInjectFunction.java @@ -17,9 +17,8 @@ package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; -import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; - import java.io.Serializable; +import java.sql.PreparedStatement; import java.sql.SQLException; /** @@ -34,7 
+33,7 @@ public interface ClickhouseFieldInjectFunction extends Serializable { * @param value value to inject * @param index index in the statement */ - void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException; + void injectFields(PreparedStatement statement, int index, Object value) throws SQLException; /** * If the fieldType need to be injected by the current function. diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DateInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DateInjectFunction.java index 2f4ced88fbf..7a0b0b64826 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DateInjectFunction.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DateInjectFunction.java @@ -17,14 +17,13 @@ package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; -import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; - import java.sql.Date; +import java.sql.PreparedStatement; import java.sql.SQLException; public class DateInjectFunction implements ClickhouseFieldInjectFunction { @Override - public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { + public void injectFields(PreparedStatement statement, int index, Object value) throws SQLException { if (value instanceof Date) { statement.setDate(index, (Date) value); } else { diff --git 
a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DateTimeInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DateTimeInjectFunction.java index 76acd7b6306..b85c56afbda 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DateTimeInjectFunction.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DateTimeInjectFunction.java @@ -17,14 +17,13 @@ package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; -import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; - +import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Timestamp; public class DateTimeInjectFunction implements ClickhouseFieldInjectFunction { @Override - public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { + public void injectFields(PreparedStatement statement, int index, Object value) throws SQLException { if (value instanceof Timestamp) { statement.setTimestamp(index, (Timestamp) value); } else { diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DoubleInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DoubleInjectFunction.java index 99e82971fde..c416d110cbb 100644 --- 
a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DoubleInjectFunction.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/DoubleInjectFunction.java @@ -17,14 +17,13 @@ package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; -import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; - import java.math.BigDecimal; +import java.sql.PreparedStatement; import java.sql.SQLException; public class DoubleInjectFunction implements ClickhouseFieldInjectFunction { @Override - public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { + public void injectFields(PreparedStatement statement, int index, Object value) throws SQLException { if (value instanceof BigDecimal) { statement.setDouble(index, ((BigDecimal) value).doubleValue()); } else { diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/FloatInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/FloatInjectFunction.java index 884620d9fae..84464808b76 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/FloatInjectFunction.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/FloatInjectFunction.java @@ -17,14 +17,13 @@ package 
org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; -import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; - import java.math.BigDecimal; +import java.sql.PreparedStatement; import java.sql.SQLException; public class FloatInjectFunction implements ClickhouseFieldInjectFunction { @Override - public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { + public void injectFields(PreparedStatement statement, int index, Object value) throws SQLException { if (value instanceof BigDecimal) { statement.setFloat(index, ((BigDecimal) value).floatValue()); } else { diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/IntInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/IntInjectFunction.java index 8a75ec554d7..f6e8c27dc87 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/IntInjectFunction.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/IntInjectFunction.java @@ -17,23 +17,31 @@ package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; -import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; - +import java.sql.PreparedStatement; import java.sql.SQLException; public class IntInjectFunction implements ClickhouseFieldInjectFunction { @Override - public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { - statement.setInt(index, (int) value); + public void injectFields(PreparedStatement 
statement, int index, Object value) throws SQLException { + if (value instanceof Byte) { + statement.setByte(index, (Byte) value); + + } else if (value instanceof Short) { + statement.setShort(index, (Short) value); + + } else { + statement.setInt(index, (Integer) value); + + } } @Override public boolean isCurrentFieldType(String fieldType) { return "Int8".equals(fieldType) - || "UInt8".equals(fieldType) - || "Int16".equals(fieldType) - || "UInt16".equals(fieldType) - || "Int32".equals(fieldType); + || "UInt8".equals(fieldType) + || "Int16".equals(fieldType) + || "UInt16".equals(fieldType) + || "Int32".equals(fieldType); } } diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/LongInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/LongInjectFunction.java index 116fb592eb5..ccd3e60b8ea 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/LongInjectFunction.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/LongInjectFunction.java @@ -17,14 +17,13 @@ package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; -import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; - +import java.sql.PreparedStatement; import java.sql.SQLException; public class LongInjectFunction implements ClickhouseFieldInjectFunction { @Override - public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { + public void injectFields(PreparedStatement statement, int index, Object value) throws 
SQLException { statement.setLong(index, (Long) value); } diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/StringInjectFunction.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/StringInjectFunction.java index 0e9cdf3b8a0..4894774dc85 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/StringInjectFunction.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/inject/StringInjectFunction.java @@ -17,14 +17,13 @@ package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.inject; -import ru.yandex.clickhouse.ClickHousePreparedStatementImpl; - +import java.sql.PreparedStatement; import java.sql.SQLException; public class StringInjectFunction implements ClickhouseFieldInjectFunction { @Override - public void injectFields(ClickHousePreparedStatementImpl statement, int index, Object value) throws SQLException { + public void injectFields(PreparedStatement statement, int index, Object value) throws SQLException { statement.setString(index, value.toString()); } diff --git a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataWriter.java b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataWriter.java index 9016c46f2fb..c20c9925372 100644 --- a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataWriter.java +++ 
b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/sink/SparkDataWriter.java @@ -85,6 +85,7 @@ public WriterCommitMessage commit() throws IOException { } SparkWriterCommitMessage sparkWriterCommitMessage = new SparkWriterCommitMessage<>(latestCommitInfoT); cleanCommitInfo(); + sinkWriter.close(); return sparkWriterCommitMessage; } From f353bfe9498fd846add06221d0175fa99915088f Mon Sep 17 00:00:00 2001 From: Hisoka Date: Thu, 23 Jun 2022 16:27:30 +0800 Subject: [PATCH 09/21] fix check style --- .../clickhouse/sink/client/ClickhouseProxy.java | 4 ++-- .../clickhouse/sink/file/ClickhouseFileSink.java | 3 +-- .../sink/file/ClickhouseFileSinkWriter.java | 12 +++++------- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseProxy.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseProxy.java index ac8b5cf9aed..3a3aa082c6b 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseProxy.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseProxy.java @@ -57,8 +57,8 @@ public ClickHouseRequest getClickhouseConnection() { } public ClickHouseRequest getClickhouseConnection(Shard shard) { - ClickHouseClient c = shardToDataSource.computeIfAbsent(shard, - s -> ClickHouseClient.newInstance(s.getNode().getProtocol())); + ClickHouseClient c = shardToDataSource + .computeIfAbsent(shard, s -> 
ClickHouseClient.newInstance(s.getNode().getProtocol())); return c.connect(shard.getNode()).format(ClickHouseFormat.RowBinaryWithNamesAndTypes); } diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java index 58e418b90c1..681b5ab8775 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java @@ -114,8 +114,7 @@ public void prepare(Config config) throws PrepareFailException { fields = new ArrayList<>(tableSchema.keySet()); } Map nodePassword = config.getObjectList(NODE_PASS).stream() - .collect(Collectors.toMap( - configObject -> configObject.toConfig().getString(NODE_ADDRESS), + .collect(Collectors.toMap(configObject -> configObject.toConfig().getString(NODE_ADDRESS), configObject -> configObject.toConfig().getString(PASSWORD))); proxy.close(); diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java index 0b6de7f4520..b05b01f8156 100644 --- 
a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java @@ -79,13 +79,11 @@ public ClickhouseFileSinkWriter(FileReaderOption readerOption, Context context) // find file local save path of each node shardLocalDataPaths = shardRouter.getShards().values().stream() - .collect(Collectors.toMap( - Function.identity(), - shard -> { - ClickhouseTable shardTable = proxy.getClickhouseTable(shard.getNode().getDatabase().get(), - clickhouseTable.getLocalTableName()); - return shardTable.getDataPaths(); - })); + .collect(Collectors.toMap(Function.identity(), shard -> { + ClickhouseTable shardTable = proxy.getClickhouseTable(shard.getNode().getDatabase().get(), + clickhouseTable.getLocalTableName()); + return shardTable.getDataPaths(); + })); } @Override From 65610d3e41c84cc5c5b9eb1fbd80c64f1fed9c66 Mon Sep 17 00:00:00 2001 From: Hisoka Date: Thu, 23 Jun 2022 16:44:02 +0800 Subject: [PATCH 10/21] fix check style --- .../seatunnel/clickhouse/sink/file/ClickhouseFileSink.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java index 681b5ab8775..428bab37198 100644 --- 
a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java @@ -115,7 +115,7 @@ public void prepare(Config config) throws PrepareFailException { } Map nodePassword = config.getObjectList(NODE_PASS).stream() .collect(Collectors.toMap(configObject -> configObject.toConfig().getString(NODE_ADDRESS), - configObject -> configObject.toConfig().getString(PASSWORD))); + configObject -> configObject.toConfig().getString(PASSWORD))); proxy.close(); this.readerOption = new FileReaderOption(shardMetadata, tableSchema, fields, config.getString(CLICKHOUSE_LOCAL_PATH), From 4ad657cf26ced3538b95473738e1c26a9bec299f Mon Sep 17 00:00:00 2001 From: Hisoka Date: Thu, 23 Jun 2022 17:11:10 +0800 Subject: [PATCH 11/21] fix check style --- .../clickhouse/sink/file/ClickhouseFileSinkWriter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java index b05b01f8156..c6b6bfa1fae 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java +++ 
b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSinkWriter.java @@ -67,7 +67,7 @@ public class ClickhouseFileSinkWriter implements SinkWriter> shardLocalDataPaths; private final Map> rowCache; - public ClickhouseFileSinkWriter(FileReaderOption readerOption, Context context) { + public ClickhouseFileSinkWriter(FileReaderOption readerOption, SinkWriter.Context context) { this.readerOption = readerOption; proxy = new ClickhouseProxy(this.readerOption.getShardMetadata().getDefaultShard().getNode()); shardRouter = new ShardRouter(proxy, this.readerOption.getShardMetadata()); From bd4cae874556e6364779a894e5c91eae4ffb18c2 Mon Sep 17 00:00:00 2001 From: Hisoka Date: Thu, 23 Jun 2022 17:48:39 +0800 Subject: [PATCH 12/21] update known-dependencies --- tools/dependencies/known-dependencies-jdk11.txt | 9 ++++++--- tools/dependencies/known-dependencies-jdk8.txt | 6 ++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/dependencies/known-dependencies-jdk11.txt b/tools/dependencies/known-dependencies-jdk11.txt index b529d97ff90..8df9f8c45a6 100644 --- a/tools/dependencies/known-dependencies-jdk11.txt +++ b/tools/dependencies/known-dependencies-jdk11.txt @@ -65,8 +65,11 @@ chill-java-0.9.3.jar chill_2.11-0.9.3.jar classmate-1.1.0.jar clickhouse-client-0.3.2-patch9.jar +clickhouse-grpc-client-0.3.2-patch9-netty.jar +clickhouse-http-client-0.3.2-patch9-shaded.jar clickhouse-http-client-0.3.2-patch9.jar clickhouse-jdbc-0.2.jar +clickhouse-jdbc-0.3.2-patch9.jar commons-beanutils-1.9.3.jar commons-cli-1.2.jar commons-cli-1.3.1.jar @@ -91,9 +94,7 @@ commons-daemon-1.0.13.jar commons-dbcp2-2.0.1.jar commons-digester-1.8.1.jar commons-email-1.5.jar -commons-io-2.11.0.jar -commons-io-2.4.jar -commons-io-2.5.jar +commons-io-2.8.0.jar commons-lang-2.6.jar commons-lang3-3.4.jar commons-logging-1.1.3.jar @@ -186,6 +187,7 @@ 
google-http-client-1.26.0.jar google-http-client-jackson2-1.26.0.jar google-oauth-client-1.26.0.jar gson-2.2.4.jar +gson-2.9.0.jar guava-19.0.jar guice-3.0.jar guice-4.1.0.jar @@ -264,6 +266,7 @@ httpasyncclient-4.1.4.jar httpclient-4.5.13.jar httpcore-4.4.4.jar httpcore-nio-4.4.4.jar +httpmime-4.5.13.jar httpmime-4.5.2.jar hudi-spark-bundle_2.11-0.10.0.jar i18n-util-1.0.4.jar diff --git a/tools/dependencies/known-dependencies-jdk8.txt b/tools/dependencies/known-dependencies-jdk8.txt index 00b8e500f1d..8e2ee8400d5 100755 --- a/tools/dependencies/known-dependencies-jdk8.txt +++ b/tools/dependencies/known-dependencies-jdk8.txt @@ -67,8 +67,11 @@ chill-java-0.9.3.jar chill_2.11-0.9.3.jar classmate-1.1.0.jar clickhouse-client-0.3.2-patch9.jar +clickhouse-grpc-client-0.3.2-patch9-netty.jar +clickhouse-http-client-0.3.2-patch9-shaded.jar clickhouse-http-client-0.3.2-patch9.jar clickhouse-jdbc-0.2.jar +clickhouse-jdbc-0.3.2-patch9.jar commons-beanutils-1.7.0.jar commons-beanutils-1.9.3.jar commons-beanutils-core-1.8.0.jar @@ -102,6 +105,7 @@ commons-httpclient-3.1.jar commons-io-2.11.0.jar commons-io-2.4.jar commons-io-2.5.jar +commons-io-2.8.0.jar commons-lang-2.6.jar commons-lang3-3.4.jar commons-logging-1.1.3.jar @@ -195,6 +199,7 @@ google-http-client-1.26.0.jar google-http-client-jackson2-1.26.0.jar google-oauth-client-1.26.0.jar gson-2.2.4.jar +gson-2.9.0.jar guava-19.0.jar guice-3.0.jar guice-4.1.0.jar @@ -288,6 +293,7 @@ httpasyncclient-4.1.4.jar httpclient-4.5.13.jar httpcore-4.4.4.jar httpcore-nio-4.4.4.jar +httpmime-4.5.13.jar httpmime-4.5.2.jar hudi-spark-bundle_2.11-0.10.0.jar i18n-util-1.0.4.jar From 9e20eaac404db432780a64be3ce8b1406b330436 Mon Sep 17 00:00:00 2001 From: Hisoka Date: Thu, 23 Jun 2022 18:21:37 +0800 Subject: [PATCH 13/21] update known-dependencies --- tools/dependencies/known-dependencies-jdk8.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/dependencies/known-dependencies-jdk8.txt b/tools/dependencies/known-dependencies-jdk8.txt 
index 8e2ee8400d5..f3915d317ad 100755 --- a/tools/dependencies/known-dependencies-jdk8.txt +++ b/tools/dependencies/known-dependencies-jdk8.txt @@ -102,9 +102,6 @@ commons-digester-1.8.jar commons-el-1.0.jar commons-email-1.5.jar commons-httpclient-3.1.jar -commons-io-2.11.0.jar -commons-io-2.4.jar -commons-io-2.5.jar commons-io-2.8.0.jar commons-lang-2.6.jar commons-lang3-3.4.jar From 1d5dd025b5f9eca346a2e6a6f1ca2ef11c31dc79 Mon Sep 17 00:00:00 2001 From: Hisoka Date: Fri, 24 Jun 2022 17:21:03 +0800 Subject: [PATCH 14/21] fix clickhouse file sink chown bug --- .../clickhouse/sink/file/ScpFileTransfer.java | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ScpFileTransfer.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ScpFileTransfer.java index e6faf2d81ef..6fa83794ce7 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ScpFileTransfer.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ScpFileTransfer.java @@ -17,10 +17,13 @@ package org.apache.seatunnel.connectors.seatunnel.clickhouse.sink.file; +import org.apache.commons.lang3.StringUtils; import org.apache.sshd.client.SshClient; import org.apache.sshd.client.session.ClientSession; import org.apache.sshd.scp.client.ScpClient; import org.apache.sshd.scp.client.ScpClientCreator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.ArrayList; @@ -28,6 +31,8 @@ public class 
ScpFileTransfer implements FileTransfer { + private static final Logger LOGGER = LoggerFactory.getLogger(ScpFileTransfer.class); + private static final int SCP_PORT = 22; private final String host; @@ -78,12 +83,15 @@ public void transferAndChown(String sourcePath, String targetPath) { List command = new ArrayList<>(); command.add("ls"); command.add("-l"); - command.add(targetPath.substring(0, targetPath.lastIndexOf("/"))); - command.add("/ | tail -n 1 | awk '{print $3}' | xargs -t -i chown -R {}:{} " + targetPath); + command.add(targetPath.substring(0, + StringUtils.stripEnd(targetPath, "/").lastIndexOf("/")) + "/"); + command.add("| tail -n 1 | awk '{print $3}' | xargs -t -i chown -R {}:{} " + targetPath); try { - clientSession.executeRemoteCommand(String.join(" ", command)); + String finalCommand = String.join(" ", command); + LOGGER.info("execute remote command: " + finalCommand); + clientSession.executeRemoteCommand(finalCommand); } catch (IOException e) { - throw new RuntimeException("Failed to execute remote command: " + command, e); + // always return error cause xargs return shell command result } } From 3411dcaa2a94b6da3ee34cfd913e65b39a74b29d Mon Sep 17 00:00:00 2001 From: Hisoka Date: Sat, 25 Jun 2022 09:48:52 +0800 Subject: [PATCH 15/21] add UTF8String support --- .../seatunnel/clickhouse/sink/client/ClickhouseSink.java | 6 ++++++ .../clickhouse/sink/client/ClickhouseSinkWriter.java | 2 +- .../seatunnel/clickhouse/sink/file/ClickhouseFileSink.java | 6 ++++++ .../spark/serialization/InternalRowConverter.java | 3 +++ 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java 
b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java index c3e6a5d9e1f..fb229a07750 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java @@ -34,6 +34,7 @@ import org.apache.seatunnel.api.serialization.Serializer; import org.apache.seatunnel.api.sink.SeaTunnelSink; import org.apache.seatunnel.api.sink.SinkWriter; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.common.config.CheckConfigUtil; @@ -159,6 +160,11 @@ public void setTypeInfo(SeaTunnelRowType seaTunnelRowType) { this.option.setSeaTunnelRowType(seaTunnelRowType); } + @Override + public SeaTunnelDataType getConsumedType() { + return this.option.getSeaTunnelRowType(); + } + @Override public SeaTunnelContext getSeaTunnelContext() { return seaTunnelContext; diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java index ce2c7034272..604f2c609ca 100644 --- 
a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java @@ -170,7 +170,7 @@ private Map initStatementMap() { new ClickhouseBatchStatement(clickhouseConnection, preparedStatement, intHolder); result.put(s, batchStatement); } catch (SQLException e) { - throw new RuntimeException("Clickhouse prepare statement error", e); + throw new RuntimeException("Clickhouse prepare statement error: " + e.getMessage(), e); } }); return result; diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java index 428bab37198..f56598b44dc 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java @@ -33,6 +33,7 @@ import org.apache.seatunnel.api.common.SeaTunnelContext; import org.apache.seatunnel.api.sink.SeaTunnelSink; import org.apache.seatunnel.api.sink.SinkWriter; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import 
org.apache.seatunnel.common.config.CheckConfigUtil; @@ -127,6 +128,11 @@ public void setTypeInfo(SeaTunnelRowType seaTunnelRowType) { this.readerOption.setSeaTunnelRowType(seaTunnelRowType); } + @Override + public SeaTunnelDataType getConsumedType() { + return this.readerOption.getSeaTunnelRowType(); + } + @Override public SinkWriter createWriter(SinkWriter.Context context) throws IOException { return new ClickhouseFileSinkWriter(readerOption, context); diff --git a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/serialization/InternalRowConverter.java b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/serialization/InternalRowConverter.java index 09ab6d239a4..bcd3d78877d 100644 --- a/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/serialization/InternalRowConverter.java +++ b/seatunnel-translation/seatunnel-translation-spark/src/main/java/org/apache/seatunnel/translation/spark/serialization/InternalRowConverter.java @@ -35,6 +35,7 @@ import org.apache.spark.sql.catalyst.expressions.MutableShort; import org.apache.spark.sql.catalyst.expressions.MutableValue; import org.apache.spark.sql.catalyst.expressions.SpecificInternalRow; +import org.apache.spark.unsafe.types.UTF8String; import java.io.IOException; import java.sql.Date; @@ -77,6 +78,8 @@ private static Object convert(Object field, SeaTunnelDataType dataType) { return Timestamp.valueOf((LocalDateTime) field); case MAP: return convertMap((Map) field, (MapType) dataType, InternalRowConverter::convert); + case STRING: + return UTF8String.fromString((String) field); default: return field; } From b7f13360ef9a85f51d5b0d3856bb221379f5ba9e Mon Sep 17 00:00:00 2001 From: Hisoka Date: Mon, 27 Jun 2022 10:20:19 +0800 Subject: [PATCH 16/21] add missed clickhouse license --- seatunnel-dist/release-docs/LICENSE | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/seatunnel-dist/release-docs/LICENSE b/seatunnel-dist/release-docs/LICENSE index 091b8ca9a19..da4a6093cad 100644 --- a/seatunnel-dist/release-docs/LICENSE +++ b/seatunnel-dist/release-docs/LICENSE @@ -833,7 +833,9 @@ The text of each license is the standard Apache 2.0 license. (The Apache Software License, Version 2.0) aggs-matrix-stats (org.elasticsearch.plugin:aggs-matrix-stats-client:7.5.1 - https://github.com/elastic/elasticsearch) (The Apache Software License, Version 2.0) cli (org.elasticsearch:elasticsearch-cli:6.3.1 - https://github.com/elastic/elasticsearch) (The Apache Software License, Version 2.0) clickhouse-client (com.clickhouse:clickhouse-client:0.3.2-patch9 - https://github.com/ClickHouse/clickhouse-jdbc) + (The Apache Software License, Version 2.0) clickhouse-grpc-client (com.clickhouse:clickhouse-grpc-client:0.3.2-patch9 - https://github.com/ClickHouse/clickhouse-jdbc) (The Apache Software License, Version 2.0) clickhouse-http-client (com.clickhouse:clickhouse-http-client:0.3.2-patch9 - https://github.com/ClickHouse/clickhouse-jdbc) + (The Apache Software License, Version 2.0) clickhouse-jdbc (com.clickhouse:clickhouse-jdbc:0.3.2-patch9 - https://github.com/ClickHouse/clickhouse-jdbc) (The Apache Software License, Version 2.0) clickhouse-jdbc (ru.yandex.clickhouse:clickhouse-jdbc:0.2 - https://github.com/yandex/clickhouse-jdbc) (The Apache Software License, Version 2.0) elasticsearch-cli (org.elasticsearch:elasticsearch-cli:7.5.1 - https://github.com/elastic/elasticsearch) (The Apache Software License, Version 2.0) elasticsearch-core (org.elasticsearch:elasticsearch-core:6.3.1 - https://github.com/elastic/elasticsearch) From be69c598614518fa6df0658cf4eab4aab80a6e56 Mon Sep 17 00:00:00 2001 From: Hisoka Date: Mon, 27 Jun 2022 17:59:29 +0800 Subject: [PATCH 17/21] fix clickhouse override error --- .../seatunnel/clickhouse/sink/client/ClickhouseSink.java | 5 ----- .../seatunnel/clickhouse/sink/file/ClickhouseFileSink.java | 5 ----- 
.../clickhouse/source/ClickhouseSourceSplitEnumerator.java | 1 + 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java index fb229a07750..295547c74d6 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java @@ -165,11 +165,6 @@ public SeaTunnelDataType getConsumedType() { return this.option.getSeaTunnelRowType(); } - @Override - public SeaTunnelContext getSeaTunnelContext() { - return seaTunnelContext; - } - @Override public void setSeaTunnelContext(SeaTunnelContext seaTunnelContext) { this.seaTunnelContext = seaTunnelContext; diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java index f56598b44dc..05c5112920a 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java +++ 
b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/ClickhouseFileSink.java @@ -138,11 +138,6 @@ public SinkWriter createWriter( return new ClickhouseFileSinkWriter(readerOption, context); } - @Override - public SeaTunnelContext getSeaTunnelContext() { - return seaTunnelContext; - } - @Override public void setSeaTunnelContext(SeaTunnelContext seaTunnelContext) { this.seaTunnelContext = seaTunnelContext; diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceSplitEnumerator.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceSplitEnumerator.java index ce07b3412bd..66d0621df89 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceSplitEnumerator.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceSplitEnumerator.java @@ -33,6 +33,7 @@ public class ClickhouseSourceSplitEnumerator implements private final Set readers; private volatile int assigned = -1; + // TODO support read distributed engine use multi split ClickhouseSourceSplitEnumerator(Context enumeratorContext) { this.context = enumeratorContext; this.readers = new HashSet<>(); From 06e196a34dfce09432354fdbf9b9c5c50f4a1056 Mon Sep 17 00:00:00 2001 From: Hisoka Date: Mon, 27 Jun 2022 18:11:53 +0800 Subject: [PATCH 18/21] fix clickhouse override error --- .../seatunnel/clickhouse/source/ClickhouseSource.java | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSource.java b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSource.java index f530838e84f..a94344e72cd 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSource.java +++ b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSource.java @@ -18,7 +18,7 @@ package org.apache.seatunnel.connectors.seatunnel.clickhouse.source; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.DATABASE; -import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.NODE_ADDRESS; +import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.HOST; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.PASSWORD; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.SQL; import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.USERNAME; @@ -65,11 +65,11 @@ public String getPluginName() { @Override public void prepare(Config config) throws PrepareFailException { - CheckResult result = CheckConfigUtil.checkAllExists(config, NODE_ADDRESS, DATABASE, SQL, USERNAME, PASSWORD); + CheckResult result = CheckConfigUtil.checkAllExists(config, HOST, DATABASE, SQL, USERNAME, PASSWORD); if (!result.isSuccess()) { throw new PrepareFailException(getPluginName(), PluginType.SOURCE, result.getMsg()); } - servers = 
ClickhouseUtil.createNodes(config.getString(NODE_ADDRESS), config.getString(DATABASE), + servers = ClickhouseUtil.createNodes(config.getString(HOST), config.getString(DATABASE), config.getString(USERNAME), config.getString(PASSWORD)); sql = config.getString(SQL); From 6fc2e96f760d7b167528b90136dd182586248f68 Mon Sep 17 00:00:00 2001 From: Hisoka Date: Tue, 28 Jun 2022 11:06:47 +0800 Subject: [PATCH 19/21] add httpmime license --- seatunnel-dist/release-docs/LICENSE | 2 ++ 1 file changed, 2 insertions(+) diff --git a/seatunnel-dist/release-docs/LICENSE b/seatunnel-dist/release-docs/LICENSE index da4a6093cad..a2d448a4a0a 100644 --- a/seatunnel-dist/release-docs/LICENSE +++ b/seatunnel-dist/release-docs/LICENSE @@ -421,6 +421,7 @@ The text of each license is the standard Apache 2.0 license. (Apache License, Version 2.0) Apache HttpClient (org.apache.httpcomponents:httpclient:4.5.6 - http://hc.apache.org/httpcomponents-client) (Apache License, Version 2.0) Apache HttpClient (org.apache.httpcomponents:httpclient:4.5.9 - http://hc.apache.org/httpcomponents-client) (Apache License, Version 2.0) Apache HttpClient Mime (org.apache.httpcomponents:httpmime:4.5.2 - http://hc.apache.org/httpcomponents-client) + (Apache License, Version 2.0) Apache HttpClient Mime (org.apache.httpcomponents:httpmime:4.5.13 - http://hc.apache.org/httpcomponents-client) (Apache License, Version 2.0) Apache HttpCore (org.apache.httpcomponents:httpcore:4.4.10 - http://hc.apache.org/httpcomponents-core-ga) (Apache License, Version 2.0) Apache HttpCore (org.apache.httpcomponents:httpcore:4.4.11 - http://hc.apache.org/httpcomponents-core-ga) (Apache License, Version 2.0) Apache HttpCore (org.apache.httpcomponents:httpcore:4.4.12 - http://hc.apache.org/httpcomponents-core-ga) @@ -760,6 +761,7 @@ The text of each license is the standard Apache 2.0 license. 
(The Apache Software License, Version 2.0) Google HTTP Client Library for Java (com.google.http-client:google-http-client:1.26.0 - https://github.com/googleapis/google-http-java-client/google-http-client) (The Apache Software License, Version 2.0) Google OAuth Client Library for Java (com.google.oauth-client:google-oauth-client:1.26.0 - https://github.com/googleapis/google-oauth-java-client/google-oauth-client) (The Apache Software License, Version 2.0) Gson (com.google.code.gson:gson:2.2.4 - http://code.google.com/p/google-gson/) + (The Apache Software License, Version 2.0) Gson (com.google.code.gson:gson:2.9.0 - http://code.google.com/p/google-gson/) (The Apache Software License, Version 2.0) Guava: Google Core Libraries for Java (com.google.guava:guava:19.0 - https://github.com/google/guava/guava) (The Apache Software License, Version 2.0) HPPC Collections (com.carrotsearch:hppc:0.7.1 - http://labs.carrotsearch.com/hppc.html/hppc) (The Apache Software License, Version 2.0) HPPC Collections (com.carrotsearch:hppc:0.7.2 - http://labs.carrotsearch.com/hppc.html/hppc) From 6e4b89198c956d9496c7f2ebebedcfc1d2e86220 Mon Sep 17 00:00:00 2001 From: Hisoka Date: Tue, 28 Jun 2022 11:17:28 +0800 Subject: [PATCH 20/21] update NOTICE --- seatunnel-dist/release-docs/NOTICE | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/seatunnel-dist/release-docs/NOTICE b/seatunnel-dist/release-docs/NOTICE index 4f0186eb76e..2e6d4eb9a0d 100644 --- a/seatunnel-dist/release-docs/NOTICE +++ b/seatunnel-dist/release-docs/NOTICE @@ -4377,4 +4377,28 @@ Copyright 2017-2021 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). 
+========================================================================= + +Apache Commons IO NOTICE + +========================================================================= + +Apache Commons IO +Copyright 2002-2020 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (https://www.apache.org/). + +========================================================================= + +Apache HttpClient Mime NOTICE + +========================================================================= + +Apache HttpClient Mime +Copyright 1999-2020 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + ========================================================================= \ No newline at end of file From 8fea8319c1fd71d63a2b6d80f8f408ee1938236d Mon Sep 17 00:00:00 2001 From: Hisoka Date: Tue, 28 Jun 2022 15:26:00 +0800 Subject: [PATCH 21/21] remove unused commons-io --- pom.xml | 7 ------- .../seatunnel-connector-seatunnel-clickhouse/pom.xml | 11 ++++++----- seatunnel-dist/release-docs/LICENSE | 1 - seatunnel-dist/release-docs/NOTICE | 12 ------------ tools/dependencies/known-dependencies.txt | 4 +++- 5 files changed, 9 insertions(+), 26 deletions(-) diff --git a/pom.xml b/pom.xml index fd4beaba915..fcf707a46c9 100644 --- a/pom.xml +++ b/pom.xml @@ -140,7 +140,6 @@ 2.2.0 2.6.0 3.4 - 2.8.0 3.3.0 provided provided @@ -436,12 +435,6 @@ ${commons-lang3.version} - - commons-io - commons-io - ${commons-io.version} - - org.apache.flink flink-csv diff --git a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/pom.xml b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/pom.xml index 6f3ebbb598f..38c2f6db4d9 100644 --- a/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/pom.xml +++ 
b/seatunnel-connectors/seatunnel-connectors-seatunnel/seatunnel-connector-seatunnel-clickhouse/pom.xml @@ -46,11 +46,6 @@ commons-lang3 - - commons-io - commons-io - - com.clickhouse @@ -58,6 +53,12 @@ 0.3.2-patch9 + + commons-io + commons-io + 2.11.0 + + com.clickhouse clickhouse-jdbc diff --git a/seatunnel-dist/release-docs/LICENSE b/seatunnel-dist/release-docs/LICENSE index a2d448a4a0a..dd1a4432c22 100644 --- a/seatunnel-dist/release-docs/LICENSE +++ b/seatunnel-dist/release-docs/LICENSE @@ -354,7 +354,6 @@ The text of each license is the standard Apache 2.0 license. (Apache License, Version 2.0) Apache Commons Email (org.apache.commons:commons-email:1.5 - http://commons.apache.org/proper/commons-email/) (Apache License, Version 2.0) Apache Commons IO (commons-io:commons-io:2.11.0 - https://commons.apache.org/proper/commons-io/) (Apache License, Version 2.0) Apache Commons IO (commons-io:commons-io:2.5 - http://commons.apache.org/proper/commons-io/) - (Apache License, Version 2.0) Apache Commons IO (commons-io:commons-io:2.8.0 - https://commons.apache.org/proper/commons-io/) (Apache License, Version 2.0) Apache Commons Lang (org.apache.commons:commons-lang3:3.4 - http://commons.apache.org/proper/commons-lang/) (Apache License, Version 2.0) Apache Commons Lang (org.apache.commons:commons-lang3:3.5 - http://commons.apache.org/proper/commons-lang/) (Apache License, Version 2.0) Apache Commons Lang (org.apache.commons:commons-lang3:3.6 - http://commons.apache.org/proper/commons-lang/) diff --git a/seatunnel-dist/release-docs/NOTICE b/seatunnel-dist/release-docs/NOTICE index 2e6d4eb9a0d..fe3c949f619 100644 --- a/seatunnel-dist/release-docs/NOTICE +++ b/seatunnel-dist/release-docs/NOTICE @@ -4379,18 +4379,6 @@ The Apache Software Foundation (http://www.apache.org/). 
========================================================================= -Apache Commons IO NOTICE - -========================================================================= - -Apache Commons IO -Copyright 2002-2020 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (https://www.apache.org/). - -========================================================================= - Apache HttpClient Mime NOTICE ========================================================================= diff --git a/tools/dependencies/known-dependencies.txt b/tools/dependencies/known-dependencies.txt index ed0504ca3dd..5ad2f9439f5 100755 --- a/tools/dependencies/known-dependencies.txt +++ b/tools/dependencies/known-dependencies.txt @@ -94,7 +94,9 @@ commons-daemon-1.0.13.jar commons-dbcp2-2.0.1.jar commons-digester-1.8.1.jar commons-email-1.5.jar -commons-io-2.8.0.jar +commons-io-2.11.0.jar +commons-io-2.4.jar +commons-io-2.5.jar commons-lang-2.6.jar commons-lang3-3.4.jar commons-logging-1.1.3.jar