diff --git a/docs/en/connector-v2/sink/Kudu.md b/docs/en/connector-v2/sink/Kudu.md
index 9a024ba4a41..b6e4eee24c1 100644
--- a/docs/en/connector-v2/sink/Kudu.md
+++ b/docs/en/connector-v2/sink/Kudu.md
@@ -2,51 +2,125 @@
> Kudu sink connector
-## Description
+## Support Kudu Version
-Write data to Kudu.
+- 1.11.1/1.12.0/1.13.0/1.14.0/1.15.0
-The tested kudu version is 1.11.1.
+## Support Those Engines
+
+> Spark
+> Flink
+> SeaTunnel Zeta
## Key features
- [ ] [exactly-once](../../concept/connector-v2-features.md)
-
-## Options
-
-| name | type | required | default value |
-|----------------|--------|----------|---------------|
-| kudu_master | string | yes | - |
-| kudu_table | string | yes | - |
-| save_mode | string | yes | - |
-| common-options | | no | - |
-
-### kudu_master [string]
-
-`kudu_master` The address of kudu master,such as '192.168.88.110:7051'.
-
-### kudu_table [string]
-
-`kudu_table` The name of kudu table..
-
-### save_mode [string]
-
-Storage mode, we need support `overwrite` and `append`. `append` is now supported.
-
-### common options
-
-Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details.
-
-## Example
-
-```bash
-
- kudu {
- kudu_master = "192.168.88.110:7051"
- kudu_table = "studentlyhresultflink"
- save_mode="append"
- }
-
+- [x] [cdc](../../concept/connector-v2-features.md)
+
+## Data Type Mapping
+
+| SeaTunnel Data type | kudu Data type |
+|---------------------|--------------------------|
+| BOOLEAN | BOOL |
+| INT | INT8<br/>INT16<br/>INT32 |
+| BIGINT | INT64 |
+| DECIMAL | DECIMAL |
+| FLOAT | FLOAT |
+| DOUBLE | DOUBLE |
+| STRING | STRING |
+| TIMESTAMP | UNIXTIME_MICROS |
+| BYTES | BINARY |
+
+## Sink Options
+
+| Name | Type | Required | Default | Description |
+|-------------------------------------------|--------|----------|------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------|
+| kudu_masters | String | Yes | - | Kudu master address. Separated by ',',such as '192.168.88.110:7051'. |
+| table_name | String | Yes | - | The name of kudu table. |
+| client_worker_count | Int | No | 2 * Runtime.getRuntime().availableProcessors() | Kudu worker count. Default value is twice the current number of cpu cores. |
+| client_default_operation_timeout_ms | Long | No | 30000 | Kudu normal operation time out. |
+| client_default_admin_operation_timeout_ms | Long | No | 30000 | Kudu admin operation time out. |
+| enable_kerberos | Bool | No | false | Kerberos principal enable. |
+| kerberos_principal | String | No | - | Kerberos principal. Note that all zeta nodes require have this file. |
+| kerberos_keytab | String | No | - | Kerberos keytab. Note that all Zeta nodes must have this file. |
+| kerberos_krb5conf | String | No | - | Kerberos krb5 conf. Note that all Zeta nodes must have this file. |
+| save_mode | String | No | - | Storage mode, support `overwrite` and `append`. |
+| session_flush_mode | String | No | AUTO_FLUSH_SYNC | Kudu flush mode. Default AUTO_FLUSH_SYNC. |
+| batch_size | Int | No | 1024 | The maximum number of buffered records (including all append, upsert and delete records). Once this number of records is exceeded, the data is flushed. The default value is 1024. |
+| buffer_flush_interval | Int | No | 10000 | The flush interval in milliseconds. Once this time has elapsed, asynchronous threads flush the data. |
+| ignore_not_found | Bool | No | false | If true, ignore all not found rows. |
+| ignore_not_duplicate | Bool | No | false | If true, ignore all duplicate rows. |
+| common-options | | No | - | Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details. |
+
+## Task Example
+
+### Simple:
+
+> The following example uses a FakeSource whose result table is named "kudu" and writes its CDC rows (insert, update and delete) to the Kudu table "kudu_sink_table".
+
+```hocon
+
+env {
+ execution.parallelism = 1
+ job.mode = "BATCH"
+}
+ source {
+ FakeSource {
+ result_table_name = "kudu"
+ schema = {
+ fields {
+ id = int
+ val_bool = boolean
+ val_int8 = tinyint
+ val_int16 = smallint
+ val_int32 = int
+ val_int64 = bigint
+ val_float = float
+ val_double = double
+ val_decimal = "decimal(16, 1)"
+ val_string = string
+ val_unixtime_micros = timestamp
+ }
+ }
+ rows = [
+ {
+ kind = INSERT
+ fields = [1, true, 1, 2, 3, 4, 4.3,5.3,6.3, "NEW", "2020-02-02T02:02:02"]
+ },
+ {
+ kind = INSERT
+ fields = [2, true, 1, 2, 3, 4, 4.3,5.3,6.3, "NEW", "2020-02-02T02:02:02"]
+ },
+ {
+ kind = INSERT
+ fields = [3, true, 1, 2, 3, 4, 4.3,5.3,6.3, "NEW", "2020-02-02T02:02:02"]
+ },
+ {
+ kind = UPDATE_BEFORE
+ fields = [1, true, 1, 2, 3, 4, 4.3,5.3,6.3, "NEW", "2020-02-02T02:02:02"]
+ },
+ {
+ kind = UPDATE_AFTER
+ fields = [1, true, 2, 2, 3, 4, 4.3,5.3,6.3, "NEW", "2020-02-02T02:02:02"]
+ },
+ {
+ kind = DELETE
+ fields = [2, true, 1, 2, 3, 4, 4.3,5.3,6.3, "NEW", "2020-02-02T02:02:02"]
+ }
+ ]
+ }
+ }
+
+sink {
+ kudu{
+ source_table_name = "kudu"
+ kudu_masters = "kudu-master-cdc:7051"
+ table_name = "kudu_sink_table"
+ enable_kerberos = true
+ kerberos_principal = "xx@xx.COM"
+ kerberos_keytab = "xx.keytab"
+ }
+}
```
## Changelog
diff --git a/docs/en/connector-v2/source/Kudu.md b/docs/en/connector-v2/source/Kudu.md
index 0fc39b82f79..f3953e98ae6 100644
--- a/docs/en/connector-v2/source/Kudu.md
+++ b/docs/en/connector-v2/source/Kudu.md
@@ -2,58 +2,105 @@
> Kudu source connector
-## Description
+## Support Kudu Version
-Used to read data from Kudu.
+- 1.11.1/1.12.0/1.13.0/1.14.0/1.15.0
-The tested kudu version is 1.11.1.
+## Support Those Engines
+
+> Spark
+> Flink
+> SeaTunnel Zeta
## Key features
- [x] [batch](../../concept/connector-v2-features.md)
-- [ ] [stream](../../concept/connector-v2-features.md)
- [ ] [exactly-once](../../concept/connector-v2-features.md)
-- [ ] [column projection](../../concept/connector-v2-features.md)
-- [ ] [parallelism](../../concept/connector-v2-features.md)
+- [x] [column projection](../../concept/connector-v2-features.md)
+- [x] [parallelism](../../concept/connector-v2-features.md)
- [ ] [support user-defined split](../../concept/connector-v2-features.md)
-## Options
-
-| name | type | required | default value |
-|----------------|--------|----------|---------------|
-| kudu_master | string | yes | - |
-| kudu_table | string | yes | - |
-| columnsList | string | yes | - |
-| common-options | | no | - |
-
-### kudu_master [string]
-
-`kudu_master` The address of kudu master,such as '192.168.88.110:7051'.
-
-### kudu_table [string]
-
-`kudu_table` The name of kudu table..
-
-### columnsList [string]
-
-`columnsList` Specifies the column names of the table.
+## Description
-### common options
+Used to read data from Kudu.
-Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details.
+The tested kudu version is 1.11.1.
-## Examples
+## Data Type Mapping
+
+| kudu Data type | SeaTunnel Data type |
+|--------------------------|---------------------|
+| BOOL | BOOLEAN |
+| INT8<br/>INT16<br/>INT32 | INT |
+| INT64 | BIGINT |
+| DECIMAL | DECIMAL |
+| FLOAT | FLOAT |
+| DOUBLE | DOUBLE |
+| STRING | STRING |
+| UNIXTIME_MICROS | TIMESTAMP |
+| BINARY | BYTES |
+
+## Source Options
+
+| Name | Type | Required | Default | Description |
+|-------------------------------------------|--------|----------|------------------------------------------------|----------------------------------------------------------------------------------------------------------|
+| kudu_masters | String | Yes | - | Kudu master address. Separated by ',',such as '192.168.88.110:7051'. |
+| table_name | String | Yes | - | The name of kudu table. |
+| client_worker_count | Int | No | 2 * Runtime.getRuntime().availableProcessors() | Kudu worker count. Default value is twice the current number of cpu cores. |
+| client_default_operation_timeout_ms | Long | No | 30000 | Kudu normal operation time out. |
+| client_default_admin_operation_timeout_ms | Long | No | 30000 | Kudu admin operation time out. |
+| enable_kerberos | Bool | No | false | Kerberos principal enable. |
+| kerberos_principal | String | No | - | Kerberos principal. Note that all zeta nodes require have this file. |
+| kerberos_keytab | String | No | - | Kerberos keytab. Note that all Zeta nodes must have this file. |
+| kerberos_krb5conf | String | No | - | Kerberos krb5 conf. Note that all Zeta nodes must have this file. |
+| scan_token_query_timeout | Long | No | 30000 | The timeout for connecting scan token. If not set, it will be the same as operationTimeout. |
+| scan_token_batch_size_bytes | Int | No | 1024 * 1024 | Kudu scan bytes. The maximum number of bytes read at a time, the default is 1MB. |
+| filter | String | No | - | Kudu scan filter expressions. Not supported yet. |
+| schema | Map | No | - | SeaTunnel Schema. |
+| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. |
+
+## Task Example
+
+### Simple:
+
+> The following example reads the Kudu table "kudu_source_table", prints its data to the console, and writes it to the Kudu table "kudu_sink_table".
```hocon
+# Defining the runtime environment
+env {
+ # You can set flink configuration here
+ execution.parallelism = 2
+ job.mode = "BATCH"
+}
+
source {
- Kudu {
- result_table_name = "studentlyh2"
- kudu_master = "192.168.88.110:7051"
- kudu_table = "studentlyh2"
- columnsList = "id,name,age,sex"
- }
+ # This is a example source plugin **only for test and demonstrate the feature source plugin**
+ kudu{
+ kudu_masters = "kudu-master:7051"
+ table_name = "kudu_source_table"
+ result_table_name = "kudu"
+ enable_kerberos = true
+ kerberos_principal = "xx@xx.COM"
+ kerberos_keytab = "xx.keytab"
+}
+}
+transform {
}
+
+sink {
+ console {
+ source_table_name = "kudu"
+ }
+
+ kudu{
+ source_table_name = "kudu"
+ kudu_masters = "kudu-master:7051"
+ table_name = "kudu_sink_table"
+ enable_kerberos = true
+ kerberos_principal = "xx@xx.COM"
+ kerberos_keytab = "xx.keytab"
+ }
```
## Changelog
diff --git a/seatunnel-connectors-v2/connector-kudu/pom.xml b/seatunnel-connectors-v2/connector-kudu/pom.xml
index a4f58a3f6c5..9dcdc87853a 100644
--- a/seatunnel-connectors-v2/connector-kudu/pom.xml
+++ b/seatunnel-connectors-v2/connector-kudu/pom.xml
@@ -56,5 +56,19 @@
connector-common
${project.version}
+
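+        <dependency>
+            <groupId>org.apache.seatunnel</groupId>
+            <artifactId>seatunnel-hadoop3-3.1.4-uber</artifactId>
+            <version>${project.version}</version>
+            <classifier>optional</classifier>
+            <scope>provided</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.apache.avro</groupId>
+                    <artifactId>avro</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>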
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/catalog/KuduCatalog.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/catalog/KuduCatalog.java
new file mode 100644
index 00000000000..0fa40d8b0e0
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/catalog/KuduCatalog.java
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kudu.catalog;
+
+import org.apache.seatunnel.api.table.catalog.Catalog;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.PhysicalColumn;
+import org.apache.seatunnel.api.table.catalog.PrimaryKey;
+import org.apache.seatunnel.api.table.catalog.TableIdentifier;
+import org.apache.seatunnel.api.table.catalog.TablePath;
+import org.apache.seatunnel.api.table.catalog.TableSchema;
+import org.apache.seatunnel.api.table.catalog.exception.CatalogException;
+import org.apache.seatunnel.api.table.catalog.exception.DatabaseAlreadyExistException;
+import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException;
+import org.apache.seatunnel.api.table.catalog.exception.TableAlreadyExistException;
+import org.apache.seatunnel.api.table.catalog.exception.TableNotExistException;
+import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
+import org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig;
+import org.apache.seatunnel.connectors.seatunnel.kudu.kuduclient.KuduTypeMapper;
+import org.apache.seatunnel.connectors.seatunnel.kudu.util.KuduUtil;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.kudu.ColumnSchema;
+import org.apache.kudu.Schema;
+import org.apache.kudu.client.KuduClient;
+import org.apache.kudu.client.KuduException;
+import org.apache.kudu.client.KuduTable;
+import org.apache.kudu.shaded.com.google.common.collect.Lists;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import static org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig.ADMIN_OPERATION_TIMEOUT;
+import static org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig.ENABLE_KERBEROS;
+import static org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig.KERBEROS_KEYTAB;
+import static org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig.KERBEROS_KRB5_CONF;
+import static org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig.KERBEROS_PRINCIPAL;
+import static org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig.MASTER;
+import static org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig.OPERATION_TIMEOUT;
+import static org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig.TABLE_NAME;
+import static org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig.WORKER_COUNT;
+import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkNotNull;
+
+public class KuduCatalog implements Catalog {
+
+ private final CommonConfig config;
+
+ private KuduClient kuduClient;
+
+ private final String defaultDatabase = "default_database";
+
+ private final String catalogName;
+
+ public KuduCatalog(String catalogName, CommonConfig config) {
+ this.config = config;
+ this.catalogName = catalogName;
+ }
+
+ @Override
+ public void open() throws CatalogException {
+ kuduClient = KuduUtil.getKuduClient(config);
+ }
+
+ @Override
+ public void close() throws CatalogException {
+ try {
+ kuduClient.close();
+ } catch (KuduException e) {
+ throw new CatalogException("Failed close kudu client", e);
+ }
+ }
+
+ @Override
+ public String getDefaultDatabase() throws CatalogException {
+ return defaultDatabase;
+ }
+
+ @Override
+ public boolean databaseExists(String databaseName) throws CatalogException {
+ return listDatabases().contains(databaseName);
+ }
+
+ @Override
+ public List listDatabases() throws CatalogException {
+ return Lists.newArrayList(getDefaultDatabase());
+ }
+
+ @Override
+ public List listTables(String databaseName)
+ throws CatalogException, DatabaseNotExistException {
+ try {
+ return kuduClient.getTablesList().getTablesList();
+ } catch (KuduException e) {
+ throw new CatalogException(
+ String.format("Failed listing database in catalog %s", this.catalogName), e);
+ }
+ }
+
+ @Override
+ public boolean tableExists(TablePath tablePath) throws CatalogException {
+ checkNotNull(tablePath);
+ try {
+ return kuduClient.tableExists(tablePath.getFullName());
+ } catch (KuduException e) {
+ throw new CatalogException(e);
+ }
+ }
+
+ @Override
+ public CatalogTable getTable(TablePath tablePath)
+ throws CatalogException, TableNotExistException {
+ checkNotNull(tablePath);
+
+ if (!tableExists(tablePath)) {
+ throw new TableNotExistException(catalogName, tablePath);
+ }
+
+ String tableName = tablePath.getFullName();
+
+ try {
+ KuduTable kuduTable = kuduClient.openTable(tableName);
+ TableSchema.Builder builder = TableSchema.builder();
+ Schema schema = kuduTable.getSchema();
+ kuduTable.getPartitionSchema();
+ List columnSchemaList = schema.getColumns();
+ Optional primaryKey = getPrimaryKey(schema.getPrimaryKeyColumns());
+ for (int i = 0; i < columnSchemaList.size(); i++) {
+ SeaTunnelDataType> type = KuduTypeMapper.mapping(columnSchemaList, i);
+ builder.column(
+ PhysicalColumn.of(
+ columnSchemaList.get(i).getName(),
+ type,
+ columnSchemaList.get(i).getTypeSize(),
+ columnSchemaList.get(i).isNullable(),
+ columnSchemaList.get(i).getDefaultValue(),
+ columnSchemaList.get(i).getComment()));
+ }
+
+ primaryKey.ifPresent(builder::primaryKey);
+
+ TableIdentifier tableIdentifier =
+ TableIdentifier.of(
+ catalogName, tablePath.getDatabaseName(), tablePath.getTableName());
+
+ return CatalogTable.of(
+ tableIdentifier,
+ builder.build(),
+ buildConnectorOptions(tablePath),
+ Collections.emptyList(),
+ tableName);
+ } catch (Exception e) {
+ throw new CatalogException("An exception occurred while obtaining the table", e);
+ }
+ }
+
+ private Map buildConnectorOptions(TablePath tablePath) {
+ Map options = new HashMap<>(8);
+ options.put("connector", "kudu");
+ options.put(TABLE_NAME.key(), tablePath.getFullName());
+ options.put(MASTER.key(), config.getMasters());
+ options.put(WORKER_COUNT.key(), config.getWorkerCount().toString());
+ options.put(OPERATION_TIMEOUT.key(), config.getOperationTimeout().toString());
+ options.put(ADMIN_OPERATION_TIMEOUT.key(), config.getAdminOperationTimeout().toString());
+ if (config.getEnableKerberos()) {
+ options.put(KERBEROS_PRINCIPAL.key(), config.getPrincipal());
+ options.put(KERBEROS_KEYTAB.key(), config.getKeytab());
+ if (StringUtils.isNotBlank(config.getKrb5conf())) {
+ options.put(KERBEROS_KRB5_CONF.key(), config.getKrb5conf());
+ }
+ }
+ options.put(ENABLE_KERBEROS.key(), config.getEnableKerberos().toString());
+ return options;
+ }
+
+ @Override
+ public void createTable(TablePath tablePath, CatalogTable table, boolean ignoreIfExists)
+ throws TableAlreadyExistException, DatabaseNotExistException, CatalogException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void dropTable(TablePath tablePath, boolean ignoreIfNotExists)
+ throws TableNotExistException, CatalogException {
+ String tableName = tablePath.getFullName();
+ try {
+ if (tableExists(tablePath)) {
+ kuduClient.deleteTable(tableName);
+ } else if (!ignoreIfNotExists) {
+ throw new TableNotExistException(catalogName, tablePath);
+ }
+ } catch (KuduException e) {
+ throw new CatalogException("Could not delete table " + tableName, e);
+ }
+ }
+
+ @Override
+ public void createDatabase(TablePath tablePath, boolean ignoreIfExists)
+ throws DatabaseAlreadyExistException, CatalogException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void dropDatabase(TablePath tablePath, boolean ignoreIfNotExists)
+ throws DatabaseNotExistException, CatalogException {
+ throw new UnsupportedOperationException();
+ }
+
+ protected Optional getPrimaryKey(List columnSchemaList) {
+ List pkFields =
+ columnSchemaList.stream().map(ColumnSchema::getName).collect(Collectors.toList());
+ if (!pkFields.isEmpty()) {
+ String pkName = "pk_" + String.join("_", pkFields);
+ return Optional.of(PrimaryKey.of(pkName, pkFields));
+ }
+ return Optional.empty();
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/catalog/KuduCatalogFactory.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/catalog/KuduCatalogFactory.java
new file mode 100644
index 00000000000..6ebf783604a
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/catalog/KuduCatalogFactory.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kudu.catalog;
+
+import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+import org.apache.seatunnel.api.configuration.util.OptionRule;
+import org.apache.seatunnel.api.table.catalog.Catalog;
+import org.apache.seatunnel.api.table.factory.CatalogFactory;
+import org.apache.seatunnel.api.table.factory.Factory;
+import org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig;
+
+import com.google.auto.service.AutoService;
+
+import static org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig.ADMIN_OPERATION_TIMEOUT;
+import static org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig.ENABLE_KERBEROS;
+import static org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig.KERBEROS_KEYTAB;
+import static org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig.KERBEROS_KRB5_CONF;
+import static org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig.KERBEROS_PRINCIPAL;
+import static org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig.MASTER;
+import static org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig.OPERATION_TIMEOUT;
+import static org.apache.seatunnel.connectors.seatunnel.kudu.config.CommonConfig.WORKER_COUNT;
+
+/**
+ * {@link CatalogFactory} registered through {@link AutoService} that creates {@link KuduCatalog}
+ * instances under the identifier {@code Kudu}.
+ */
+@AutoService(Factory.class)
+public class KuduCatalogFactory implements CatalogFactory {
+
+    public static final String IDENTIFIER = "Kudu";
+
+    /** Returns the identifier under which this factory is looked up. */
+    @Override
+    public String factoryIdentifier() {
+        return IDENTIFIER;
+    }
+
+    /**
+     * Creates a catalog whose connection settings are read from {@code options}.
+     *
+     * @param catalogName name given to the created catalog
+     * @param options configuration the {@link CommonConfig} is built from
+     */
+    @Override
+    public Catalog createCatalog(String catalogName, ReadonlyConfig options) {
+        return new KuduCatalog(catalogName, new CommonConfig(options));
+    }
+
+    /**
+     * Declares the accepted options: the master address is required, the client tuning options
+     * and the krb5 conf are optional, and principal and keytab are required once Kerberos is
+     * enabled.
+     */
+    @Override
+    public OptionRule optionRule() {
+        OptionRule.Builder rule =
+                OptionRule.builder()
+                        .required(MASTER)
+                        .optional(WORKER_COUNT)
+                        .optional(OPERATION_TIMEOUT)
+                        .optional(ADMIN_OPERATION_TIMEOUT)
+                        .optional(KERBEROS_KRB5_CONF)
+                        .conditional(ENABLE_KERBEROS, true, KERBEROS_PRINCIPAL, KERBEROS_KEYTAB);
+        return rule.build();
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/config/CommonConfig.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/config/CommonConfig.java
new file mode 100644
index 00000000000..b01cfbdc2bc
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/config/CommonConfig.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kudu.config;
+
+import org.apache.seatunnel.api.configuration.Option;
+import org.apache.seatunnel.api.configuration.Options;
+import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+
+import org.apache.kudu.client.AsyncKuduClient;
+
+import lombok.Getter;
+import lombok.ToString;
+
+import java.io.Serializable;
+
+/**
+ * Connection settings shared by the Kudu catalog, source and sink: master addresses, table name,
+ * client tuning values and Kerberos settings.
+ *
+ * <p>The {@code Option} constants declare the configuration keys and their defaults; the
+ * constructor copies the resolved values from a {@link ReadonlyConfig} into the fields.
+ */
+@Getter
+@ToString
+public class CommonConfig implements Serializable {
+
+    public static final Option<String> MASTER =
+            Options.key("kudu_masters")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("Kudu master address. Separated by ','");
+
+    public static final Option<String> TABLE_NAME =
+            Options.key("table_name")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("Kudu table name");
+
+    public static final Option<Integer> WORKER_COUNT =
+            Options.key("client_worker_count")
+                    .intType()
+                    .defaultValue(2 * Runtime.getRuntime().availableProcessors())
+                    .withDescription(
+                            "Kudu worker count. Default value is twice the current number of cpu cores");
+
+    public static final Option<Long> OPERATION_TIMEOUT =
+            Options.key("client_default_operation_timeout_ms")
+                    .longType()
+                    .defaultValue(AsyncKuduClient.DEFAULT_OPERATION_TIMEOUT_MS)
+                    .withDescription("Kudu normal operation time out");
+
+    public static final Option<Long> ADMIN_OPERATION_TIMEOUT =
+            Options.key("client_default_admin_operation_timeout_ms")
+                    .longType()
+                    .defaultValue(AsyncKuduClient.DEFAULT_OPERATION_TIMEOUT_MS)
+                    .withDescription("Kudu admin operation time out");
+
+    public static final Option<Boolean> ENABLE_KERBEROS =
+            Options.key("enable_kerberos")
+                    .booleanType()
+                    .defaultValue(false)
+                    .withDescription("Kerberos principal enable.");
+
+    public static final Option<String> KERBEROS_PRINCIPAL =
+            Options.key("kerberos_principal")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Kerberos principal. Note that all zeta nodes require have this file.");
+
+    public static final Option<String> KERBEROS_KEYTAB =
+            Options.key("kerberos_keytab")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Kerberos keytab. Note that all zeta nodes require have this file.");
+
+    public static final Option<String> KERBEROS_KRB5_CONF =
+            Options.key("kerberos_krb5conf")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Kerberos krb5 conf. Note that all zeta nodes require have this file.");
+
+    /** Value of {@link #MASTER}. */
+    protected String masters;
+
+    /** Value of {@link #TABLE_NAME}. */
+    protected String table;
+
+    /** Value of {@link #WORKER_COUNT}. */
+    protected Integer workerCount;
+
+    /** Value of {@link #OPERATION_TIMEOUT}. */
+    protected Long operationTimeout;
+
+    /** Value of {@link #ADMIN_OPERATION_TIMEOUT}. */
+    protected Long adminOperationTimeout;
+
+    /** Value of {@link #ENABLE_KERBEROS}. */
+    protected Boolean enableKerberos;
+
+    /** Value of {@link #KERBEROS_PRINCIPAL}. */
+    protected String principal;
+
+    /** Value of {@link #KERBEROS_KEYTAB}. */
+    protected String keytab;
+
+    /** Value of {@link #KERBEROS_KRB5_CONF}. */
+    protected String krb5conf;
+
+    /**
+     * Reads every shared option from {@code config}.
+     *
+     * @param config configuration to read the option values from
+     */
+    public CommonConfig(ReadonlyConfig config) {
+        this.masters = config.get(MASTER);
+        this.table = config.get(TABLE_NAME);
+        this.workerCount = config.get(WORKER_COUNT);
+        this.operationTimeout = config.get(OPERATION_TIMEOUT);
+        this.adminOperationTimeout = config.get(ADMIN_OPERATION_TIMEOUT);
+        this.enableKerberos = config.get(ENABLE_KERBEROS);
+        this.principal = config.get(KERBEROS_PRINCIPAL);
+        this.keytab = config.get(KERBEROS_KEYTAB);
+        // Must come from the krb5 conf option; reading the keytab option here would make
+        // getKrb5conf() return the keytab value.
+        this.krb5conf = config.get(KERBEROS_KRB5_CONF);
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/config/KuduSinkConfig.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/config/KuduSinkConfig.java
index fbeb2885f37..8db9e3896ed 100644
--- a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/config/KuduSinkConfig.java
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/config/KuduSinkConfig.java
@@ -17,46 +17,72 @@
package org.apache.seatunnel.connectors.seatunnel.kudu.config;
-import org.apache.seatunnel.shade.com.typesafe.config.Config;
-
-import org.apache.seatunnel.api.common.SeaTunnelAPIErrorCode;
import org.apache.seatunnel.api.configuration.Option;
import org.apache.seatunnel.api.configuration.Options;
-import org.apache.seatunnel.common.constants.PluginType;
-import org.apache.seatunnel.connectors.seatunnel.kudu.exception.KuduConnectorException;
+import org.apache.seatunnel.api.configuration.ReadonlyConfig;
-import org.apache.commons.lang3.StringUtils;
+import org.apache.kudu.client.SessionConfiguration;
-import lombok.Data;
-import lombok.NonNull;
+import lombok.Getter;
+import lombok.ToString;
-@Data
-public class KuduSinkConfig {
+import java.util.Locale;
- public static final Option KUDU_MASTER =
- Options.key("kudu_master")
- .stringType()
- .noDefaultValue()
- .withDescription("kudu master address");
+@Getter
+@ToString
+public class KuduSinkConfig extends CommonConfig {
- public static final Option KUDU_SAVE_MODE =
+ public static final Option SAVE_MODE =
Options.key("save_mode")
.enumType(SaveMode.class)
- .noDefaultValue()
+ .defaultValue(SaveMode.APPEND)
.withDescription("Storage mode,append is now supported");
- public static final Option KUDU_TABLE_NAME =
- Options.key("kudu_table")
+ public static final Option FLUSH_MODE =
+ Options.key("session_flush_mode")
.stringType()
- .noDefaultValue()
- .withDescription("kudu table name");
+ .defaultValue(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC.name())
+ .withDescription("Kudu flush mode. Default AUTO_FLUSH_SYNC");
+
+    /** Number of buffered records after which the data is flushed; defaults to 1024. */
+    public static final Option<Integer> BATCH_SIZE =
+            Options.key("batch_size")
+                    .intType()
+                    .defaultValue(1024)
+                    .withDescription(
+                            "the flush max size (includes all append, upsert and delete records), over this number"
+                                    + " of records, will flush data. The default value is 1024.");
+
+    /** Flush interval in milliseconds; defaults to 10000, i.e. 10 seconds. */
+    public static final Option<Integer> BUFFER_FLUSH_INTERVAL =
+            Options.key("buffer_flush_interval")
+                    .intType()
+                    .defaultValue(10000)
+                    .withDescription(
+                            "the flush interval mills, over this time, asynchronous threads will flush data. The "
+                                    + "default value is 10s.");
+
+ public static final Option IGNORE_NOT_FOUND =
+ Options.key("ignore_not_found")
+ .booleanType()
+ .defaultValue(false)
+ .withDescription("if true, ignore all not found rows");
+
+ public static final Option IGNORE_DUPLICATE =
+ Options.key("ignore_not_duplicate")
+ .booleanType()
+ .defaultValue(false)
+ .withDescription("if true, ignore all dulicate rows");
private SaveMode saveMode;
- private String kuduMaster;
+ private SessionConfiguration.FlushMode flushMode;
- /** Specifies the name of the table */
- private String kuduTableName;
+ private int maxBufferSize;
+
+ private int flushInterval;
+
+ private boolean ignoreNotFound;
+
+ private boolean ignoreDuplicate;
public enum SaveMode {
APPEND(),
@@ -71,22 +97,25 @@ public static SaveMode fromStr(String str) {
}
}
- public KuduSinkConfig(@NonNull Config pluginConfig) {
- if (pluginConfig.hasPath(KUDU_SAVE_MODE.key())
- && pluginConfig.hasPath(KUDU_MASTER.key())
- && pluginConfig.hasPath(KUDU_TABLE_NAME.key())) {
- this.saveMode =
- StringUtils.isBlank(pluginConfig.getString(KUDU_SAVE_MODE.key()))
- ? SaveMode.APPEND
- : SaveMode.fromStr(pluginConfig.getString(KUDU_SAVE_MODE.key()));
- this.kuduMaster = pluginConfig.getString(KUDU_MASTER.key());
- this.kuduTableName = pluginConfig.getString(KUDU_TABLE_NAME.key());
- } else {
- throw new KuduConnectorException(
- SeaTunnelAPIErrorCode.CONFIG_VALIDATION_FAILED,
- String.format(
- "PluginName: %s, PluginType: %s, Message: %s",
- "Kudu", PluginType.SINK, "Missing Sink configuration parameters"));
+ public KuduSinkConfig(ReadonlyConfig config) {
+ super(config);
+ this.saveMode = config.get(SAVE_MODE);
+ this.flushMode = fromStrFlushMode(config.get(FLUSH_MODE));
+ this.maxBufferSize = config.get(BATCH_SIZE);
+ this.flushInterval = config.get(BUFFER_FLUSH_INTERVAL);
+ this.ignoreNotFound = config.get(IGNORE_NOT_FOUND);
+ this.ignoreDuplicate = config.get(IGNORE_DUPLICATE);
+ }
+
+ /**
+  * Resolves a flush mode name to the matching Kudu {@code FlushMode}, ignoring case.
+  *
+  * <p>Any name other than {@code MANUAL_FLUSH} or {@code AUTO_FLUSH_BACKGROUND}, including an
+  * unrecognised one, resolves to {@code AUTO_FLUSH_SYNC}.
+  *
+  * @param flushMode the configured flush mode name
+  * @return the resolved flush mode
+  */
+ private SessionConfiguration.FlushMode fromStrFlushMode(String flushMode) {
+ switch (flushMode.toUpperCase(Locale.ENGLISH)) {
+ case "MANUAL_FLUSH":
+ return SessionConfiguration.FlushMode.MANUAL_FLUSH;
+ case "AUTO_FLUSH_BACKGROUND":
+ return SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND;
+ case "AUTO_FLUSH_SYNC":
+ default:
+ return SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC;
}
}
}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/config/KuduSourceConfig.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/config/KuduSourceConfig.java
index 5a2c1d20532..bb73b43e88f 100644
--- a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/config/KuduSourceConfig.java
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/config/KuduSourceConfig.java
@@ -19,26 +19,47 @@
import org.apache.seatunnel.api.configuration.Option;
import org.apache.seatunnel.api.configuration.Options;
+import org.apache.seatunnel.api.configuration.ReadonlyConfig;
-import java.io.Serializable;
+import org.apache.kudu.client.AsyncKuduClient;
-public class KuduSourceConfig implements Serializable {
+import lombok.Getter;
+import lombok.ToString;
- public static final Option KUDU_MASTER =
- Options.key("kudu_master")
- .stringType()
- .noDefaultValue()
- .withDescription("Kudu master address");
+/**
+ * Source-side settings of the Kudu connector: the scan-token query timeout, the scan batch size
+ * in bytes and an optional scan filter. The config is also passed to the {@code CommonConfig}
+ * constructor.
+ */
+@Getter
+@ToString
+public class KuduSourceConfig extends CommonConfig {
- public static final Option TABLE_NAME =
- Options.key("kudu_table")
- .stringType()
- .noDefaultValue()
- .withDescription("Kudu table name");
+ /** Scan-token query timeout; defaults to {@code AsyncKuduClient.DEFAULT_OPERATION_TIMEOUT_MS}. */
+ // NOTE(review): the description says an unset value follows operationTimeout, but the default
+ // here is the fixed client constant - confirm which behaviour is intended.
+ public static final Option<Long> QUERY_TIMEOUT =
+ Options.key("scan_token_query_timeout")
+ .longType()
+ .defaultValue(AsyncKuduClient.DEFAULT_OPERATION_TIMEOUT_MS)
+ .withDescription(
+ "The timeout for connecting scan token. If not set, it will be the same as operationTimeout");
+
+ /** Maximum number of bytes read per scan; defaults to 1024 * 1024. */
+ public static final Option<Integer> SCAN_BATCH_SIZE_BYTES =
+ Options.key("scan_token_batch_size_bytes")
+ .intType()
+ .defaultValue(1024 * 1024)
+ .withDescription(
+ "Kudu scan bytes. The maximum number of bytes read at a time, the default is 1MB");
- public static final Option COLUMNS_LIST =
- Options.key("columnsList")
+ /** Scan filter expressions; this option has no default value. */
+ public static final Option<String> FILTER =
+ Options.key("filter")
.stringType()
.noDefaultValue()
- .withDescription("Specifies the column names of the table");
+ .withDescription("Kudu scan filter expressions");
+
+ /** Value read from {@link #SCAN_BATCH_SIZE_BYTES}. */
+ private int batchSizeBytes;
+
+ /** Value read from {@link #QUERY_TIMEOUT}. */
+ protected Long queryTimeout;
+
+ /** Value read from {@link #FILTER}; presumably null when the option is absent - TODO confirm. */
+ private String filter;
+
+ /**
+ * Reads the source-specific options after passing the config to the superclass constructor.
+ *
+ * @param config read-only option view the source settings are read from
+ */
+ public KuduSourceConfig(ReadonlyConfig config) {
+ super(config);
+ // The typed Option declarations above are what let these reads be assigned without casts.
+ this.batchSizeBytes = config.get(SCAN_BATCH_SIZE_BYTES);
+ this.queryTimeout = config.get(QUERY_TIMEOUT);
+ this.filter = config.get(FILTER);
+ }
}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/exception/KuduConnectorErrorCode.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/exception/KuduConnectorErrorCode.java
index 2457560a0f0..04aaa039954 100644
--- a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/exception/KuduConnectorErrorCode.java
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/exception/KuduConnectorErrorCode.java
@@ -23,11 +23,10 @@ public enum KuduConnectorErrorCode implements SeaTunnelErrorCode {
GET_KUDUSCAN_OBJECT_FAILED("KUDU-01", "Get the Kuduscan object for each splice failed"),
CLOSE_KUDU_CLIENT_FAILED("KUDU-02", "Close Kudu client failed"),
DATA_TYPE_CAST_FILED("KUDU-03", "Value type does not match column type"),
- KUDU_UPSERT_FAILED("KUDU-04", "Upsert data to Kudu failed"),
- KUDU_INSERT_FAILED("KUDU-05", "Insert data to Kudu failed"),
- INIT_KUDU_CLIENT_FAILED("KUDU-06", "Initialize the Kudu client failed"),
+ WRITE_DATA_FAILED("KUDU-04", "while sending value to Kudu failed"),
+ INIT_KUDU_CLIENT_FAILED("KUDU-05", "Initialize the Kudu client failed"),
GENERATE_KUDU_PARAMETERS_FAILED(
- "KUDU-07", "Generate Kudu Parameters in the preparation phase failed");
+ "KUDU-06", "Generate Kudu Parameters in the preparation phase failed");
private final String code;
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/kuduclient/KuduInputFormat.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/kuduclient/KuduInputFormat.java
index 6dcf33eca13..006a8adb3c9 100644
--- a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/kuduclient/KuduInputFormat.java
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/kuduclient/KuduInputFormat.java
@@ -17,177 +17,72 @@
package org.apache.seatunnel.connectors.seatunnel.kudu.kuduclient;
-import org.apache.seatunnel.api.table.type.BasicType;
-import org.apache.seatunnel.api.table.type.DecimalType;
import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
-import org.apache.seatunnel.common.constants.PluginType;
-import org.apache.seatunnel.common.exception.CommonErrorCode;
-import org.apache.seatunnel.common.utils.ExceptionUtils;
+import org.apache.seatunnel.connectors.seatunnel.kudu.config.KuduSourceConfig;
import org.apache.seatunnel.connectors.seatunnel.kudu.exception.KuduConnectorErrorCode;
import org.apache.seatunnel.connectors.seatunnel.kudu.exception.KuduConnectorException;
+import org.apache.seatunnel.connectors.seatunnel.kudu.source.KuduSourceSplit;
+import org.apache.seatunnel.connectors.seatunnel.kudu.util.KuduUtil;
-import org.apache.kudu.ColumnSchema;
-import org.apache.kudu.Schema;
import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduException;
-import org.apache.kudu.client.KuduPredicate;
+import org.apache.kudu.client.KuduScanToken;
import org.apache.kudu.client.KuduScanner;
import org.apache.kudu.client.RowResult;
+import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
+import java.io.IOException;
import java.io.Serializable;
-import java.math.BigDecimal;
-import java.math.BigInteger;
import java.sql.SQLException;
+import java.sql.Timestamp;
import java.util.ArrayList;
-import java.util.Arrays;
+import java.util.HashSet;
import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+
+import static org.apache.seatunnel.api.table.type.SqlType.TIMESTAMP;
@Slf4j
public class KuduInputFormat implements Serializable {
- public KuduInputFormat(String kuduMaster, String tableName, String columnsList) {
- this.kuduMaster = kuduMaster;
- this.columnsList = Arrays.asList(columnsList.split(","));
- this.tableName = tableName;
- }
+ private final KuduSourceConfig kuduSourceConfig;
+ private final SeaTunnelRowType rowTypeInfo;
/** Declare the global variable KuduClient and use it to manipulate the Kudu table */
public KuduClient kuduClient;
- /** Specify kuduMaster address */
- public String kuduMaster;
-
- public List columnsList;
- public Schema schema;
- public String keyColumn;
- public static final int TIMEOUTMS = 18000;
-
- /** Specifies the name of the table */
- public String tableName;
+ /**
+ * Creates an input format bound to the given source configuration and row type.
+ *
+ * <p>Only the two references are stored here; the Kudu client is not created by this
+ * constructor.
+ *
+ * @param kuduSourceConfig source settings; annotated with Lombok's {@code @NonNull}
+ * @param rowTypeInfo row type kept on this instance
+ */
+ public KuduInputFormat(
+ @NonNull KuduSourceConfig kuduSourceConfig, SeaTunnelRowType rowTypeInfo) {
+ this.kuduSourceConfig = kuduSourceConfig;
+ this.rowTypeInfo = rowTypeInfo;
+ }
- public List getColumnsSchemas() {
- List columns = null;
- try {
- schema = kuduClient.openTable(tableName).getSchema();
- keyColumn = schema.getPrimaryKeyColumns().get(0).getName();
- columns = schema.getColumns();
- } catch (KuduException e) {
- throw new KuduConnectorException(
- CommonErrorCode.TABLE_SCHEMA_GET_FAILED, "get table Columns Schemas Failed");
+ /**
+ * Creates the Kudu client from the source configuration if this instance does not hold one
+ * yet. When {@code kuduClient} is already set the call does nothing.
+ */
+ public void openInputFormat() {
+ if (kuduClient == null) {
+ kuduClient = KuduUtil.getKuduClient(kuduSourceConfig);
}
- return columns;
}
- public static SeaTunnelRow getSeaTunnelRowData(RowResult rs, SeaTunnelRowType typeInfo)
- throws SQLException {
-
+ public SeaTunnelRow toInternal(RowResult rs) throws SQLException {
List