Merge branch 'apache:dev' into dev-kudukerberros
NickCodeJourney authored Aug 15, 2023
2 parents 3a48fed + 8d6b07e commit 7b60c51
Showing 49 changed files with 713 additions and 878 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/backend.yml
@@ -22,8 +22,6 @@ on:
branches:
- dev
paths-ignore:
- 'docs/**'
- '**/*.md'
- 'seatunnel-ui/**'

concurrency:
2 changes: 1 addition & 1 deletion DISCLAIMER
@@ -1,4 +1,4 @@
Apache SeaTunnel (incubating) is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator PMC.
Apache SeaTunnel is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator PMC.
Incubation is required of all newly accepted projects until a further review indicates that the infrastructure,
communications, and decision making process have stabilized in a manner consistent with other successful ASF projects.
While incubation status is not necessarily a reflection of the completeness or stability of the code,
2 changes: 0 additions & 2 deletions config/seatunnel.yaml
@@ -27,8 +27,6 @@ seatunnel:
checkpoint:
interval: 10000
timeout: 60000
max-concurrent: 1
tolerable-failure: 2
storage:
type: hdfs
max-retained: 3
326 changes: 125 additions & 201 deletions docs/en/connector-v2/sink/HdfsFile.md

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions docs/en/connector-v2/sink/Redis.md
@@ -23,6 +23,7 @@ Used to write data to Redis.
| mode | string | no | single |
| nodes | list | yes when mode=cluster | - |
| format | string | no | json |
| expire | long | no | -1 |
| common-options | | no | - |

### host [string]
@@ -120,6 +121,10 @@ Connector will generate data as the following and write it to redis:

```

### expire [long]

Sets the expiration time for Redis keys, in seconds. The default value is -1, which means keys do not expire automatically.
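As a sketch, a sink block using `expire` might look like the following; the `host`, `port`, `key`, and `data_type` values are placeholders for this example, not values from this page:

```hocon
sink {
  Redis {
    host = "localhost"
    port = 6379
    key = "age"
    data_type = "key"
    format = "json"
    # keys written by this sink expire after 3600 seconds (one hour)
    expire = 3600
  }
}
```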

### common options

Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details
228 changes: 129 additions & 99 deletions docs/en/connector-v2/sink/SelectDB-Cloud.md
@@ -2,139 +2,169 @@

> SelectDB Cloud sink connector
## Support Those Engines

> Spark<br/>
> Flink<br/>
> SeaTunnel Zeta<br/>

## Key Features

- [x] [exactly-once](../../concept/connector-v2-features.md)
- [x] [cdc](../../concept/connector-v2-features.md)

## Description

Used to send data to SelectDB Cloud. Supports both streaming and batch mode.
Internally, the SelectDB Cloud sink connector caches data in batches, then uploads it and commits a `CopyInto` SQL statement to load the data into the table.

Note that to use `sink.enable-delete`, the batch delete feature must be enabled on the SelectDB Cloud table, and only the Unique model supports it:

`ALTER TABLE example_db.my_table ENABLE FEATURE "BATCH_DELETE";`

## Supported DataSource Info
:::tip

Version Supported

* Supported `SelectDB Cloud` version is >= 2.2.x

:::
## Sink Options

| Name | Type | Required | Default | Description |
|--------------------|--------|----------|------------------------|-------------------------------------------------------------------------------------------------------------------------------------------|
| load-url | String | Yes | - | `SelectDB Cloud` warehouse http address, the format is `warehouse_ip:http_port` |
| jdbc-url | String | Yes | - | `SelectDB Cloud` warehouse jdbc address, the format is `warehouse_ip:mysql_port` |
| cluster-name | String | Yes | - | `SelectDB Cloud` cluster name |
| username | String | Yes | - | `SelectDB Cloud` user username |
| password | String | Yes | - | `SelectDB Cloud` user password |
| table.identifier | String | Yes | - | The name of `SelectDB Cloud` table, the format is `database.table` |
| sink.enable-delete | bool | No | false | Whether to enable deletion. This option requires the SelectDB Cloud table to have the batch delete feature enabled, and only the Unique model supports it. |
| sink.max-retries | int | No | 3 | The maximum number of retries in the commit phase when writing records to the database fails. |
| sink.buffer-size | int | No | 10 * 1024 * 1024 (10MB) | The buffer size, in bytes, used to cache data for stream load; it is not recommended to modify it. |
| sink.buffer-count | int | No | 10000 | The buffer count used to cache data for stream load; it is not recommended to modify it. |
| selectdb.config | map | Yes | - | Used to support operations such as `insert`, `delete`, and `update` when SQL is generated automatically, and to specify the write format and its properties. |
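Putting the options above together, a sketch of a sink block with the tuning options spelled out; the connection values are placeholders, and the buffer and retry values shown are simply the defaults, so they could normally be omitted:

```hocon
sink {
  SelectDBCloud {
    load-url = "warehouse_ip:http_port"
    jdbc-url = "warehouse_ip:mysql_port"
    cluster-name = "Cluster"
    table.identifier = "test.test"
    username = "admin"
    password = "******"
    # defaults shown explicitly for illustration
    sink.max-retries = 3
    sink.buffer-size = 10485760  # 10 * 1024 * 1024 bytes (10MB)
    sink.buffer-count = 10000
    selectdb.config {
      file.type = "json"
    }
  }
}
```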

## Data Type Mapping

| SelectDB Cloud Data type | SeaTunnel Data type |
|--------------------------|-----------------------------------------|
| BOOLEAN | BOOLEAN |
| TINYINT | TINYINT |
| SMALLINT | SMALLINT<br/>TINYINT |
| INT | INT<br/>SMALLINT<br/>TINYINT |
| BIGINT | BIGINT<br/>INT<br/>SMALLINT<br/>TINYINT |
| LARGEINT | BIGINT<br/>INT<br/>SMALLINT<br/>TINYINT |
| FLOAT | FLOAT |
| DOUBLE | DOUBLE<br/>FLOAT |
| DECIMAL | DECIMAL<br/>DOUBLE<br/>FLOAT |
| DATE | DATE |
| DATETIME | TIMESTAMP |
| CHAR | STRING |
| VARCHAR | STRING |
| STRING | STRING |
| ARRAY | ARRAY |
| MAP | MAP |
| JSON | STRING |
| HLL | Not supported yet |
| BITMAP | Not supported yet |
| QUANTILE_STATE | Not supported yet |
| STRUCT | Not supported yet |

#### Supported import data formats

The supported formats include CSV and JSON

## Task Example

### Simple:

> The following example describes writing multiple data types to SelectDBCloud; users need to create the corresponding table downstream in advance.

```hocon
env {
  parallelism = 1
  job.mode = "BATCH"
  checkpoint.interval = 10000
}

source {
  FakeSource {
    row.num = 10
    map.size = 10
    array.size = 10
    bytes.length = 10
    string.length = 10
    schema = {
      fields {
        c_map = "map<string, array<int>>"
        c_array = "array<int>"
        c_string = string
        c_boolean = boolean
        c_tinyint = tinyint
        c_smallint = smallint
        c_int = int
        c_bigint = bigint
        c_float = float
        c_double = double
        c_decimal = "decimal(16, 1)"
        c_null = "null"
        c_bytes = bytes
        c_date = date
        c_timestamp = timestamp
      }
    }
  }
}

sink {
  SelectDBCloud {
    load-url = "warehouse_ip:http_port"
    jdbc-url = "warehouse_ip:mysql_port"
    cluster-name = "Cluster"
    table.identifier = "test.test"
    username = "admin"
    password = "******"
    selectdb.config {
      file.type = "json"
    }
  }
}
```


### Use JSON format to import data

```hocon
sink {
  SelectDBCloud {
    load-url = "warehouse_ip:http_port"
    jdbc-url = "warehouse_ip:mysql_port"
    cluster-name = "Cluster"
    table.identifier = "test.test"
    username = "admin"
    password = "******"
    selectdb.config {
      file.type = "json"
    }
  }
}
```

### Use CSV format to import data

```hocon
sink {
  SelectDBCloud {
    load-url = "warehouse_ip:http_port"
    jdbc-url = "warehouse_ip:mysql_port"
    cluster-name = "Cluster"
    table.identifier = "test.test"
    username = "admin"
    password = "******"
    selectdb.config {
      file.type = "csv"
      file.column_separator = ","
      file.line_delimiter = "\n"
    }
  }
}
```

0 comments on commit 7b60c51
