diff --git a/.dlc.json b/.dlc.json index db4f4d1f8b1..8c7ba9ed05e 100644 --- a/.dlc.json +++ b/.dlc.json @@ -17,6 +17,9 @@ }, { "pattern": "^/docs/category" + }, + { + "pattern": "^https://opencollective.com" } ], "timeout": "10s", diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 8d4242a8b1d..ee6620ec71c 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -23,6 +23,7 @@ on: paths-ignore: - 'docs/**' - '**/*.md' + - 'seatunnel-ui/**' concurrency: group: backend-${{ github.event.pull_request.number || github.ref }} @@ -88,7 +89,7 @@ jobs: java: [ '8', '11' ] os: [ 'ubuntu-latest', 'windows-latest' ] runs-on: ${{ matrix.os }} - timeout-minutes: 50 + timeout-minutes: 80 steps: - uses: actions/checkout@v3 with: @@ -114,7 +115,7 @@ jobs: name: Dependency licenses needs: [ sanity-check ] runs-on: ubuntu-latest - timeout-minutes: 30 + timeout-minutes: 40 steps: - uses: actions/checkout@v3 with: @@ -154,7 +155,9 @@ jobs: cache: 'maven' - name: Run Unit tests run: | - ./mvnw -T 2C -B clean verify -D"maven.test.skip"=false -D"checkstyle.skip"=true -D"scalastyle.skip"=true -D"license.skipAddThirdParty"=true --no-snapshot-updates + ./mvnw -B -T 1C clean verify -D"maven.test.skip"=false -D"checkstyle.skip"=true -D"scalastyle.skip"=true -D"license.skipAddThirdParty"=true --no-snapshot-updates + env: + MAVEN_OPTS: -Xmx2048m integration-test: name: Integration Test @@ -175,4 +178,6 @@ jobs: cache: 'maven' - name: Run Integration tests run: | - ./mvnw -T 2C -B verify -DskipUT=true -DskipIT=false -D"checkstyle.skip"=true -D"scalastyle.skip"=true -D"license.skipAddThirdParty"=true --no-snapshot-updates + ./mvnw -T 1C -B verify -DskipUT=true -DskipIT=false -D"checkstyle.skip"=true -D"scalastyle.skip"=true -D"license.skipAddThirdParty"=true --no-snapshot-updates + env: + MAVEN_OPTS: -Xmx2048m diff --git a/.github/workflows/code-analysys.yml b/.github/workflows/code-analysys.yml index 0211b5603ff..4fda66e5f9f 100644 --- a/.github/workflows/code-analysys.yml +++ b/.github/workflows/code-analysys.yml @@ -21,9 +21,11 @@ on: paths-ignore: - 'docs/**' - '**/*.md' + - 'seatunnel-ui/**' jobs: build: runs-on: ubuntu-latest + timeout-minutes: 120 steps: - uses: actions/checkout@v2 with: diff --git a/.github/workflows/codeql.yaml b/.github/workflows/codeql.yaml index 28b656fad53..237bebd08e1 100644 --- a/.github/workflows/codeql.yaml +++ b/.github/workflows/codeql.yaml @@ -22,12 +22,13 @@ on: paths-ignore: - 'docs/**' - '**/*.md' + - 'seatunnel-ui/**' jobs: analyze: name: Analyze runs-on: ubuntu-latest - timeout-minutes: 30 + timeout-minutes: 60 env: JAVA_TOOL_OPTIONS: -Xmx2G -Xms2G -Dhttp.keepAlive=false -Dmaven.test.skip=true -Dcheckstyle.skip=true -Dlicense.skipAddThirdParty=true -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.count=3 -Dmaven.wagon.httpconnectionManager.ttlSeconds=120 diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 7d31f392f36..1cf4140aa4f 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -25,6 +25,7 @@ on: paths-ignore: - 'docs/**' - '**/*.md' + - 'seatunnel-ui/**' concurrency: group: docker-${{ github.event.pull_request.number || github.ref }} @@ -34,7 +35,7 @@ jobs: check: name: Spark runs-on: ubuntu-latest - timeout-minutes: 30 + timeout-minutes: 60 steps: - uses: actions/checkout@v2 - name: Set up JDK 1.8 diff --git a/.github/workflows/license.yml b/.github/workflows/license.yml index 2d5ec0da1c4..d9cd901cab8 100644 --- 
a/.github/workflows/license.yml +++ b/.github/workflows/license.yml @@ -36,7 +36,7 @@ jobs: steps: - uses: actions/checkout@v2 - name: Check License Header - uses: apache/skywalking-eyes@main + uses: apache/skywalking-eyes/header@501a28d2fb4a9b962661987e50cf0219631b32ff auto-license: name: Auto License runs-on: ubuntu-latest diff --git a/.github/workflows/publish-docker.yaml b/.github/workflows/publish-docker.yaml index a7a53d24cde..c2ffb5b433a 100644 --- a/.github/workflows/publish-docker.yaml +++ b/.github/workflows/publish-docker.yaml @@ -24,6 +24,7 @@ on: paths-ignore: - 'docs/**' - '**/*.md' + - 'seatunnel-ui/**' env: HUB: ghcr.io/${{ github.repository }} diff --git a/.gitignore b/.gitignore index dbd0c25fb06..2318e11827c 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ target/ # Intellij Idea files .idea/ *.iml +.idea/* .DS_Store @@ -40,4 +41,4 @@ Test.scala test.conf log4j.properties spark-warehouse -*.flattened-pom.xml \ No newline at end of file +*.flattened-pom.xml diff --git a/docs/en/new-connector/sink/Assert.md b/docs/en/connector-v2/sink/Assert.md similarity index 100% rename from docs/en/new-connector/sink/Assert.md rename to docs/en/connector-v2/sink/Assert.md diff --git a/docs/en/new-connector/sink/Clickhouse.md b/docs/en/connector-v2/sink/Clickhouse.md similarity index 100% rename from docs/en/new-connector/sink/Clickhouse.md rename to docs/en/connector-v2/sink/Clickhouse.md diff --git a/docs/en/new-connector/sink/ClickhouseFile.md b/docs/en/connector-v2/sink/ClickhouseFile.md similarity index 100% rename from docs/en/new-connector/sink/ClickhouseFile.md rename to docs/en/connector-v2/sink/ClickhouseFile.md diff --git a/docs/en/connector-v2/sink/File.mdx b/docs/en/connector-v2/sink/File.mdx new file mode 100644 index 00000000000..7e2e3efd735 --- /dev/null +++ b/docs/en/connector-v2/sink/File.mdx @@ -0,0 +1,262 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# File + +## Description + +Output data to local, HDFS, or S3 files. + +## Options + + + + +| name | type | required | default value | +| --------------------------------- | ------ | -------- | ------------------------------------------------------------- | +| path | string | yes | - | +| file_name_expression | string | no | "${transactionId}" | +| file_format | string | no | "text" | +| filename_time_format | string | no | "yyyy.MM.dd" | +| field_delimiter | string | no | '\001' | +| row_delimiter | string | no | "\n" | +| partition_by | array | no | - | +| partition_dir_expression | string | no | "\${k0}=\${v0}\/\${k1}=\${v1}\/...\/\${kn}=\${vn}\/" | +| is_partition_field_write_in_file | boolean| no | false | +| sink_columns | array | no | When this parameter is empty, all fields are sink columns | +| is_enable_transaction | boolean| no | true | +| save_mode | string | no | "error" | + +### path [string] + +The target dir path is required. An `hdfs file` path starts with `hdfs://`, and a `local file` path starts with `file://`. + +### file_name_expression [string] + +`file_name_expression` describes the file expression which will be created into the `path`. We can add the variable `${now}` or `${uuid}` in the `file_name_expression`, like `test_${uuid}_${now}`. +`${now}` represents the current time, and its format can be defined by specifying the option `filename_time_format`. + +Please note that if `is_enable_transaction` is `true`, `${transactionId}_` will be automatically added to the head of the file name. + +### file_format [string] + +Only `text` is supported as `file_format` for now.
+
+Please note that the final file name will end with the file_format's suffix; the suffix of a text file is `txt`.
+
+### filename_time_format [string]
+
+When the format in the `file_name_expression` parameter is `xxxx-${now}`, `filename_time_format` can specify the time format of the path, and the default value is `yyyy.MM.dd`. The commonly used time formats are listed as follows:
+
+| Symbol | Description |
| ------ | ------------------ |
| y | Year |
| M | Month |
| d | Day of month |
| H | Hour in day (0-23) |
| m | Minute in hour |
| s | Second in minute |
+
+See [Java SimpleDateFormat](https://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html) for detailed time format syntax.
+
+### field_delimiter [string]
+
+The separator between columns in a row of data.
+
+### row_delimiter [string]
+
+The separator between rows in a file.
+
+### partition_by [array]
+
+Partition data based on selected fields.
+
+### partition_dir_expression [string]
+
+If `partition_by` is specified, we will generate the corresponding partition directory based on the partition information, and the final file will be placed in the partition directory.
+
+Default `partition_dir_expression` is `${k0}=${v0}/${k1}=${v1}/.../${kn}=${vn}/`. `k0` is the first partition field and `v0` is the value of the first partition field.
+
+### is_partition_field_write_in_file [boolean]
+
+If `is_partition_field_write_in_file` is `true`, the partition field and its value will be written into the data file.
+
+For example, if you want to write a Hive data file, its value should be `false`.
+
+### sink_columns [array]
+
+Which columns need to be written to the file; the default value is all of the columns obtained from `Transform` or `Source`.
+The order of the fields determines the order in which the file is actually written.
+
+### is_enable_transaction [boolean]
+
+If `is_enable_transaction` is `true`, we will ensure that data will not be lost or duplicated when it is written to the target directory.
+
+Please note that if `is_enable_transaction` is `true`, `${transactionId}_` will be automatically added to the head of the file name.
+
+Only `true` is supported now.
+
+### save_mode [string]
+
+Storage mode, currently supports `overwrite`, `append`, `ignore` and `error`. For the specific meaning of each mode, see [save-modes](https://spark.apache.org/docs/latest/sql-programming-guide.html#save-modes)
+
+Streaming jobs do not support `overwrite`.
+
+
+
+In order to use this connector, you must ensure your Spark/Flink cluster has already integrated Hadoop. The tested Hadoop version is 2.x.
+
+| name | type | required | default value |
| --------------------------------- | ------ | -------- | ------------------------------------------------------------- |
| path | string | yes | - |
| file_name_expression | string | no | "${transactionId}" |
| file_format | string | no | "text" |
| filename_time_format | string | no | "yyyy.MM.dd" |
| field_delimiter | string | no | '\001' |
| row_delimiter | string | no | "\n" |
| partition_by | array | no | - |
| partition_dir_expression | string | no | "\${k0}=\${v0}\/\${k1}=\${v1}\/...\/\${kn}=\${vn}\/" |
| is_partition_field_write_in_file | boolean| no | false |
| sink_columns | array | no | When this parameter is empty, all fields are sink columns |
| is_enable_transaction | boolean| no | true |
| save_mode | string | no | "error" |
+
+### path [string]
+
+The target dir path is required.
The `hdfs file` starts with `hdfs://`, and the `local file` starts with `file://`.
+
+### file_name_expression [string]
+
+`file_name_expression` describes the file expression which will be created into the `path`. We can add the variable `${now}` or `${uuid}` in the `file_name_expression`, like `test_${uuid}_${now}`.
+`${now}` represents the current time, and its format can be defined by specifying the option `filename_time_format`.
+
+Please note that if `is_enable_transaction` is `true`, `${transactionId}_` will be automatically added to the head of the file name.
+
+### file_format [string]
+
+Only `text` is supported as `file_format` for now.
+
+Please note that the final file name will end with the file_format's suffix; the suffix of a text file is `txt`.
+
+### filename_time_format [string]
+
+When the format in the `file_name_expression` parameter is `xxxx-${now}`, `filename_time_format` can specify the time format of the path, and the default value is `yyyy.MM.dd`. The commonly used time formats are listed as follows:
+
+| Symbol | Description |
| ------ | ------------------ |
| y | Year |
| M | Month |
| d | Day of month |
| H | Hour in day (0-23) |
| m | Minute in hour |
| s | Second in minute |
+
+See [Java SimpleDateFormat](https://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html) for detailed time format syntax.
+
+### field_delimiter [string]
+
+The separator between columns in a row of data.
+
+### row_delimiter [string]
+
+The separator between rows in a file.
+
+### partition_by [array]
+
+Partition data based on selected fields.
+
+### partition_dir_expression [string]
+
+If `partition_by` is specified, we will generate the corresponding partition directory based on the partition information, and the final file will be placed in the partition directory.
+
+Default `partition_dir_expression` is `${k0}=${v0}/${k1}=${v1}/.../${kn}=${vn}/`. `k0` is the first partition field and `v0` is the value of the first partition field.
+
+### is_partition_field_write_in_file [boolean]
+
+If `is_partition_field_write_in_file` is `true`, the partition field and its value will be written into the data file.
+
+For example, if you want to write a Hive data file, its value should be `false`.
+
+### sink_columns [array]
+
+Which columns need to be written to the file; the default value is all of the columns obtained from `Transform` or `Source`.
+The order of the fields determines the order in which the file is actually written.
+
+### is_enable_transaction [boolean]
+
+If `is_enable_transaction` is `true`, we will ensure that data will not be lost or duplicated when it is written to the target directory.
+
+Please note that if `is_enable_transaction` is `true`, `${transactionId}_` will be automatically added to the head of the file name.
+
+Only `true` is supported now.
+
+### save_mode [string]
+
+Storage mode, currently supports `overwrite`, `append`, `ignore` and `error`. For the specific meaning of each mode, see [save-modes](https://spark.apache.org/docs/latest/sql-programming-guide.html#save-modes)
+
+Streaming jobs do not support `overwrite`.
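+
+To make the interplay of these options concrete, here is an illustrative sketch (the transaction id below is made up; real ids are generated at runtime): with `path="/tmp/seatunnel/output"`, `partition_by=["age"]`, the default `partition_dir_expression`, `file_name_expression="${transactionId}_${now}"` and `file_format="text"`, a row with `age=20` would end up in a file such as:
+
+```bash
+/tmp/seatunnel/output/age=20/T_4711_0_2022.05.18.txt
+```
+
+The `age=20` directory comes from `partition_dir_expression`, the date comes from `filename_time_format`, and the `txt` suffix comes from `file_format`.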
+
+
+
+## Example
+
+
+
+
+```bash
+
+LocalFile {
+    path="file:///tmp/hive/warehouse/test2"
+    field_delimiter="\t"
+    row_delimiter="\n"
+    partition_by=["age"]
+    partition_dir_expression="${k0}=${v0}"
+    is_partition_field_write_in_file=true
+    file_name_expression="${transactionId}_${now}"
+    file_format="text"
+    sink_columns=["name","age"]
+    filename_time_format="yyyy.MM.dd"
+    is_enable_transaction=true
+    save_mode="error"
+}
+
+```
+
+
+
+
+
+```bash
+
+HdfsFile {
+    path="hdfs:///tmp/hive/warehouse/test2"
+    field_delimiter="\t"
+    row_delimiter="\n"
+    partition_by=["age"]
+    partition_dir_expression="${k0}=${v0}"
+    is_partition_field_write_in_file=true
+    file_name_expression="${transactionId}_${now}"
+    file_format="text"
+    sink_columns=["name","age"]
+    filename_time_format="yyyy.MM.dd"
+    is_enable_transaction=true
+    save_mode="error"
+}
+
+```
+
+
+
diff --git a/docs/en/connector-v2/sink/Hive.md b/docs/en/connector-v2/sink/Hive.md
new file mode 100644
index 00000000000..1794633f195
--- /dev/null
+++ b/docs/en/connector-v2/sink/Hive.md
@@ -0,0 +1,62 @@
+# Hive
+
+## Description
+
+Write data to Hive.
+
+In order to use this connector, you must ensure your Spark/Flink cluster has already integrated Hive. The tested Hive version is 2.3.9.
+
+## Options
+
+| name | type | required | default value |
| --------------------------------- | ------ | -------- | ------------------------------------------------------------- |
| hive_table_name | string | yes | - |
| hive_metastore_uris | string | yes | - |
| partition_by | array | no | - |
| sink_columns | array | no | When this parameter is empty, all fields are sink columns |
| is_enable_transaction | boolean| no | true |
| save_mode | string | no | "append" |
+
+### hive_table_name [string]
+
+Target Hive table name, e.g. `db1.table1`.
+
+### hive_metastore_uris [string]
+
+Hive metastore URIs.
+
+### partition_by [array]
+
+Partition data based on selected fields.
+
+### sink_columns [array]
+
+Which columns need to be written to Hive; the default value is all of the columns obtained from `Transform` or `Source`.
+The order of the fields determines the order in which the file is actually written.
+
+### is_enable_transaction [boolean]
+
+If `is_enable_transaction` is `true`, we will ensure that data will not be lost or duplicated when it is written to the target directory.
+
+Only `true` is supported now.
+
+### save_mode [string]
+
+Storage mode. We need to support `overwrite` and `append`; only `append` is supported for now.
+
+Streaming jobs do not support `overwrite`.
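+
+Note that in a full SeaTunnel job config the `Hive` block shown in the example below is nested inside the top-level `sink` section. A minimal end-to-end sketch (the `FakeSource` source here is only an illustrative placeholder; replace it with your real source):
+
+```bash
+source {
+  FakeSource {
+    result_table_name = "fake"
+  }
+}
+
+sink {
+  Hive {
+    hive_table_name = "db1.table1"
+    hive_metastore_uris = "thrift://localhost:9083"
+  }
+}
+```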
+
+## Example
+
+```bash
+
+Hive {
+    hive_table_name="db1.table1"
+    hive_metastore_uris="thrift://localhost:9083"
+    partition_by=["age"]
+    sink_columns=["name","age"]
+    is_enable_transaction=true
+    save_mode="append"
+}
+
+```
diff --git a/docs/en/new-connector/sink/Jdbc.md b/docs/en/connector-v2/sink/Jdbc.md
similarity index 100%
rename from docs/en/new-connector/sink/Jdbc.md
rename to docs/en/connector-v2/sink/Jdbc.md
diff --git a/docs/en/new-connector/sink/common-options.md b/docs/en/connector-v2/sink/common-options.md
similarity index 100%
rename from docs/en/new-connector/sink/common-options.md
rename to docs/en/connector-v2/sink/common-options.md
diff --git a/docs/en/new-connector/source/Clickhouse.md b/docs/en/connector-v2/source/Clickhouse.md
similarity index 100%
rename from docs/en/new-connector/source/Clickhouse.md
rename to docs/en/connector-v2/source/Clickhouse.md
diff --git a/docs/en/connector-v2/source/Http.md b/docs/en/connector-v2/source/Http.md
new file mode 100644
index 00000000000..e190009425c
--- /dev/null
+++ b/docs/en/connector-v2/source/Http.md
@@ -0,0 +1,56 @@
+# Http
+## Description
+
+Used to read data from HTTP. Both streaming and batch mode are supported.
+
+## Options
+
+| name | type | required | default value |
| --- |--------| --- | --- |
| url | String | Yes | - |
| method | String | No | GET |
| headers | Map | No | - |
| params | Map | No | - |
| body | String | No | - |
+
+### url [string]
+The HTTP request URL.
+
+### method [string]
+
+The HTTP request method; only the GET and POST methods are supported.
+
+### headers [Map]
+
+The HTTP request headers.
+
+### params [Map]
+
+The HTTP request parameters.
+
+### body [String]
+
+The HTTP request body.
+
+## Example
+
+simple:
+
+```hocon
+Http {
+    url = "http://localhost/test/query"
+    method = "GET"
+    headers {
+        token = "9e32e859ef044462a257e1fc76730066"
+    }
+    params {
+        id = "1"
+        type = "TEST"
+    }
+    body = "{
+        \"code\": 5945141259552,
+        \"name\": \"test\"
+    }"
+}
```
+
diff --git a/docs/en/connector-v2/source/Hudi.md b/docs/en/connector-v2/source/Hudi.md
new file mode 100644
index 00000000000..4851d61ef39
--- /dev/null
+++ b/docs/en/connector-v2/source/Hudi.md
@@ -0,0 +1,59 @@
+# Hudi
+
+## Description
+
+Used to read data from Hudi. Currently, it only supports Hudi COW tables and snapshot queries in batch mode.
+
+In order to use this connector, you must ensure your Spark/Flink cluster has already integrated Hive. The tested Hive version is 2.3.9.
+
+## Options
+
+| name | type | required | default value |
|--------------------------|---------|----------|---------------|
| table.path | string | yes | - |
| table.type | string | yes | - |
| conf.files | string | yes | - |
| use.kerberos | boolean | no | false |
| kerberos.principal | string | no | - |
| kerberos.principal.file | string | no | - |
+
+### table.path [string]
+
+`table.path` The HDFS root path of the Hudi table, such as 'hdfs://nameservice/data/hudi/hudi_table/'.
+
+### table.type [string]
+
+`table.type` The type of the Hudi table. Now we only support 'cow'; 'mor' is not supported yet.
+
+### conf.files [string]
+
+`conf.files` The environment conf file path list (local paths), which is used to init the HDFS client for reading Hudi table files. An example is '/home/test/hdfs-site.xml;/home/test/core-site.xml;/home/test/yarn-site.xml'.
+
+### use.kerberos [boolean]
+
+`use.kerberos` Whether to enable Kerberos; the default is false.
+
+### kerberos.principal [string]
+
+`kerberos.principal` When Kerberos is enabled, we should set the Kerberos principal, such as 'test_user@xxx'.
+
+### kerberos.principal.file [string]
+
+`kerberos.principal.file` When Kerberos is enabled, we should set the Kerberos principal (keytab) file, such as '/home/test/test_user.keytab'.
+
+## Examples
+
+```hocon
+source {
+
+  Hudi {
+    table.path = "hdfs://nameservice/data/hudi/hudi_table/"
+    table.type = "cow"
+    conf.files = "/home/test/hdfs-site.xml;/home/test/core-site.xml;/home/test/yarn-site.xml"
+    use.kerberos = true
+    kerberos.principal = "test_user@xxx"
+    kerberos.principal.file = "/home/test/test_user.keytab"
+  }
+
+}
+```
\ No newline at end of file
diff --git a/docs/en/new-connector/source/Jdbc.md b/docs/en/connector-v2/source/Jdbc.md
similarity index 100%
rename from docs/en/new-connector/source/Jdbc.md
rename to docs/en/connector-v2/source/Jdbc.md
diff --git a/docs/en/new-connector/source/common-options.md b/docs/en/connector-v2/source/common-options.md
similarity index 100%
rename from docs/en/new-connector/source/common-options.md
rename to docs/en/connector-v2/source/common-options.md
diff --git a/docs/en/connector-v2/source/pulsar.md b/docs/en/connector-v2/source/pulsar.md
new file mode 100644
index 00000000000..68f9da4df7c
--- /dev/null
+++ b/docs/en/connector-v2/source/pulsar.md
@@ -0,0 +1,126 @@
+# Apache Pulsar
+
+## Description
+
+Source connector for Apache Pulsar. It supports both off-line and real-time jobs.
+
+## Options
+
+| name | type | required | default value |
| --- | --- | --- | --- |
| topic | String | No | - |
| topic-pattern | String | No | - |
| topic-discovery.interval | Long | No | 30000 |
| subscription.name | String | Yes | - |
| client.service-url | String | Yes | - |
| admin.service-url | String | Yes | - |
| auth.plugin-class | String | No | - |
| auth.params | String | No | - |
| poll.timeout | Integer | No | 100 |
| poll.interval | Long | No | 50 |
| poll.batch.size | Integer | No | 500 |
| cursor.startup.mode | Enum | No | LATEST |
| cursor.startup.timestamp | Long | No | - |
| cursor.reset.mode | Enum | No | LATEST |
| cursor.stop.mode | Enum | No | NEVER |
| cursor.stop.timestamp | Long | No | - |
+
+### topic [String]
+
+Topic name(s) to read data from when the table is used as a source. A topic list is also supported by separating topics with a semicolon, like 'topic-1;topic-2'.
+
+**Note, only one of "topic-pattern" and "topic" can be specified for sources.**
+
+### topic-pattern [String]
+
+The regular expression for a pattern of topic names to read from. All topics with names that match the specified regular expression will be subscribed by the consumer when the job starts running.
+
+**Note, only one of "topic-pattern" and "topic" can be specified for sources.**
+
+### topic-discovery.interval [Long]
+
+The interval (in ms) for the Pulsar source to discover the new topic partitions. A non-positive value disables the topic partition discovery.
+
+**Note, This option only works if the 'topic-pattern' option is used.**
+
+### subscription.name [String]
+
+Specify the subscription name for this consumer. This argument is required when constructing the consumer.
+
+### client.service-url [String]
+
+Service URL provider for the Pulsar service.
+To connect to Pulsar using client libraries, you need to specify a Pulsar protocol URL.
+You can assign Pulsar protocol URLs to specific clusters and use the Pulsar scheme.
+
+For example, for a broker on `localhost`: `pulsar://localhost:6650`; for multiple brokers: `pulsar://localhost:6650,localhost:6651`.
+
+### admin.service-url [String]
+
+The Pulsar service HTTP URL for the admin endpoint.
+
+For example, `http://my-broker.example.com:8080`, or `https://my-broker.example.com:8443` for TLS.
+
+### auth.plugin-class [String]
+
+Name of the authentication plugin.
+
+### auth.params [String]
+
+Parameters for the authentication plugin.
+
+For example, `key1:val1,key2:val2`
+
+### poll.timeout [Integer]
+
+The maximum time (in ms) to wait when fetching records. A longer time increases throughput but also latency.
+
+### poll.interval [Long]
+
+The interval time (in ms) when fetching records. A shorter time increases throughput, but also increases CPU load.
+
+### poll.batch.size [Integer]
+
+The maximum number of records to fetch in a single poll. A larger batch size increases throughput but also latency.
+
+### cursor.startup.mode [Enum]
+
+Startup mode for the Pulsar consumer; valid values are `'EARLIEST'`, `'LATEST'`, `'SUBSCRIPTION'`, `'TIMESTAMP'`.
+
+### cursor.startup.timestamp [Long]
+
+Start from the specified epoch timestamp (in milliseconds).
+
+**Note, This option is required when the "cursor.startup.mode" option is set to `'TIMESTAMP'`.**
+
+### cursor.reset.mode [Enum]
+
+Cursor reset strategy for the Pulsar consumer; valid values are `'EARLIEST'`, `'LATEST'`.
+
+**Note, This option only works if the "cursor.startup.mode" option is set to `'SUBSCRIPTION'`.**
+
+### cursor.stop.mode [Enum]
+
+Stop mode for the Pulsar consumer; valid values are `'NEVER'`, `'LATEST'` and `'TIMESTAMP'`.
+
+**Note, When `'NEVER'` is specified, it is a real-time job; the other modes are off-line jobs.**
+
+### cursor.stop.timestamp [Long]
+
+Stop at the specified epoch timestamp (in milliseconds).
+
+**Note, This option is required when the "cursor.stop.mode" option is set to `'TIMESTAMP'`.**
+
+## Example
+
+```hocon
+source {
+  Pulsar {
+  	topic = "example"
+  	subscription.name = "seatunnel"
+  	client.service-url = "pulsar://localhost:6650"
+  	admin.service-url = "http://my-broker.example.com:8080"
+  	result_table_name = "test"
+  }
+}
+```
\ No newline at end of file
diff --git a/docs/en/connector/source/Fake.mdx b/docs/en/connector/source/Fake.mdx
index 4487f828181..12a6c2bfb7f 100644
--- a/docs/en/connector/source/Fake.mdx
+++ b/docs/en/connector/source/Fake.mdx
@@ -100,7 +100,7 @@ column_config option type.
| name | type | required | default value | support values |
|-------------|-------------|----------|---------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| name | string | yes | string | - |
-| type | string | yes | string | int,integer,byte,boolean,char,<br/>
character,short,long,float,double,
date,timestamp,decimal,bigdecimal,
bigint,biginteger,int[],byte[],
boolean[],char[],character[],short[],
long[],float[],double[],string[],
binary,varchar | +| type | string | yes | string | int,integer,byte,boolean,char,
character,short,long,float,double,
date,timestamp,decimal,bigdecimal,
bigint,int[],byte[],
boolean[],char[],character[],short[],
long[],float[],double[],string[],
binary,varchar |
| mock_config | mock_config | no | - | - |

mock_config Option
diff --git a/docs/en/connector/source/Http.md b/docs/en/connector/source/Http.md
index d9d3a399ba3..ad34e9e7469 100644
--- a/docs/en/connector/source/Http.md
+++ b/docs/en/connector/source/Http.md
@@ -37,7 +37,7 @@ HTTP request header, json format.
### request_params[string]
-HTTP request parameters, json format.
+HTTP request parameters, json format. Use a string with escapes to store the JSON.
### sync_path[string]
diff --git a/docs/en/contribution/new-license.md b/docs/en/contribution/new-license.md
index 44c80b34a06..8db57150c57 100644
--- a/docs/en/contribution/new-license.md
+++ b/docs/en/contribution/new-license.md
@@ -1,25 +1,37 @@
# How To Add New License
-If you have any new Jar binary package adding in you PR, you need to follow the steps below to notice license
+### ASF 3RD PARTY LICENSE POLICY
-1. declared in `tools/dependencies/known-dependencies.txt`
+When you intend to add a new feature to SeaTunnel (or other Apache projects) that refers to other open-source software, you have to pay attention to the open-source licenses which Apache projects can accept.
-2. Add the corresponding License file under `seatunnel-dist/release-docs/licenses`, if it is a standard Apache License, it does not need to be added
+[ASF 3RD PARTY LICENSE POLICY](https://apache.org/legal/resolved.html)
-3. Add the corresponding statement in `seatunnel-dist/release-docs/LICENSE`
+If the 3rd party software is not covered by the above policy, we cannot accept your code.
- ```bash
- # At the same time, you can also use the script to assist the inspection.
- # Because it only uses the Python native APIs and does not depend on any third-party libraries, it can run using the original Python environment.
- # Please refer to the documentation if you do not have a Python env: https://www.python.org/downloads/
-
- # First, generate the seatunnel-dist/target/THIRD-PARTY.txt temporary file
- ./mvnw license:aggregate-add-third-party -DskipTests -Dcheckstyle.skip
- # Second, run the script to assist the inspection
- python3 tools/dependencies/license.py seatunnel-dist/target/THIRD-PARTY.txt seatunnel-dist/release-docs/LICENSE true
- ```
-4. Add the corresponding statement in `seatunnel-dist/release-docs/NOTICE`
+### How to Legally Use 3rd Party Open-source Software in the SeaTunnel
-If you want to learn more about strategy of License, you could read
-[License Notice](https://seatunnel.apache.org/community/submit_guide/license) in submit guide.
+Moreover, when we intend to refer new software (not limited to 3rd party jars, text, CSS, js, pictures, icons, audio files etc. and modifications based on 3rd party files) to our project, we need to use it legally in addition to having the permission of the ASF. Refer to the following article:
+
+* [COMMUNITY-LED DEVELOPMENT "THE APACHE WAY"](https://apache.org/dev/licensing-howto.html)
+
+
+For example, we should include the NOTICE file (most open-source projects have a NOTICE file, generally under the root directory) of ZooKeeper in our project when we are using ZooKeeper. As Apache explains, "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work.
+
+We are not going to dive into every 3rd party open-source license policy here; you may look them up if interested.
+
+### SeaTunnel-License Check Rules
+
+In general, we have License-check scripts in our project. SeaTunnel-License-Check is provided by [SkyWalking](https://github.com/apache/skywalking), which differs a bit from other open-source projects. All in all, we are trying to make sure we avoid license issues in the first place.
+
+We need to follow these steps when we add new jars or external resources:
+
+* Add the name and the version of the jar file in `tools/dependencies/known-dependencies.txt`
+* Add the relevant maven repository address to the 'seatunnel-dist/release-docs/LICENSE' file
+* Append the relevant NOTICE files to the 'seatunnel-dist/release-docs/NOTICE' file and make sure they are no different from the ones in the original repository
+* Add the relevant license files under the 'seatunnel-dist/release-docs/licenses' directory, with the file name in the form license-filename.txt, e.g. license-zk.txt
+
+### References
+
+* [COMMUNITY-LED DEVELOPMENT "THE APACHE WAY"](https://apache.org/dev/licensing-howto.html)
+* [ASF 3RD PARTY LICENSE POLICY](https://apache.org/legal/resolved.html)
diff --git a/docs/en/contribution/setup.md b/docs/en/contribution/setup.md
index bfca4e2d1a7..5194a951de3 100644
--- a/docs/en/contribution/setup.md
+++ b/docs/en/contribution/setup.md
@@ -57,12 +57,12 @@ as an example, when you run it successfully you could see the output as below:
```log
-+I[Gary, 1647423592505]
-+I[Kid Xiong, 1647423593510]
-+I[Ricky Huo, 1647423598537]
++I[Ricky Huo, 71]
++I[Gary, 12]
++I[Ricky Huo, 93]
...
...
-+I[Gary, 1647423597533]
++I[Ricky Huo, 83]
```
## What's More
diff --git a/docs/en/start/local.mdx b/docs/en/start/local.mdx
index 13e61b88511..8f44993907f 100644
--- a/docs/en/start/local.mdx
+++ b/docs/en/start/local.mdx
@@ -15,7 +15,7 @@ Before you getting start the local run, you need to make sure you already have i
* Download the engine, you can choose and download one of them from below as your favour, you could see more information about [why we need engine in SeaTunnel](../faq.md#why-i-should-install-computing-engine-like-spark-or-flink)
  * Spark: Please [download Spark](https://spark.apache.org/downloads.html) first(**required version >= 2 and version < 3.x **). For more information you could see [Getting Started: standalone](https://spark.apache.org/docs/latest/spark-standalone.html#installing-spark-standalone-to-a-cluster)
-  * Flink: Please [download Flink](https://flink.apache.org/downloads.html) first(**required version >= 1.9.0 and version < 1.14.x **). For more information you could see [Getting Started: standalone](https://nightlies.apache.org/flink/flink-docs-release-1.14/docs/deployment/resource-providers/standalone/overview/)
+  * Flink: Please [download Flink](https://flink.apache.org/downloads.html) first(**required version >= 1.12.0 and version < 1.14.x **). For more information you could see [Getting Started: standalone](https://nightlies.apache.org/flink/flink-docs-release-1.14/docs/deployment/resource-providers/standalone/overview/)
## Installation
diff --git a/docs/en/transform/nulltf.md b/docs/en/transform/nulltf.md
new file mode 100644
index 00000000000..dad4ee945b0
--- /dev/null
+++ b/docs/en/transform/nulltf.md
@@ -0,0 +1,73 @@
+# Nulltf
+
+## Description
+
+Set default values for null fields.
+
+:::tip
+
+This transform is only supported by the Spark engine.
+
+:::
+
+## Options
+
+| name | type | required | default value |
| ------------------- | ------- | -------- | ------------- |
| fields | array | no | - |
+
+### fields [list]
+
+A list of fields whose default values will be set.
+The default value of a field can be set in the form of "field:value". If not set, the default value will be chosen according to the field type.
+
+## Examples
+
+The configuration:
+
+```bash
  nulltf {
    fields {
      name: "",
      price: 0,
      num: 100,
      flag: false,
      dt_timestamp: "2022-05-18 13:51:40.603",
      dt_date: "2022-05-19"
    }
  }
```
+
+Before using the nulltf transform:
+
+```bash
+-----+-----+----+-----+--------------------+----------+
| name|price| num| flag|        dt_timestamp|   dt_date|
+-----+-----+----+-----+--------------------+----------+
|名称1| 22.5| 100|false|2022-05-20 14:34:...|2022-05-20|
| null| 22.5| 100|false|2022-05-20 14:35:...|2022-05-20|
|名称1| null| 100|false|2022-05-20 14:35:...|2022-05-20|
|名称1| 22.5|null|false|2022-05-20 14:36:...|2022-05-20|
|名称1| 22.5| 100| null|2022-05-20 14:36:...|2022-05-20|
|名称1| 22.5| 100|false|                null|2022-05-20|
|名称1| 22.5| 100|false|2022-05-20 14:37:...|      null|
+-----+-----+----+-----+--------------------+----------+
```
+
+After using the nulltf transform:
+
+```bash
+-----+-----+----+-----+--------------------+----------+
| name|price| num| flag|        dt_timestamp|   dt_date|
+-----+-----+----+-----+--------------------+----------+
|名称1| 22.5| 100|false|2022-05-20 14:34:...|2022-05-20|
|     | 22.5| 100|false|2022-05-20 14:35:...|2022-05-20|
|名称1|  0.0| 100|false|2022-05-20 14:35:...|2022-05-20|
|名称1| 22.5| 100|false|2022-05-20 14:36:...|2022-05-20|
|名称1| 22.5| 100|false|2022-05-20 14:36:...|2022-05-20|
|名称1| 22.5| 100|false|2022-05-18 13:51:...|2022-05-20|
|名称1| 22.5| 100|false|2022-05-20 14:37:...|2022-05-19|
+-----+-----+----+-----+--------------------+----------+
```
+
diff --git a/docs/sidebars.js b/docs/sidebars.js
index ef8c73158bc..ad3cb667fb8 100644
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@@ -78,6 +78,51 @@ const sidebars = {
                'concept/config',
            ],
        },
+        {
+            type: 'category',
+            label: 'Connector-V2',
+            items: [
+                {
+                    type: 'category',
+                    label: 'Sink',
+                    link: {
+                        type: 'generated-index',
+                        title: 'Sink-V2 of SeaTunnel',
+                        description: 'List all sinks supported by Apache SeaTunnel for now.',
+                        // Should remove the `v2` suffix when we migrate all sink to v2 and delete the old one
+                        slug: '/category/sink-v2',
+                        keywords: ['sink'],
+                        image: '/img/favicon.ico',
+                    },
+                    items: [
+                        {
+                            type: 'autogenerated',
+                            dirName: 'connector-v2/sink',
+                        },
+                    ],
+                },
+                {
+                    type: 'category',
+                    label: 'Source',
+                    link: {
+                        type: 'generated-index',
+                        title: 'Source-V2 of SeaTunnel',
+                        description: 'List all sources supported by Apache SeaTunnel for now.',
+                        // Should remove the `v2` suffix when we migrate all source to v2 and delete the old one
+                        slug: '/category/source-v2',
+                        keywords: ['source'],
+                        image: '/img/favicon.ico',
+                    },
+                    items: [
+                        {
+                            type: 'autogenerated',
+                            dirName: 'connector-v2/source',
+                        },
+                    ],
+
+                },
+            ],
+        },
        {
            type: 'category',
            label: 'Connector',
diff --git a/plugin-mapping.properties b/plugin-mapping.properties
index da3890a70a0..21a23ae6ca4 100644
--- a/plugin-mapping.properties
+++ b/plugin-mapping.properties
@@ -96,8 +96,15 @@ seatunnel.sink.Kafka = connector-kafka
seatunnel.source.Http = connector-http
seatunnel.source.Socket = connector-socket
seatunnel.sink.Hive = connector-hive
+seatunnel.source.Hive = connector-hive
seatunnel.source.Clickhouse = connector-clickhouse
seatunnel.sink.Clickhouse =
connector-clickhouse seatunnel.sink.ClickhouseFile = connector-clickhouse seatunnel.source.Jdbc = connector-jdbc seatunnel.sink.Jdbc = connector-jdbc +seatunnel.source.Kudu = connector-Kudu +seatunnel.sink.Kudu = connector-Kudu +seatunnel.sink.HdfsFile = connector-file-hadoop +seatunnel.sink.LocalFile = connector-file-local +seatunnel.source.Pulsar = connector-pulsar +seatunnel.source.Hudi = connector-hudi diff --git a/pom.xml b/pom.xml index 23e15e3c4b3..9578970ef13 100644 --- a/pom.xml +++ b/pom.xml @@ -82,17 +82,32 @@ seatunnel-core seatunnel-transforms seatunnel-connectors - seatunnel-connectors-v2 - seatunnel-connectors-v2-dist - seatunnel-examples - seatunnel-e2e seatunnel-api seatunnel-translation seatunnel-plugin-discovery seatunnel-formats seatunnel-dist + seatunnel-server + + + all + + true + + + seatunnel-connectors-v2 + seatunnel-connectors-v2-dist + seatunnel-examples + seatunnel-e2e + + + + release + + + 2.1.3-SNAPSHOT 2.1.1 @@ -107,7 +122,19 @@ 4.1.0 0.13.1 1.13.6 - 0.10.0 + 0.11.1 + 1.5.6 + 2.3.9 + 1.2 + 1.9.4 + 1.4 + 1.7 + 1.8.1 + 4.3.0 + 1.9.13 + 3.1.6 + 1.19 + 2.1 2.7 2.12.6 1.18.0 @@ -142,6 +169,7 @@ 2.2.0 2.6.0 3.4 + 1.11.1 4.4 3.3.0 provided @@ -180,11 +208,18 @@ 1.7.25 19.0 1.0.1 + 2.0.9 2.6.5 3.0.0 org.apache.seatunnel.shade 4.3.0 1.1.8.3 + 2.6.8 + 2.2.2 + 1.2.9 + 2.6.1 + 1.5.10 + 6.2.2.Final @@ -227,7 +262,12 @@ lz4 1.3.0 - + + + org.apache.kudu + kudu-client + ${kudu.version} + org.apache.flink @@ -427,7 +467,7 @@ spark-streaming-kafka-0-10_${scala.binary.version} ${spark.version} - + org.projectlombok lombok @@ -478,6 +518,12 @@ ${flink.version} + + org.apache.hudi + hudi-hadoop-mr-bundle + ${hudi.version} + + org.apache.hudi hudi-spark-bundle_${scala.binary.version} @@ -639,12 +685,23 @@ ${guava.version} + + org.powermock + powermock-module-junit4 + ${powermock.version} + test + + + org.powermock + powermock-api-mockito2 + ${powermock.version} + test + com.github.jsonzou jmockdata ${jmockdata.version} - org.slf4j slf4j-api @@ -663,6 +720,214 @@ ${snappy-java.version} + + org.apache.orc + orc-core + ${orc.version} + + + javax.servlet + servlet-api + + + + org.apache.logging.log4j + * + + + com.fasterxml.jackson.core + * + + + org.apapche.hadoop + * + + + org.apache.curator + * + + + + + org.codehaus.jackson + jackson-core-asl + ${codehaus.jackson.version} + + + org.codehaus.jackson + jackson-xc + ${codehaus.jackson.version} + + + org.codehaus.jackson + jackson-mapper- + ${codehaus.jackson.version} + + + org.codehaus.jackson + jackson-jaxrs + ${codehaus.jackson.version} + + + com.sun.jersey + jersey-json + ${jersey.version} + + + org.apache.hive + hive-exec + ${hive.exec.version} + + + org.pentaho + pentaho-aggdesigner-algorithm + + + javax.servlet + servlet-api + + + org.apache.logging.log4j + log4j-1.2-api + + + org.apache.logging.log4j + log4j-web + + + com.fasterxml.jackson.core + * + + + org.apapche.hadoop + * + + + com.github.joshelser + dropwizard-metrics-hadoop-metrics2-reporter + + + org.apache.logging.log4j + * + + + org.apache.zookeeper + zookeeper + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + + + org.apache.hadoop + hadoop-hdfs + + + + + commons-logging + commons-logging + ${commons.logging.version} + + + commons-beanutils + commons-beanutils + ${commons.beanutils.version} + + + commons-cli + commons-cli + ${commons.cli.version} + + + commons-configuration + commons-configuration + ${commons.configuration.version} + + + commons-digester + commons-digester + ${commons.digester.version} + + + org.apache.curator + curator-client + 
${curator.version} + + + org.apache.curator + curator-framework + ${curator.version} + + + org.apache.curator + curator-recipes + ${curator.version} + + + com.sun.jersey + jersey-core + ${jersey.version} + + + com.sun.jersey + jersey-server + ${jersey.version} + + + javax.servlet.jsp + jsp-api + ${javax.servlet.jap.version} + + + + + org.springframework.boot + spring-boot-starter-web + ${spring-boot.version} + + + + org.springframework.boot + spring-boot-starter-jetty + ${spring-boot.version} + + + + org.mybatis.spring.boot + mybatis-spring-boot-starter + ${mybatis-spring-boot-starter.version} + + + + com.alibaba + druid-spring-boot-starter + ${druid-spring-boot-starter.version} + + + + io.springfox + springfox-swagger2 + ${springfox-swagger.version} + + + + io.springfox + springfox-swagger-ui + ${springfox-swagger.version} + + + + io.swagger + swagger-annotations + ${swagger-annotations.version} + + + + org.hibernate.validator + hibernate-validator + ${hibernate.validator.version} + @@ -907,7 +1172,7 @@ - + @@ -1038,7 +1303,6 @@ org.apache.maven.plugins maven-surefire-plugin - org.apache.maven.plugins maven-failsafe-plugin @@ -1061,4 +1325,4 @@ - \ No newline at end of file + diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/common/PrepareFailException.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/common/PrepareFailException.java index bb43d07ff11..7cb75b98fed 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/common/PrepareFailException.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/common/PrepareFailException.java @@ -30,4 +30,9 @@ public PrepareFailException(String pluginName, PluginType type, String message) super(String.format("PluginName: %s, PluginType: %s, Message: %s", pluginName, type.getType(), message)); } + + public PrepareFailException(String pluginName, PluginType type, String message, Throwable cause) { + super(String.format("PluginName: %s, PluginType: %s, Message: %s", pluginName, type.getType(), + message), cause); + } } diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/serialization/Serializer.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/serialization/Serializer.java index ffdd9421d6b..61703486445 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/serialization/Serializer.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/serialization/Serializer.java @@ -18,9 +18,8 @@ package org.apache.seatunnel.api.serialization; import java.io.IOException; -import java.io.Serializable; -public interface Serializer extends Serializable { +public interface Serializer { /** * Serializes the given object. diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSinkWriterContext.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSinkWriterContext.java index 8c8219db71d..c8a3fd61ff9 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSinkWriterContext.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSinkWriterContext.java @@ -17,26 +17,14 @@ package org.apache.seatunnel.api.sink; -import java.util.Map; - /** * The default {@link SinkWriter.Context} implement class. 
*/ public class DefaultSinkWriterContext implements SinkWriter.Context { - - private final Map configuration; private final int subtask; - private final int parallelism; - public DefaultSinkWriterContext(Map configuration, int subtask, int parallelism) { - this.configuration = configuration; + public DefaultSinkWriterContext(int subtask) { this.subtask = subtask; - this.parallelism = parallelism; - } - - @Override - public Map getConfiguration() { - return configuration; } @Override @@ -44,8 +32,4 @@ public int getIndexOfSubtask() { return subtask; } - @Override - public int getNumberOfParallelSubtasks() { - return parallelism; - } } diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SeaTunnelSink.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SeaTunnelSink.java index 48d8f33dacf..59517e409a6 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SeaTunnelSink.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SeaTunnelSink.java @@ -20,7 +20,7 @@ import org.apache.seatunnel.api.common.PluginIdentifierInterface; import org.apache.seatunnel.api.common.SeaTunnelPluginLifeCycle; import org.apache.seatunnel.api.serialization.Serializer; -import org.apache.seatunnel.api.source.SeaTunnelRuntimeEnvironment; +import org.apache.seatunnel.api.source.SeaTunnelContextAware; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; @@ -43,7 +43,7 @@ * {@link SinkAggregatedCommitter} handle it, this class should implement interface {@link Serializable}. */ public interface SeaTunnelSink - extends Serializable, PluginIdentifierInterface, SeaTunnelPluginLifeCycle, SeaTunnelRuntimeEnvironment { + extends Serializable, PluginIdentifierInterface, SeaTunnelPluginLifeCycle, SeaTunnelContextAware { /** * Set the row type info of sink row data. This method will be automatically called by translation. diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkWriter.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkWriter.java index 268d3d40e6a..897e64b4cea 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkWriter.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkWriter.java @@ -21,7 +21,6 @@ import java.io.Serializable; import java.util.Collections; import java.util.List; -import java.util.Map; import java.util.Optional; /** @@ -76,20 +75,10 @@ default List snapshotState(long checkpointId) throws IOException { interface Context extends Serializable{ - /** - * Gets the configuration with which Job was started. - */ - Map getConfiguration(); - /** * @return The index of this subtask. */ int getIndexOfSubtask(); - /** - * @return The number of parallel Sink tasks. 
- */ - int getNumberOfParallelSubtasks(); - } } diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/SeaTunnelRuntimeEnvironment.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/SeaTunnelContextAware.java similarity index 95% rename from seatunnel-api/src/main/java/org/apache/seatunnel/api/source/SeaTunnelRuntimeEnvironment.java rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/source/SeaTunnelContextAware.java index c0a51dd5799..429f0515583 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/SeaTunnelRuntimeEnvironment.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/SeaTunnelContextAware.java @@ -22,7 +22,7 @@ /** * This interface defines the runtime environment of the SeaTunnel application. */ -public interface SeaTunnelRuntimeEnvironment { +public interface SeaTunnelContextAware { default void setSeaTunnelContext(SeaTunnelContext seaTunnelContext){ // nothing diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/SeaTunnelSource.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/SeaTunnelSource.java index 3e1cb8702fa..f93f4d3bf7f 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/SeaTunnelSource.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/SeaTunnelSource.java @@ -33,8 +33,8 @@ * @param The type of splits handled by the source. * @param The type of checkpoint states. */ -public interface SeaTunnelSource - extends Serializable, PluginIdentifierInterface, SeaTunnelPluginLifeCycle, SeaTunnelRuntimeEnvironment { +public interface SeaTunnelSource + extends Serializable, PluginIdentifierInterface, SeaTunnelPluginLifeCycle, SeaTunnelContextAware { /** * Get the boundedness of this source. @@ -94,6 +94,8 @@ SourceSplitEnumerator restoreEnumerator(SourceSplitEnumerator.Co * * @return enumerator state serializer. */ - Serializer getEnumeratorStateSerializer(); + default Serializer getEnumeratorStateSerializer(){ + return new DefaultSerializer<>(); + } } diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/SourceReader.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/SourceReader.java index b8505902ec6..72b3e7eb57f 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/SourceReader.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/SourceReader.java @@ -114,6 +114,6 @@ interface Context { * * @param sourceEvent the source event to coordinator. 
*/ - void sendSourceEventToCoordinator(SourceEvent sourceEvent); + void sendSourceEventToEnumerator(SourceEvent sourceEvent); } } diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/connector/TableSource.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/connector/TableSource.java index edb8ee69dfe..d9b6294c4d0 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/connector/TableSource.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/connector/TableSource.java @@ -20,7 +20,9 @@ import org.apache.seatunnel.api.source.SeaTunnelSource; import org.apache.seatunnel.api.source.SourceSplit; -public interface TableSource { +import java.io.Serializable; + +public interface TableSource { SeaTunnelSource createSource(); } diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java index c260c2640e7..288e35beb96 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java @@ -27,6 +27,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.Serializable; import java.util.ArrayList; import java.util.LinkedList; import java.util.List; @@ -42,7 +43,7 @@ public final class FactoryUtil { private static final Logger LOG = LoggerFactory.getLogger(FactoryUtil.class); - public static List> createAndPrepareSource( + public static List> createAndPrepareSource( List multipleTables, Map options, ClassLoader classLoader, diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSourceFactory.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSourceFactory.java index a75236e0cec..d637cc97023 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSourceFactory.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSourceFactory.java @@ -20,11 +20,13 @@ import org.apache.seatunnel.api.source.SourceSplit; import org.apache.seatunnel.api.table.connector.TableSource; +import java.io.Serializable; + /** * This is an SPI interface, used to create {@link TableSource}. Each plugin need to have it own implementation. 
* todo: now we have not use this interface, we directly use {@link org.apache.seatunnel.api.source.SeaTunnelSource} as the SPI interface */ public interface TableSourceFactory extends Factory { - TableSource createSource(TableFactoryContext context); + TableSource createSource(TableFactoryContext context); } diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java index de82a639baf..aa5923ec1c8 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java @@ -87,7 +87,6 @@ public boolean equals(Object o) { return tableId == that.tableId && kind == that.kind && Arrays.deepEquals(fields, that.fields); } - @SuppressWarnings("magicnumber") @Override public int hashCode() { int result = Objects.hash(tableId, kind); diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRowType.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRowType.java index 45164c2463b..cb94bba000a 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRowType.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRowType.java @@ -98,7 +98,6 @@ public boolean equals(Object obj) { } @Override - @SuppressWarnings("MagicNumber") public int hashCode() { int result = Arrays.hashCode(fieldNames); result = 31 * result + Arrays.hashCode(fieldTypes); diff --git a/seatunnel-apis/seatunnel-api-flink/src/main/java/org/apache/seatunnel/flink/FlinkEnvironment.java b/seatunnel-apis/seatunnel-api-flink/src/main/java/org/apache/seatunnel/flink/FlinkEnvironment.java index eaf6780d8c6..49c97beb637 100644 --- a/seatunnel-apis/seatunnel-api-flink/src/main/java/org/apache/seatunnel/flink/FlinkEnvironment.java +++ b/seatunnel-apis/seatunnel-api-flink/src/main/java/org/apache/seatunnel/flink/FlinkEnvironment.java @@ -123,31 +123,31 @@ public JobMode getJobMode() { @Override public void registerPlugin(List pluginPaths) { pluginPaths.forEach(url -> LOGGER.info("register plugins : {}", url)); - Configuration configuration; + List configurations = new ArrayList<>(); try { - if (isStreaming()) { - configuration = - (Configuration) Objects.requireNonNull(ReflectionUtils.getDeclaredMethod(StreamExecutionEnvironment.class, - "getConfiguration")).orElseThrow(() -> new RuntimeException("can't find " + - "method: getConfiguration")).invoke(this.environment); - } else { - configuration = batchEnvironment.getConfiguration(); + configurations.add((Configuration) Objects.requireNonNull(ReflectionUtils.getDeclaredMethod(StreamExecutionEnvironment.class, + "getConfiguration")).orElseThrow(() -> new RuntimeException("can't find " + + "method: getConfiguration")).invoke(this.environment)); + if (!isStreaming()) { + configurations.add(batchEnvironment.getConfiguration()); } } catch (Exception e) { throw new RuntimeException(e); } - List jars = configuration.get(PipelineOptions.JARS); - if (jars == null) { - jars = new ArrayList<>(); - } - jars.addAll(pluginPaths.stream().map(URL::toString).collect(Collectors.toList())); - configuration.set(PipelineOptions.JARS, jars); - List classpath = configuration.get(PipelineOptions.CLASSPATHS); - if (classpath == null) { - classpath = new ArrayList<>(); - } - classpath.addAll(pluginPaths.stream().map(URL::toString).collect(Collectors.toList())); - 
configuration.set(PipelineOptions.CLASSPATHS, classpath); + configurations.forEach(configuration -> { + List jars = configuration.get(PipelineOptions.JARS); + if (jars == null) { + jars = new ArrayList<>(); + } + jars.addAll(pluginPaths.stream().map(URL::toString).collect(Collectors.toList())); + configuration.set(PipelineOptions.JARS, jars.stream().distinct().collect(Collectors.toList())); + List classpath = configuration.get(PipelineOptions.CLASSPATHS); + if (classpath == null) { + classpath = new ArrayList<>(); + } + classpath.addAll(pluginPaths.stream().map(URL::toString).collect(Collectors.toList())); + configuration.set(PipelineOptions.CLASSPATHS, classpath.stream().distinct().collect(Collectors.toList())); + }); } public StreamExecutionEnvironment getStreamExecutionEnvironment() { diff --git a/seatunnel-apis/seatunnel-api-flink/src/main/java/org/apache/seatunnel/flink/util/SchemaUtil.java b/seatunnel-apis/seatunnel-api-flink/src/main/java/org/apache/seatunnel/flink/util/SchemaUtil.java index c980e308eb0..7a393cca86c 100644 --- a/seatunnel-apis/seatunnel-api-flink/src/main/java/org/apache/seatunnel/flink/util/SchemaUtil.java +++ b/seatunnel-apis/seatunnel-api-flink/src/main/java/org/apache/seatunnel/flink/util/SchemaUtil.java @@ -17,7 +17,6 @@ package org.apache.seatunnel.flink.util; -import org.apache.seatunnel.common.utils.JsonUtils; import org.apache.seatunnel.flink.enums.FormatType; import org.apache.seatunnel.shade.com.typesafe.config.Config; @@ -69,7 +68,7 @@ public static void setSchema(Schema schema, Object info, FormatType format) { getJsonSchema(schema, (ObjectNode) info); break; case CSV: - getCsvSchema(schema, (List>) info); + getCsvSchema(schema, (ArrayNode) info); break; case ORC: getOrcSchema(schema, (ObjectNode) info); @@ -149,11 +148,14 @@ private static void getJsonSchema(Schema schema, ObjectNode json) { } } - private static void getCsvSchema(Schema schema, List> schemaList) { + private static void getCsvSchema(Schema schema, ArrayNode schemaList) { + Iterator iterator = schemaList.elements(); + + while (iterator.hasNext()) { + JsonNode jsonNode = iterator.next(); + String field = jsonNode.get("field").textValue(); + String type = jsonNode.get("type").textValue().toUpperCase(); - for (Map map : schemaList) { - String field = map.get("field"); - String type = map.get("type").toUpperCase(); schema.field(field, type); } } @@ -200,7 +202,6 @@ public static RowTypeInfo getTypeInformation(ObjectNode json) { int size = json.size(); String[] fields = new String[size]; TypeInformation[] informations = new TypeInformation[size]; - Map jsonMap = JsonUtils.toMap(json); int i = 0; Iterator> nodeIterator = json.fields(); while (nodeIterator.hasNext()) { diff --git a/seatunnel-common/src/main/java/org/apache/seatunnel/common/Constants.java b/seatunnel-common/src/main/java/org/apache/seatunnel/common/Constants.java index f77cd014b41..d75131b838a 100644 --- a/seatunnel-common/src/main/java/org/apache/seatunnel/common/Constants.java +++ b/seatunnel-common/src/main/java/org/apache/seatunnel/common/Constants.java @@ -23,6 +23,24 @@ public final class Constants { public static final String LOGO = "SeaTunnel"; + public static final String SOURCE = "source"; + + public static final String TRANSFORM = "transform"; + + public static final String SINK = "sink"; + + public static final String SOURCE_SERIALIZATION = "source.serialization"; + + public static final String SOURCE_PARALLELISM = "source.parallelism"; + + public static final String HDFS_ROOT = "hdfs.root"; + + public static final 
String HDFS_USER = "hdfs.user"; + + public static final String CHECKPOINT_INTERVAL = "checkpoint.interval"; + + public static final String CHECKPOINT_ID = "checkpoint.id"; + private Constants() { } } diff --git a/seatunnel-common/src/main/java/org/apache/seatunnel/common/config/Common.java b/seatunnel-common/src/main/java/org/apache/seatunnel/common/config/Common.java index d48c190ec87..67034db0490 100644 --- a/seatunnel-common/src/main/java/org/apache/seatunnel/common/config/Common.java +++ b/seatunnel-common/src/main/java/org/apache/seatunnel/common/config/Common.java @@ -35,6 +35,8 @@ private Common() { private static DeployMode MODE; + private static boolean STARTER = false; + /** * Set mode. return false in case of failure */ @@ -42,6 +44,10 @@ public static void setDeployMode(DeployMode mode) { MODE = mode; } + public static void setStarter(boolean inStarter) { + STARTER = inStarter; + } + public static DeployMode getDeployMode() { return MODE; } @@ -54,7 +60,7 @@ public static DeployMode getDeployMode() { * When running seatunnel in --master yarn or --master mesos, you can put plugins related files in plugins dir. */ public static Path appRootDir() { - if (DeployMode.CLIENT == MODE) { + if (DeployMode.CLIENT == MODE || STARTER) { try { String path = Common.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath(); path = new File(path).getPath(); diff --git a/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/ReflectionUtils.java b/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/ReflectionUtils.java index 3edd9cccf96..f186f0486a2 100644 --- a/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/ReflectionUtils.java +++ b/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/ReflectionUtils.java @@ -28,7 +28,7 @@ public static Optional getDeclaredMethod(Class clazz, String methodNa Optional method = Optional.empty(); Method m; - for (; clazz != Object.class; clazz = clazz.getSuperclass()) { + for (; clazz != null; clazz = clazz.getSuperclass()) { try { m = clazz.getDeclaredMethod(methodName, parameterTypes); m.setAccessible(true); @@ -80,9 +80,14 @@ public static Object invoke(Object object, String methodName, Object... args) { public static Object invoke( Object object, String methodName, Class[] argTypes, Object[] args) { try { - Method method = object.getClass().getDeclaredMethod(methodName, argTypes); - method.setAccessible(true); - return method.invoke(object, args); + Optional method = getDeclaredMethod(object.getClass(), methodName, argTypes); + if (method.isPresent()) { + method.get().setAccessible(true); + return method.get().invoke(object, args); + } else { + throw new NoSuchMethodException(String.format("method invoke failed, no such method '%s' in '%s'", + methodName, object.getClass())); + } } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e) { throw new RuntimeException("method invoke failed", e); } diff --git a/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/SerializationException.java b/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/SerializationException.java new file mode 100644 index 00000000000..33ca29a6f7d --- /dev/null +++ b/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/SerializationException.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.common.utils; + +public class SerializationException extends RuntimeException { + + /** + * Required for serialization support. + * + * @see java.io.Serializable + */ + private static final long serialVersionUID = 2263144814025689516L; + + /** + * Constructs a new {@code SerializationException} without specified + * detail message. + */ + public SerializationException() { + } + + /** + * Constructs a new {@code SerializationException} with specified + * detail message. + * + * @param msg The error message. + */ + public SerializationException(final String msg) { + super(msg); + } + + /** + * Constructs a new {@code SerializationException} with specified + * nested {@code Throwable}. + * + * @param cause The {@code Exception} or {@code Error} + * that caused this exception to be thrown. + */ + public SerializationException(final Throwable cause) { + super(cause); + } + + /** + * Constructs a new {@code SerializationException} with specified + * detail message and nested {@code Throwable}.
+ * + * @param msg The error message. + * @param cause The {@code Exception} or {@code Error} + * that caused this exception to be thrown. + */ + public SerializationException(final String msg, final Throwable cause) { + super(msg, cause); + } +} diff --git a/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/SerializationUtils.java b/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/SerializationUtils.java index 43dead16767..7e721617e8e 100644 --- a/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/SerializationUtils.java +++ b/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/SerializationUtils.java @@ -20,30 +20,60 @@ import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang3.StringUtils; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.ObjectStreamClass; import java.io.Serializable; public class SerializationUtils { public static String objectToString(Serializable obj) { if (obj != null) { - return Base64.encodeBase64String(org.apache.commons.lang3.SerializationUtils.serialize(obj)); + return Base64.encodeBase64String(serialize(obj)); } return null; } public static T stringToObject(String str) { if (StringUtils.isNotEmpty(str)) { - return org.apache.commons.lang3.SerializationUtils.deserialize(Base64.decodeBase64(str)); + return deserialize(Base64.decodeBase64(str)); } return null; } + @SuppressWarnings("checkstyle:MagicNumber") public static byte[] serialize(T obj) { - return org.apache.commons.lang3.SerializationUtils.serialize(obj); + try (ByteArrayOutputStream b = new ByteArrayOutputStream(512); + ObjectOutputStream out = new ObjectOutputStream(b)) { + out.writeObject(obj); + return b.toByteArray(); + } catch (final IOException ex) { + throw new SerializationException(ex); + } + } public static T deserialize(byte[] bytes) { - return org.apache.commons.lang3.SerializationUtils.deserialize(bytes); + try (ByteArrayInputStream s = new ByteArrayInputStream(bytes); + ObjectInputStream in = new ObjectInputStream(s) { + @Override + protected Class resolveClass(ObjectStreamClass desc) throws IOException, ClassNotFoundException { + // make sure use current thread classloader + ClassLoader cl = Thread.currentThread().getContextClassLoader(); + if (cl == null) { + return super.resolveClass(desc); + } + return Class.forName(desc.getName(), false, cl); + } + }) { + @SuppressWarnings("unchecked") final T obj = (T) in.readObject(); + return obj; + } catch (final ClassNotFoundException | IOException ex) { + throw new SerializationException(ex); + } } } diff --git a/seatunnel-common/src/test/java/org/apache/seatunnel/common/utils/ReflectionUtilsTest.java b/seatunnel-common/src/test/java/org/apache/seatunnel/common/utils/ReflectionUtilsTest.java new file mode 100644 index 00000000000..422c70eef4d --- /dev/null +++ b/seatunnel-common/src/test/java/org/apache/seatunnel/common/utils/ReflectionUtilsTest.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
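> Editor's note: the key change in `SerializationUtils.deserialize` above is resolving classes against the current thread's context classloader, so objects whose classes live in a plugin classloader can still be restored. A condensed, self-contained sketch of the same override (class name illustrative):

```java
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectStreamClass;

public final class ContextClassLoaderDeserializer {

    // Resolve classes via the context classloader when one is set, falling back
    // to the default lookup otherwise; this mirrors the resolveClass override above.
    public static Object deserialize(byte[] bytes) throws IOException, ClassNotFoundException {
        try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(bytes)) {
            @Override
            protected Class<?> resolveClass(ObjectStreamClass desc) throws IOException, ClassNotFoundException {
                ClassLoader cl = Thread.currentThread().getContextClassLoader();
                return cl == null ? super.resolveClass(desc) : Class.forName(desc.getName(), false, cl);
            }
        }) {
            return in.readObject();
        }
    }
}
```

Without the override, `ObjectInputStream` resolves classes through the latest user-defined classloader on the call stack, which in an engine-plus-plugins deployment may not be the loader that holds the connector classes.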
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.common.utils; + +import org.junit.Assert; +import org.junit.Test; + +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLClassLoader; + +public class ReflectionUtilsTest { + + @Test + public void testInvoke() throws MalformedURLException { + ReflectionUtils.invoke(new String[]{}, "toString"); + + URLClassLoader classLoader = new URLClassLoader(new URL[]{}, Thread.currentThread().getContextClassLoader()); + ReflectionUtils.invoke(classLoader, "addURL", new URL("file:///test")); + Assert.assertArrayEquals(classLoader.getURLs(), new URL[]{new URL("file:///test")}); + } + +} diff --git a/seatunnel-common/src/test/java/org/apache/seatunnel/common/utils/SerializationUtilsTest.java b/seatunnel-common/src/test/java/org/apache/seatunnel/common/utils/SerializationUtilsTest.java new file mode 100644 index 00000000000..ad0e80ceabb --- /dev/null +++ b/seatunnel-common/src/test/java/org/apache/seatunnel/common/utils/SerializationUtilsTest.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.common.utils; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.HashMap; + +@SuppressWarnings("checkstyle:RegexpSingleline") +public class SerializationUtilsTest { + + @Test + public void testObjectToString() { + + HashMap data = new HashMap<>(); + data.put("key1", "value1"); + data.put("seatunnelTest", "apache SeaTunnel"); + data.put("中 文", "Apache Asia"); + String configStr = SerializationUtils.objectToString(data); + Assert.assertNotNull(configStr); + + HashMap dataAfter = SerializationUtils.stringToObject(configStr); + + Assert.assertEquals(dataAfter, data); + + data.put("key2", ""); + Assert.assertNotEquals(dataAfter, data); + + } + + @Test + public void testByteToObject() { + + HashMap data = new HashMap<>(); + data.put("key1", "value1"); + data.put("seatunnelTest", "apache SeaTunnel"); + data.put("中 文", "Apache Asia"); + + ArrayList> array = new ArrayList<>(); + array.add(data); + HashMap data2 = new HashMap<>(); + data2.put("Apache Asia", "中 文"); + data2.put("value1", "key1"); + data2.put("apache SeaTunnel", "seatunnelTest"); + array.add(data2); + + byte[] result = SerializationUtils.serialize(array); + + ArrayList> array2 = SerializationUtils.deserialize(result); + + Assert.assertEquals(array2, array); + + Assert.assertThrows(SerializationException.class, () -> SerializationUtils.deserialize(new byte[]{1, 0, 1})); + + } + +} diff --git a/seatunnel-common/src/test/java/org/apache/seatunnel/common/utils/VariablesSubstituteTest.java b/seatunnel-common/src/test/java/org/apache/seatunnel/common/utils/VariablesSubstituteTest.java index 33d721ae81f..09d044a5312 100644 --- a/seatunnel-common/src/test/java/org/apache/seatunnel/common/utils/VariablesSubstituteTest.java +++ b/seatunnel-common/src/test/java/org/apache/seatunnel/common/utils/VariablesSubstituteTest.java @@ -26,7 +26,6 @@ public class VariablesSubstituteTest { - @SuppressWarnings("checkstyle:MagicNumber") @Test public void testSubstitute() { String timeFormat = "yyyyMMddHHmmss"; diff --git a/seatunnel-connectors-v2-dist/pom.xml b/seatunnel-connectors-v2-dist/pom.xml index 9e349fbb99d..fe37965a1b8 100644 --- a/seatunnel-connectors-v2-dist/pom.xml +++ b/seatunnel-connectors-v2-dist/pom.xml @@ -17,6 +17,7 @@ limitations under the License. --> + @@ -80,9 +81,23 @@ connector-hive ${project.version} + + org.apache.seatunnel + connector-file-hadoop + ${project.version} + + + org.apache.seatunnel + connector-file-local + ${project.version} + + + org.apache.seatunnel + connector-hudi + ${project.version} + - diff --git a/seatunnel-connectors-v2/README.md b/seatunnel-connectors-v2/README.md index 7522b846eb8..b24122d941e 100644 --- a/seatunnel-connectors-v2/README.md +++ b/seatunnel-connectors-v2/README.md @@ -5,7 +5,7 @@ This article introduces the new interface and the new code structure on account In order to separate from the old code, we have defined new modules for execution flow. This facilitates parallel development at the current stage, and reduces the difficulty of merging. All the relevant code at this stage is kept on the ``api-draft`` branch. ### **Example** -We have prepared a new version of the locally executable example program in ``seatunnel-examples``, which can be directly called using ``seatunnel-flink-new-connector-example`` or ``seatunnel-spark-new-connector-example`` in ``SeaTunnelApiExample``. This is also the debugging method that is often used in the local development of Connector. 
The corresponding configuration files are saved in the same module ``resources/examples`` folder as before. +We have prepared a new version of the locally executable example program in ``seatunnel-examples``, which can be directly called using ``seatunnel-flink-connector-v2-example`` or ``seatunnel-spark-connector-v2-example`` in ``SeaTunnelApiExample``. This is also the debugging method that is often used in the local development of Connector. The corresponding configuration files are saved in the same module ``resources/examples`` folder as before. ### **Startup Class** diff --git a/seatunnel-connectors-v2/README.zh.md b/seatunnel-connectors-v2/README.zh.md index c0aa4d6395b..017b19907a2 100644 --- a/seatunnel-connectors-v2/README.zh.md +++ b/seatunnel-connectors-v2/README.zh.md @@ -4,7 +4,7 @@ Because SeaTunnel design new API for connectors, 所以通过这篇文章来介 现阶段所有相关代码保存在`api-draft`分支上。 为了和老的代码分开,方便现阶段的并行开发,以及降低merge的难度。我们为新的执行流程定义了新的模块 ### Example -我们已经在`seatunnel-examples`中准备好了新版本的可本地执行Example程序,直接调用`seatunnel-flink-new-connector-example`或`seatunnel-spark-new-connector-example`中的`SeaTunnelApiExample`即可。这也是本地开发Connector经常会用到的调试方式。 +我们已经在`seatunnel-examples`中准备好了新版本的可本地执行Example程序,直接调用`seatunnel-flink-connector-v2-example`或`seatunnel-spark-connector-v2-example`中的`SeaTunnelApiExample`即可。这也是本地开发Connector经常会用到的调试方式。 对应的配置文件保存在同模块的`resources/examples`文件夹下,和以前一样。 ### 启动类 和老的启动类分开,我们创建了两个新的启动类工程,分别是`seatunnel-core/seatunnel-flink-starter`和`seatunnel-core/seatunnel-spark-starter`. 可以在这里找到如何将配置文件解析为可以执行的Flink/Spark流程。 diff --git a/seatunnel-connectors-v2/connector-assert/src/test/java/org/apache/seatunnel/flink/assertion/AssertExecutorTest.java b/seatunnel-connectors-v2/connector-assert/src/test/java/org/apache/seatunnel/flink/assertion/AssertExecutorTest.java index c5d30f3b189..fa9fee2093f 100644 --- a/seatunnel-connectors-v2/connector-assert/src/test/java/org/apache/seatunnel/flink/assertion/AssertExecutorTest.java +++ b/seatunnel-connectors-v2/connector-assert/src/test/java/org/apache/seatunnel/flink/assertion/AssertExecutorTest.java @@ -29,7 +29,6 @@ import java.util.List; -@SuppressWarnings("magicnumber") public class AssertExecutorTest extends TestCase { SeaTunnelRow row = new SeaTunnelRow(new Object[]{"jared", 17}); SeaTunnelRowType rowType = new SeaTunnelRowType(new String[]{"name", "age"}, new SeaTunnelDataType[]{BasicType.STRING_TYPE, BasicType.INT_TYPE}); diff --git a/seatunnel-connectors-v2/connector-assert/src/test/java/org/apache/seatunnel/flink/assertion/rule/AssertRuleParserTest.java b/seatunnel-connectors-v2/connector-assert/src/test/java/org/apache/seatunnel/flink/assertion/rule/AssertRuleParserTest.java index 9a9628dedf0..5c1bb650cc2 100644 --- a/seatunnel-connectors-v2/connector-assert/src/test/java/org/apache/seatunnel/flink/assertion/rule/AssertRuleParserTest.java +++ b/seatunnel-connectors-v2/connector-assert/src/test/java/org/apache/seatunnel/flink/assertion/rule/AssertRuleParserTest.java @@ -28,7 +28,6 @@ import java.util.List; -@SuppressWarnings("magicnumber") public class AssertRuleParserTest extends TestCase { AssertRuleParser parser = new AssertRuleParser(); diff --git a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java index c8114b6379b..6e2a4ceb190 100644 --- 
a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java +++ b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java @@ -193,10 +193,10 @@ private Map initFieldInjectFunctionMap() new BigDecimalInjectFunction(), new DateInjectFunction(), new DateTimeInjectFunction(), + new LongInjectFunction(), new DoubleInjectFunction(), new FloatInjectFunction(), new IntInjectFunction(), - new LongInjectFunction(), new StringInjectFunction() ); ClickhouseFieldInjectFunction defaultFunction = new StringInjectFunction(); diff --git a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSource.java b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSource.java index a94344e72cd..4a4b638d915 100644 --- a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSource.java +++ b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSource.java @@ -24,8 +24,6 @@ import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.Config.USERNAME; import org.apache.seatunnel.api.common.PrepareFailException; -import org.apache.seatunnel.api.serialization.DefaultSerializer; -import org.apache.seatunnel.api.serialization.Serializer; import org.apache.seatunnel.api.source.Boundedness; import org.apache.seatunnel.api.source.SeaTunnelSource; import org.apache.seatunnel.api.source.SourceReader; @@ -124,9 +122,4 @@ public SourceSplitEnumerator resto return new ClickhouseSourceSplitEnumerator(enumeratorContext); } - @Override - public Serializer getEnumeratorStateSerializer() { - return new DefaultSerializer<>(); - } - } diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/source/AbstractSingleSplitSource.java b/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/source/AbstractSingleSplitSource.java index f2968c833e3..54410962bc2 100644 --- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/source/AbstractSingleSplitSource.java +++ b/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/source/AbstractSingleSplitSource.java @@ -29,7 +29,7 @@ public abstract class AbstractSingleSplitSource implements SeaTunnelSource createReader(SourceReader.Context readerContext) throws Exception { - checkArgument(readerContext.getIndexOfSubtask() == 0, "Single split source allows only a single reader to be created."); + checkArgument(readerContext.getIndexOfSubtask() == 0, "A single split source allows only one single reader to be created."); return createReader(new SingleSplitReaderContext(readerContext)); } @@ -45,11 +45,6 @@ public final SourceSplitEnumerator rest return createEnumerator(enumeratorContext); } - @Override - public final Serializer getEnumeratorStateSerializer() { - return new DefaultSerializer<>(); - } - @Override public final Serializer getSplitSerializer() { return new DefaultSerializer<>(); diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/pom.xml 
b/seatunnel-connectors-v2/connector-file/connector-file-base/pom.xml new file mode 100644 index 00000000000..2f3c38c519b --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/pom.xml @@ -0,0 +1,72 @@ + + + + + connector-file + org.apache.seatunnel + ${revision} + + 4.0.0 + + connector-file-base + + + + org.apache.seatunnel + seatunnel-api + ${project.version} + + + + org.apache.seatunnel + seatunnel-core-base + ${project.version} + test + + + + org.apache.commons + commons-collections4 + + + org.apache.commons + commons-lang3 + + + + junit + junit + test + + + + org.powermock + powermock-module-junit4 + test + + + org.powermock + powermock-api-mockito2 + test + + + \ No newline at end of file diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseTextFileConfig.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseTextFileConfig.java new file mode 100644 index 00000000000..d6fd26d1b27 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/BaseTextFileConfig.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.file.config; + +import static com.google.common.base.Preconditions.checkNotNull; + +import org.apache.seatunnel.shade.com.typesafe.config.Config; + +import lombok.Data; +import lombok.NonNull; +import org.apache.commons.lang3.StringUtils; + +import java.io.Serializable; +import java.util.Locale; + +@Data +public class BaseTextFileConfig implements DelimiterConfig, CompressConfig, Serializable { + private static final long serialVersionUID = 1L; + + protected String compressCodec; + + protected String fieldDelimiter = String.valueOf('\001'); + + protected String rowDelimiter = "\n"; + + protected String path; + protected String fileNameExpression; + protected FileFormat fileFormat = FileFormat.TEXT; + + public BaseTextFileConfig(@NonNull Config config) { + if (config.hasPath(Constant.COMPRESS_CODEC)) { + throw new RuntimeException("compress not support now"); + } + + if (config.hasPath(Constant.FIELD_DELIMITER) && !StringUtils.isBlank(config.getString(Constant.FIELD_DELIMITER))) { + this.fieldDelimiter = config.getString(Constant.FIELD_DELIMITER); + } + + if (config.hasPath(Constant.ROW_DELIMITER) && !StringUtils.isBlank(config.getString(Constant.ROW_DELIMITER))) { + this.rowDelimiter = config.getString(Constant.ROW_DELIMITER); + } + + if (config.hasPath(Constant.PATH) && !StringUtils.isBlank(config.getString(Constant.PATH))) { + this.path = config.getString(Constant.PATH); + } + checkNotNull(path); + + if (config.hasPath(Constant.FILE_NAME_EXPRESSION) && !StringUtils.isBlank(config.getString(Constant.FILE_NAME_EXPRESSION))) { + this.fileNameExpression = config.getString(Constant.FILE_NAME_EXPRESSION); + } + + if (config.hasPath(Constant.FILE_FORMAT) && !StringUtils.isBlank(config.getString(Constant.FILE_FORMAT))) { + this.fileFormat = FileFormat.valueOf(config.getString(Constant.FILE_FORMAT).toUpperCase(Locale.ROOT)); + } + } + + public BaseTextFileConfig() {} +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/CompressConfig.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/CompressConfig.java new file mode 100644 index 00000000000..48d47c8d1df --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/CompressConfig.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
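> Editor's note: `BaseTextFileConfig` above only overrides a default when the option is present and non-blank, which keeps partially specified configs valid; the key strings mirror `Constant.FIELD_DELIMITER` and `Constant.ROW_DELIMITER` defined just below. A minimal sketch of the same guarded-read pattern, assuming the shaded typesafe `ConfigFactory` sits next to the `Config` import used above (class and method names illustrative):

```java
import org.apache.seatunnel.shade.com.typesafe.config.Config;
import org.apache.seatunnel.shade.com.typesafe.config.ConfigFactory;

import org.apache.commons.lang3.StringUtils;

public class GuardedOptionDemo {

    static String readOrDefault(Config config, String key, String defaultValue) {
        // hasPath guards against a missing key; the blank check guards against "".
        if (config.hasPath(key) && !StringUtils.isBlank(config.getString(key))) {
            return config.getString(key);
        }
        return defaultValue;
    }

    public static void main(String[] args) {
        Config config = ConfigFactory.parseString("field_delimiter = \",\"");
        System.out.println(readOrDefault(config, "field_delimiter", String.valueOf('\001'))); // ","
        System.out.println(readOrDefault(config, "row_delimiter", "\n").equals("\n"));        // true, default kept
    }
}
```

The `'\001'` default matches Hive's classic field separator, so text files written with no delimiter options remain loadable as Hive external tables.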
+ */ + +package org.apache.seatunnel.connectors.seatunnel.file.config; + +public interface CompressConfig { + String getCompressCodec(); +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/Constant.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/Constant.java new file mode 100644 index 00000000000..8d10024cb3a --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/Constant.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.file.config; + +public class Constant { + public static final String SEATUNNEL = "seatunnel"; + public static final String NON_PARTITION = "NON_PARTITION"; + public static final String TRANSACTION_ID_SPLIT = "_"; + public static final String TRANSACTION_EXPRESSION = "transactionId"; + + public static final String SAVE_MODE = "save_mode"; + public static final String COMPRESS_CODEC = "compress_codec"; + + public static final String PATH = "path"; + public static final String FIELD_DELIMITER = "field_delimiter"; + public static final String ROW_DELIMITER = "row_delimiter"; + public static final String PARTITION_BY = "partition_by"; + public static final String PARTITION_DIR_EXPRESSION = "partition_dir_expression"; + public static final String IS_PARTITION_FIELD_WRITE_IN_FILE = "is_partition_field_write_in_file"; + public static final String TMP_PATH = "tmp_path"; + public static final String FILE_NAME_EXPRESSION = "file_name_expression"; + public static final String FILE_FORMAT = "file_format"; + public static final String SINK_COLUMNS = "sink_columns"; + public static final String FILENAME_TIME_FORMAT = "filename_time_format"; + public static final String IS_ENABLE_TRANSACTION = "is_enable_transaction"; +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/DelimiterConfig.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/DelimiterConfig.java new file mode 100644 index 00000000000..146974c33a7 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/DelimiterConfig.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.file.config; + +public interface DelimiterConfig { + String getFieldDelimiter(); + + String getRowDelimiter(); +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/FileFormat.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/FileFormat.java new file mode 100644 index 00000000000..6b3f31f79e0 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/FileFormat.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.file.config; + +import java.io.Serializable; + +public enum FileFormat implements Serializable { + CSV("csv"), + TEXT("txt"); + + private String suffix; + + private FileFormat(String suffix) { + this.suffix = suffix; + } + + public String getSuffix() { + return "." + suffix; + } +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/PartitionConfig.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/PartitionConfig.java new file mode 100644 index 00000000000..f77f69f3d93 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/PartitionConfig.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
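> Editor's note: the enum's one job is mapping a format to its dot-prefixed file suffix, which the transaction file-name generator appends. A reduced copy of the enum in a tiny demo (class name illustrative):

```java
public class SuffixDemo {
    // Reduced copy of the FileFormat enum above.
    enum FileFormat {
        CSV("csv"), TEXT("txt");

        private final String suffix;

        FileFormat(String suffix) { this.suffix = suffix; }

        String getSuffix() { return "." + suffix; }
    }

    public static void main(String[] args) {
        System.out.println("part-0001" + FileFormat.TEXT.getSuffix()); // part-0001.txt
    }
}
```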
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.file.config; + +import java.util.List; + +public interface PartitionConfig { + List getPartitionFieldList(); + + boolean isPartitionFieldWriteInFile(); +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/AbstractFileSink.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/AbstractFileSink.java new file mode 100644 index 00000000000..77b72f004ca --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/AbstractFileSink.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink; + +import org.apache.seatunnel.api.common.PrepareFailException; +import org.apache.seatunnel.api.common.SeaTunnelContext; +import org.apache.seatunnel.api.serialization.DefaultSerializer; +import org.apache.seatunnel.api.serialization.Serializer; +import org.apache.seatunnel.api.sink.SeaTunnelSink; +import org.apache.seatunnel.api.sink.SinkAggregatedCommitter; +import org.apache.seatunnel.api.sink.SinkWriter; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.common.constants.JobMode; +import org.apache.seatunnel.connectors.seatunnel.file.sink.config.SaveMode; +import org.apache.seatunnel.connectors.seatunnel.file.sink.config.TextFileSinkConfig; +import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.FileSystemCommitter; +import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.SinkFileSystemPlugin; + +import org.apache.seatunnel.shade.com.typesafe.config.Config; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; + +/** + * Abstract base for file sinks built on the SeaTunnel sink API.
+ */ +public abstract class AbstractFileSink implements SeaTunnelSink { + private Config config; + private String jobId; + private Long checkpointId; + private SeaTunnelRowType seaTunnelRowTypeInfo; + private SeaTunnelContext seaTunnelContext; + private TextFileSinkConfig textFileSinkConfig; + private SinkFileSystemPlugin sinkFileSystemPlugin; + + public abstract SinkFileSystemPlugin getSinkFileSystemPlugin(); + + @Override + public String getPluginName() { + this.sinkFileSystemPlugin = getSinkFileSystemPlugin(); + return this.sinkFileSystemPlugin.getPluginName(); + } + + @Override + public void setTypeInfo(SeaTunnelRowType seaTunnelRowTypeInfo) { + this.seaTunnelRowTypeInfo = seaTunnelRowTypeInfo; + } + + @Override + public void prepare(Config pluginConfig) throws PrepareFailException { + this.config = pluginConfig; + this.checkpointId = 1L; + } + + @Override + public SinkWriter createWriter(SinkWriter.Context context) throws IOException { + if (!seaTunnelContext.getJobMode().equals(JobMode.BATCH) && this.getSinkConfig().getSaveMode().equals(SaveMode.OVERWRITE)) { + throw new RuntimeException("Only a batch job can use the OVERWRITE save mode"); + } + + if (this.getSinkConfig().isEnableTransaction()) { + return new TransactionStateFileSinkWriter(seaTunnelRowTypeInfo, + config, + context, + getSinkConfig(), + jobId, + sinkFileSystemPlugin); + } else { + throw new RuntimeException("The file sink connector only supports transactional writes for now"); + } + } + + @Override + public SinkWriter restoreWriter(SinkWriter.Context context, List states) throws IOException { + if (this.getSinkConfig().isEnableTransaction()) { + return new FileSinkWriterWithTransaction(seaTunnelRowTypeInfo, + config, + context, + textFileSinkConfig, + jobId, + states, + sinkFileSystemPlugin); + } else { + throw new RuntimeException("The file sink connector only supports transactional writes for now"); + } + } + + @Override + public void setSeaTunnelContext(SeaTunnelContext seaTunnelContext) { + this.seaTunnelContext = seaTunnelContext; + this.jobId = seaTunnelContext.getJobId(); + } + + @Override + public Optional> createAggregatedCommitter() throws IOException { + if (this.getSinkConfig().isEnableTransaction()) { + Optional fileSystemCommitter = sinkFileSystemPlugin.getFileSystemCommitter(); + if (fileSystemCommitter.isPresent()) { + return Optional.of(new FileSinkAggregatedCommitter(fileSystemCommitter.get())); + } else { + throw new RuntimeException("A FileSystemCommitter is needed"); + } + } else { + return Optional.empty(); + } + } + + @Override + public Optional> getWriterStateSerializer() { + return Optional.of(new DefaultSerializer<>()); + } + + @Override + public Optional> getAggregatedCommitInfoSerializer() { + return Optional.of(new DefaultSerializer<>()); + } + + @Override + public Optional> getCommitInfoSerializer() { + return Optional.of(new DefaultSerializer<>()); + } + + private TextFileSinkConfig getSinkConfig() { + if (this.textFileSinkConfig == null && (this.seaTunnelRowTypeInfo != null && this.config != null)) { + this.textFileSinkConfig = new TextFileSinkConfig(config, seaTunnelRowTypeInfo); + } + return this.textFileSinkConfig; + } + + @Override + public SeaTunnelDataType getConsumedType() { + return this.seaTunnelRowTypeInfo; + } +} + + diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/FileAggregatedCommitInfo.java
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/FileAggregatedCommitInfo.java new file mode 100644 index 00000000000..c847ff659f2 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/FileAggregatedCommitInfo.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink; + +import lombok.AllArgsConstructor; +import lombok.Data; + +import java.io.Serializable; +import java.util.List; +import java.util.Map; + +@Data +@AllArgsConstructor +public class FileAggregatedCommitInfo implements Serializable { + + /** + * Storage the commit info in map. + * K is the file path need to be moved to target dir. + * V is the target file path of the data file. + */ + private Map> transactionMap; + + private Map> partitionDirAndValsMap; +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/FileCommitInfo.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/FileCommitInfo.java new file mode 100644 index 00000000000..0fcb04a03fa --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/FileCommitInfo.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink; + +import lombok.AllArgsConstructor; +import lombok.Data; + +import java.io.Serializable; +import java.util.List; +import java.util.Map; + +@Data +@AllArgsConstructor +public class FileCommitInfo implements Serializable { + + /** + * Storage the commit info in map. + * K is the file path need to be moved to target dir. + * V is the target file path of the data file. 
+ */ + private Map needMoveFiles; + + private Map> partitionDirAndValsMap; + + private String transactionDir; +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/FileSinkAggregatedCommitter.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/FileSinkAggregatedCommitter.java new file mode 100644 index 00000000000..3c7c8cf9c92 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/FileSinkAggregatedCommitter.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink; + +import org.apache.seatunnel.api.sink.SinkAggregatedCommitter; +import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.FileSystemCommitter; + +import lombok.NonNull; +import org.apache.commons.collections4.CollectionUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class FileSinkAggregatedCommitter implements SinkAggregatedCommitter { + private static final Logger LOGGER = LoggerFactory.getLogger(FileSinkAggregatedCommitter.class); + + private FileSystemCommitter fileSystemCommitter; + + public FileSinkAggregatedCommitter(@NonNull FileSystemCommitter fileSystemCommitter) { + this.fileSystemCommitter = fileSystemCommitter; + } + + @Override + public List commit(List aggregatedCommitInfoList) throws IOException { + if (aggregatedCommitInfoList == null || aggregatedCommitInfoList.size() == 0) { + return null; + } + List errorAggregatedCommitInfoList = new ArrayList(); + aggregatedCommitInfoList.stream().forEach(aggregateCommitInfo -> { + try { + fileSystemCommitter.commitTransaction(aggregateCommitInfo); + } catch (Exception e) { + LOGGER.error("commit aggregateCommitInfo error ", e); + errorAggregatedCommitInfoList.add(aggregateCommitInfo); + } + }); + + return errorAggregatedCommitInfoList; + } + + @Override + public FileAggregatedCommitInfo combine(List commitInfos) { + if (commitInfos == null || commitInfos.size() == 0) { + return null; + } + Map> aggregateCommitInfo = new HashMap<>(); + Map> partitionDirAndValsMap = new HashMap<>(); + commitInfos.stream().forEach(commitInfo -> { + Map needMoveFileMap = aggregateCommitInfo.get(commitInfo.getTransactionDir()); + if (needMoveFileMap == null) { + needMoveFileMap = new HashMap<>(); + aggregateCommitInfo.put(commitInfo.getTransactionDir(), needMoveFileMap); + } + needMoveFileMap.putAll(commitInfo.getNeedMoveFiles()); 
+ Set>> entries = commitInfo.getPartitionDirAndValsMap().entrySet(); + if (!CollectionUtils.isEmpty(entries)) { + partitionDirAndValsMap.putAll(commitInfo.getPartitionDirAndValsMap()); + } + }); + return new FileAggregatedCommitInfo(aggregateCommitInfo, partitionDirAndValsMap); + } + + @Override + public void abort(List aggregatedCommitInfoList) throws Exception { + if (aggregatedCommitInfoList == null || aggregatedCommitInfoList.size() == 0) { + return; + } + aggregatedCommitInfoList.stream().forEach(aggregateCommitInfo -> { + try { + fileSystemCommitter.abortTransaction(aggregateCommitInfo); + + } catch (Exception e) { + LOGGER.error("abort aggregateCommitInfo error ", e); + } + }); + } + + @Override + public void close() throws IOException { + } +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/FileSinkState.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/FileSinkState.java new file mode 100644 index 00000000000..1b7e6b8c523 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/FileSinkState.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink; + +import lombok.AllArgsConstructor; +import lombok.Data; + +import java.io.Serializable; + +@Data +@AllArgsConstructor +public class FileSinkState implements Serializable { + private String transactionId; + private Long checkpointId; +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/FileSinkWriterWithTransaction.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/FileSinkWriterWithTransaction.java new file mode 100644 index 00000000000..83e51d1bc6a --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/FileSinkWriterWithTransaction.java @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
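> Editor's note: the combine step above folds every writer's `FileCommitInfo` into one `FileAggregatedCommitInfo`, keyed by transaction directory, before any file is moved; the null-check-then-put sequence is the classic `computeIfAbsent` shape. A condensed sketch with plain maps standing in for the commit-info classes (paths and names illustrative):

```java
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class CombineSketch {

    // Reduced stand-in for FileCommitInfo.
    static class CommitInfo {
        final String transactionDir;
        final Map<String, String> needMoveFiles; // staged file -> final target file

        CommitInfo(String transactionDir, Map<String, String> needMoveFiles) {
            this.transactionDir = transactionDir;
            this.needMoveFiles = needMoveFiles;
        }
    }

    // transactionDir -> (staged file -> final target file), as in combine() above.
    static Map<String, Map<String, String>> combine(List<CommitInfo> commitInfos) {
        Map<String, Map<String, String>> aggregated = new HashMap<>();
        for (CommitInfo info : commitInfos) {
            aggregated.computeIfAbsent(info.transactionDir, dir -> new HashMap<>())
                      .putAll(info.needMoveFiles);
        }
        return aggregated;
    }

    public static void main(String[] args) {
        Map<String, String> moves = new HashMap<>();
        moves.put("/tmp/seatunnel/T_x_0_1/out.txt", "/warehouse/out/out.txt");
        List<CommitInfo> infos = new ArrayList<>();
        infos.add(new CommitInfo("/tmp/seatunnel/T_x_0_1", moves));
        System.out.println(combine(infos)); // one entry keyed by the transaction dir
    }
}
```

Keying by transaction directory is what lets `commit` move files and `abort` clean up one whole transaction at a time, returning only the failed aggregated infos for retry.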
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink; + +import org.apache.seatunnel.api.sink.SinkWriter; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.seatunnel.file.sink.config.TextFileSinkConfig; +import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.SinkFileSystemPlugin; +import org.apache.seatunnel.connectors.seatunnel.file.sink.transaction.TransactionStateFileWriter; +import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.FileSinkPartitionDirNameGenerator; +import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.FileSinkTransactionFileNameGenerator; + +import org.apache.seatunnel.shade.com.typesafe.config.Config; + +import lombok.NonNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; + +public class FileSinkWriterWithTransaction implements SinkWriter { + private static final Logger LOGGER = LoggerFactory.getLogger(FileSinkWriterWithTransaction.class); + + private SeaTunnelRowType seaTunnelRowTypeInfo; + private Config pluginConfig; + private Context context; + private String jobId; + + private TransactionStateFileWriter fileWriter; + + private TextFileSinkConfig textFileSinkConfig; + + public FileSinkWriterWithTransaction(@NonNull SeaTunnelRowType seaTunnelRowTypeInfo, + @NonNull Config pluginConfig, + @NonNull SinkWriter.Context context, + @NonNull TextFileSinkConfig textFileSinkConfig, + @NonNull String jobId, + @NonNull SinkFileSystemPlugin sinkFileSystemPlugin) { + this.seaTunnelRowTypeInfo = seaTunnelRowTypeInfo; + this.pluginConfig = pluginConfig; + this.context = context; + this.jobId = jobId; + this.textFileSinkConfig = textFileSinkConfig; + + Optional transactionStateFileWriter = sinkFileSystemPlugin.getTransactionStateFileWriter(this.seaTunnelRowTypeInfo, + new FileSinkTransactionFileNameGenerator( + this.textFileSinkConfig.getFileFormat(), + this.textFileSinkConfig.getFileNameExpression(), + this.textFileSinkConfig.getFileNameTimeFormat()), + new FileSinkPartitionDirNameGenerator( + this.textFileSinkConfig.getPartitionFieldList(), + this.textFileSinkConfig.getPartitionFieldsIndexInRow(), + this.textFileSinkConfig.getPartitionDirExpression()), + this.textFileSinkConfig.getSinkColumnsIndexInRow(), + this.textFileSinkConfig.getTmpPath(), + this.textFileSinkConfig.getPath(), + this.jobId, + this.context.getIndexOfSubtask(), + this.textFileSinkConfig.getFieldDelimiter(), + this.textFileSinkConfig.getRowDelimiter(), + sinkFileSystemPlugin.getFileSystem().get()); + + if (!transactionStateFileWriter.isPresent()) { + throw new RuntimeException("A TransactionStateFileWriter is need"); + } + + this.fileWriter = transactionStateFileWriter.get(); + + fileWriter.beginTransaction(1L); + } + + public FileSinkWriterWithTransaction(@NonNull SeaTunnelRowType seaTunnelRowTypeInfo, + @NonNull Config pluginConfig, + @NonNull SinkWriter.Context context, + @NonNull TextFileSinkConfig textFileSinkConfig, + @NonNull String jobId, + 
@NonNull List fileSinkStates, + @NonNull SinkFileSystemPlugin sinkFileSystemPlugin) { + this.seaTunnelRowTypeInfo = seaTunnelRowTypeInfo; + this.pluginConfig = pluginConfig; + this.context = context; + this.jobId = jobId; + // fix: keep the sink config on restore, the generator calls below dereference it + this.textFileSinkConfig = textFileSinkConfig; + + Optional transactionStateFileWriter = sinkFileSystemPlugin.getTransactionStateFileWriter(this.seaTunnelRowTypeInfo, + new FileSinkTransactionFileNameGenerator( + this.textFileSinkConfig.getFileFormat(), + this.textFileSinkConfig.getFileNameExpression(), + this.textFileSinkConfig.getFileNameTimeFormat()), + new FileSinkPartitionDirNameGenerator( + this.textFileSinkConfig.getPartitionFieldList(), + this.textFileSinkConfig.getPartitionFieldsIndexInRow(), + this.textFileSinkConfig.getPartitionDirExpression()), + this.textFileSinkConfig.getSinkColumnsIndexInRow(), + this.textFileSinkConfig.getTmpPath(), + this.textFileSinkConfig.getPath(), + this.jobId, + this.context.getIndexOfSubtask(), + this.textFileSinkConfig.getFieldDelimiter(), + this.textFileSinkConfig.getRowDelimiter(), + sinkFileSystemPlugin.getFileSystem().get()); + + if (!transactionStateFileWriter.isPresent()) { + throw new RuntimeException("A TransactionStateFileWriter is needed"); + } + + this.fileWriter = transactionStateFileWriter.get(); + + // Rollback dirty transaction + if (fileSinkStates.size() > 0) { + List transactionAfter = fileWriter.getTransactionAfter(fileSinkStates.get(0).getTransactionId()); + fileWriter.abortTransactions(transactionAfter); + } + fileWriter.beginTransaction(fileSinkStates.get(0).getCheckpointId() + 1); + } + + @Override + public void write(SeaTunnelRow element) throws IOException { + fileWriter.write(element); + } + + @Override + public Optional prepareCommit() throws IOException { + return fileWriter.prepareCommit(); + } + + @Override + public void abortPrepare() { + fileWriter.abortTransaction(); + } + + @Override + public void close() throws IOException { + fileWriter.finishAndCloseWriteFile(); + } + + @Override + public List snapshotState(long checkpointId) throws IOException { + List fileSinkStates = fileWriter.snapshotState(checkpointId); + fileWriter.beginTransaction(checkpointId); + return fileSinkStates; + } +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/TransactionStateFileSinkWriter.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/TransactionStateFileSinkWriter.java new file mode 100644 index 00000000000..0bdad1afef1 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/TransactionStateFileSinkWriter.java @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+
+package org.apache.seatunnel.connectors.seatunnel.file.sink;
+
+import org.apache.seatunnel.api.sink.SinkWriter;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.config.TextFileSinkConfig;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.SinkFileSystemPlugin;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.transaction.TransactionStateFileWriter;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.FileSinkPartitionDirNameGenerator;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.FileSinkTransactionFileNameGenerator;
+
+import org.apache.seatunnel.shade.com.typesafe.config.Config;
+
+import lombok.NonNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Optional;
+
+public class TransactionStateFileSinkWriter implements SinkWriter<SeaTunnelRow, FileCommitInfo, FileSinkState> {
+    private static final Logger LOGGER = LoggerFactory.getLogger(TransactionStateFileSinkWriter.class);
+
+    private SeaTunnelRowType seaTunnelRowTypeInfo;
+    private Config pluginConfig;
+    private Context context;
+    private String jobId;
+
+    private TransactionStateFileWriter fileWriter;
+
+    private TextFileSinkConfig textFileSinkConfig;
+
+    public TransactionStateFileSinkWriter(@NonNull SeaTunnelRowType seaTunnelRowTypeInfo,
+                                          @NonNull Config pluginConfig,
+                                          @NonNull SinkWriter.Context context,
+                                          @NonNull TextFileSinkConfig textFileSinkConfig,
+                                          @NonNull String jobId,
+                                          @NonNull SinkFileSystemPlugin sinkFileSystemPlugin) {
+        this.seaTunnelRowTypeInfo = seaTunnelRowTypeInfo;
+        this.pluginConfig = pluginConfig;
+        this.context = context;
+        this.jobId = jobId;
+        this.textFileSinkConfig = textFileSinkConfig;
+
+        Optional<TransactionStateFileWriter> transactionStateFileWriter = sinkFileSystemPlugin.getTransactionStateFileWriter(this.seaTunnelRowTypeInfo,
+            new FileSinkTransactionFileNameGenerator(
+                this.textFileSinkConfig.getFileFormat(),
+                this.textFileSinkConfig.getFileNameExpression(),
+                this.textFileSinkConfig.getFileNameTimeFormat()),
+            new FileSinkPartitionDirNameGenerator(
+                this.textFileSinkConfig.getPartitionFieldList(),
+                this.textFileSinkConfig.getPartitionFieldsIndexInRow(),
+                this.textFileSinkConfig.getPartitionDirExpression()),
+            this.textFileSinkConfig.getSinkColumnsIndexInRow(),
+            this.textFileSinkConfig.getTmpPath(),
+            this.textFileSinkConfig.getPath(),
+            this.jobId,
+            this.context.getIndexOfSubtask(),
+            this.textFileSinkConfig.getFieldDelimiter(),
+            this.textFileSinkConfig.getRowDelimiter(),
+            sinkFileSystemPlugin.getFileSystem().get());
+
+        if (!transactionStateFileWriter.isPresent()) {
+            throw new RuntimeException("A TransactionStateFileWriter is needed");
+        }
+
+        this.fileWriter = transactionStateFileWriter.get();
+
+        fileWriter.beginTransaction(1L);
+    }
+
+    public TransactionStateFileSinkWriter(@NonNull SeaTunnelRowType seaTunnelRowTypeInfo,
+                                          @NonNull Config pluginConfig,
+                                          @NonNull SinkWriter.Context context,
+                                          @NonNull TextFileSinkConfig textFileSinkConfig,
+                                          @NonNull String jobId,
+                                          @NonNull List<FileSinkState> fileSinkStates,
+                                          @NonNull SinkFileSystemPlugin sinkFileSystemPlugin) {
+        this.seaTunnelRowTypeInfo = seaTunnelRowTypeInfo;
+        this.pluginConfig = pluginConfig;
+        this.context = context;
+        this.jobId = jobId;
+        // keep the config before building the writer below, otherwise the
+        // this.textFileSinkConfig getters throw a NullPointerException
+        this.textFileSinkConfig = textFileSinkConfig;
+
+        Optional<TransactionStateFileWriter> transactionStateFileWriter = sinkFileSystemPlugin.getTransactionStateFileWriter(this.seaTunnelRowTypeInfo,
+            new FileSinkTransactionFileNameGenerator(
+                this.textFileSinkConfig.getFileFormat(),
+                this.textFileSinkConfig.getFileNameExpression(),
+                this.textFileSinkConfig.getFileNameTimeFormat()),
+            new FileSinkPartitionDirNameGenerator(
+                this.textFileSinkConfig.getPartitionFieldList(),
+                this.textFileSinkConfig.getPartitionFieldsIndexInRow(),
+                this.textFileSinkConfig.getPartitionDirExpression()),
+            this.textFileSinkConfig.getSinkColumnsIndexInRow(),
+            this.textFileSinkConfig.getTmpPath(),
+            this.textFileSinkConfig.getPath(),
+            this.jobId,
+            this.context.getIndexOfSubtask(),
+            this.textFileSinkConfig.getFieldDelimiter(),
+            this.textFileSinkConfig.getRowDelimiter(),
+            sinkFileSystemPlugin.getFileSystem().get());
+
+        if (!transactionStateFileWriter.isPresent()) {
+            throw new RuntimeException("A TransactionStateFileWriter is needed");
+        }
+
+        this.fileWriter = transactionStateFileWriter.get();
+
+        // Roll back dirty transactions left over from the last run, then resume
+        // from the restored checkpoint; guard against an empty state list
+        if (fileSinkStates.size() > 0) {
+            List<String> transactionAfter = fileWriter.getTransactionAfter(fileSinkStates.get(0).getTransactionId());
+            fileWriter.abortTransactions(transactionAfter);
+            fileWriter.beginTransaction(fileSinkStates.get(0).getCheckpointId() + 1);
+        } else {
+            fileWriter.beginTransaction(1L);
+        }
+    }
+
+    @Override
+    public void write(SeaTunnelRow element) throws IOException {
+        fileWriter.write(element);
+    }
+
+    @Override
+    public Optional<FileCommitInfo> prepareCommit() throws IOException {
+        return fileWriter.prepareCommit();
+    }
+
+    @Override
+    public void abortPrepare() {
+        fileWriter.abortTransaction();
+    }
+
+    @Override
+    public void close() throws IOException {
+        fileWriter.finishAndCloseWriteFile();
+    }
+
+    @Override
+    public List<FileSinkState> snapshotState(long checkpointId) throws IOException {
+        List<FileSinkState> fileSinkStates = fileWriter.snapshotState(checkpointId);
+        // begin the transaction for the next checkpoint period, mirroring the
+        // restore path which resumes at the restored checkpointId + 1
+        fileWriter.beginTransaction(checkpointId + 1);
+        return fileSinkStates;
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/config/FileSystemType.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/config/FileSystemType.java
new file mode 100644
index 00000000000..58c1ba15702
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/config/FileSystemType.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink.config; + +import java.io.Serializable; + +public enum FileSystemType implements Serializable { + HDFS("HdfsFile"), + LOCAL("LocalFile"); + + private String sinkFileSystemPluginName; + + private FileSystemType(String sinkFileSystemPluginName) { + this.sinkFileSystemPluginName = sinkFileSystemPluginName; + } + + public String getSinkFileSystemPluginName() { + return sinkFileSystemPluginName; + } +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/config/SaveMode.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/config/SaveMode.java new file mode 100644 index 00000000000..d46a75c77ac --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/config/SaveMode.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink.config; + +import lombok.NonNull; + +import java.io.Serializable; +import java.util.Locale; + +public enum SaveMode implements Serializable { + APPEND(), + OVERWRITE(), + IGNORE(), + ERROR(); + + public static SaveMode fromStr(@NonNull String str) { + return SaveMode.valueOf(str.toUpperCase(Locale.ROOT)); + } +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/config/TextFileSinkConfig.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/config/TextFileSinkConfig.java new file mode 100644 index 00000000000..ce94ff84769 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/config/TextFileSinkConfig.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
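+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// A minimal sketch of a sink block this config class can parse, assuming the
+// option keys referenced through Constant match the documented option names
+// (illustrative only; the concrete values are not part of this patch):
+//
+//     sink {
+//       LocalFile {
+//         path = "file:///tmp/seatunnel/text"
+//         partition_by = ["age"]
+//         sink_columns = ["name", "age"]
+//         is_enable_transaction = true
+//         save_mode = "error"            # parsed via SaveMode.fromStr(...)
+//       }
+//     }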
+
+package org.apache.seatunnel.connectors.seatunnel.file.sink.config;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.connectors.seatunnel.file.config.BaseTextFileConfig;
+import org.apache.seatunnel.connectors.seatunnel.file.config.Constant;
+import org.apache.seatunnel.connectors.seatunnel.file.config.PartitionConfig;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.FileSinkPartitionDirNameGenerator;
+
+import org.apache.seatunnel.shade.com.typesafe.config.Config;
+
+import lombok.Data;
+import lombok.NonNull;
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+@Data
+public class TextFileSinkConfig extends BaseTextFileConfig implements PartitionConfig {
+
+    private List<String> sinkColumnList;
+
+    private List<String> partitionFieldList;
+
+    /**
+     * default is ${k0}=${v0}/${k1}=${v1}/... see {@link FileSinkPartitionDirNameGenerator#generatorPartitionDir(SeaTunnelRow)}
+     */
+    private String partitionDirExpression;
+
+    private boolean isPartitionFieldWriteInFile = false;
+
+    private String tmpPath = "/tmp/seatunnel";
+
+    private SaveMode saveMode = SaveMode.ERROR;
+
+    private String fileNameTimeFormat = "yyyy.MM.dd";
+
+    private boolean isEnableTransaction = true;
+
+    //--------------------- generated from the config params ---------------------
+
+    private List<Integer> sinkColumnsIndexInRow;
+
+    private List<Integer> partitionFieldsIndexInRow;
+
+    public TextFileSinkConfig(@NonNull Config config, @NonNull SeaTunnelRowType seaTunnelRowTypeInfo) {
+        super(config);
+        checkArgument(!CollectionUtils.isEmpty(Arrays.asList(seaTunnelRowTypeInfo.getFieldNames())));
+
+        if (config.hasPath(Constant.SINK_COLUMNS) && !CollectionUtils.isEmpty(config.getStringList(Constant.SINK_COLUMNS))) {
+            // copy into a mutable list, partition fields may be removed from it below
+            this.sinkColumnList = new ArrayList<>(config.getStringList(Constant.SINK_COLUMNS));
+        }
+
+        // if the config sink_columns is empty, all fields in SeaTunnelRowTypeInfo will be written
+        if (CollectionUtils.isEmpty(this.sinkColumnList)) {
+            this.sinkColumnList = new ArrayList<>(Arrays.asList(seaTunnelRowTypeInfo.getFieldNames()));
+        }
+
+        if (config.hasPath(Constant.PARTITION_BY) && !CollectionUtils.isEmpty(config.getStringList(Constant.PARTITION_BY))) {
+            this.partitionFieldList = config.getStringList(Constant.PARTITION_BY);
+        }
+
+        if (config.hasPath(Constant.PARTITION_DIR_EXPRESSION) && !StringUtils.isBlank(config.getString(Constant.PARTITION_DIR_EXPRESSION))) {
+            this.partitionDirExpression = config.getString(Constant.PARTITION_DIR_EXPRESSION);
+        }
+
+        if (config.hasPath(Constant.IS_PARTITION_FIELD_WRITE_IN_FILE) && config.getBoolean(Constant.IS_PARTITION_FIELD_WRITE_IN_FILE)) {
+            this.isPartitionFieldWriteInFile = config.getBoolean(Constant.IS_PARTITION_FIELD_WRITE_IN_FILE);
+        }
+
+        if (config.hasPath(Constant.TMP_PATH) && !StringUtils.isBlank(config.getString(Constant.TMP_PATH))) {
+            this.tmpPath = config.getString(Constant.TMP_PATH);
+        }
+
+        if (config.hasPath(Constant.SAVE_MODE) && !StringUtils.isBlank(config.getString(Constant.SAVE_MODE))) {
+            this.saveMode = SaveMode.fromStr(config.getString(Constant.SAVE_MODE));
+        }
+
+        if (config.hasPath(Constant.FILENAME_TIME_FORMAT) && !StringUtils.isBlank(config.getString(Constant.FILENAME_TIME_FORMAT))) {
+            this.fileNameTimeFormat = config.getString(Constant.FILENAME_TIME_FORMAT);
+        }
+
+        // take the configured value instead of re-assigning the field to itself
+        if (config.hasPath(Constant.IS_ENABLE_TRANSACTION)) {
+            this.isEnableTransaction = config.getBoolean(Constant.IS_ENABLE_TRANSACTION);
+        }
+
+        if (this.isEnableTransaction && !this.fileNameExpression.contains(Constant.TRANSACTION_EXPRESSION)) {
+            throw new RuntimeException("file_name_expression must contain " + Constant.TRANSACTION_EXPRESSION + " when is_enable_transaction is true");
+        }
+
+        // check that every partition field is in seaTunnelRowTypeInfo
+        if (!CollectionUtils.isEmpty(this.partitionFieldList)
+            && (CollectionUtils.isEmpty(this.sinkColumnList) || !this.sinkColumnList.containsAll(this.partitionFieldList))) {
+            throw new RuntimeException("partition fields must be in sink columns");
+        }
+
+        if (!CollectionUtils.isEmpty(this.partitionFieldList) && !isPartitionFieldWriteInFile) {
+            if (!this.sinkColumnList.removeAll(this.partitionFieldList)) {
+                throw new RuntimeException("remove partition field from sink columns error");
+            }
+        }
+
+        if (CollectionUtils.isEmpty(this.sinkColumnList)) {
+            throw new RuntimeException("sink columns can not be empty");
+        }
+
+        Map<String, Integer> columnsMap = new HashMap<>(seaTunnelRowTypeInfo.getFieldNames().length);
+        String[] fieldNames = seaTunnelRowTypeInfo.getFieldNames();
+        for (int i = 0; i < fieldNames.length; i++) {
+            columnsMap.put(fieldNames[i], i);
+        }
+
+        // init the sink column indexes and partition field indexes; the column index is used to find the data in a SeaTunnelRow
+        this.sinkColumnsIndexInRow = this.sinkColumnList.stream()
+            .map(columnName -> columnsMap.get(columnName))
+            .collect(Collectors.toList());
+
+        if (!CollectionUtils.isEmpty(this.partitionFieldList)) {
+            this.partitionFieldsIndexInRow = this.partitionFieldList.stream()
+                .map(columnName -> columnsMap.get(columnName))
+                .collect(Collectors.toList());
+        }
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/spi/FileSystem.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/spi/FileSystem.java
new file mode 100644
index 00000000000..938a4108cee
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/spi/FileSystem.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
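+ */
+
+// A minimal local-filesystem sketch of this SPI (illustrative only; the
+// LocalFileSystem name is hypothetical and plain java.nio is an assumption,
+// not part of this patch):
+//
+//     public class LocalFileSystem implements FileSystem {
+//         @Override
+//         public void deleteFile(String path) throws IOException {
+//             // recursive delete, deepest entries first
+//             try (Stream<Path> walk = Files.walk(Paths.get(path))) {
+//                 walk.sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete);
+//             }
+//         }
+//
+//         @Override
+//         public List<String> dirList(String dirPath) throws IOException {
+//             try (Stream<Path> list = Files.list(Paths.get(dirPath))) {
+//                 return list.map(p -> p.getFileName().toString()).collect(Collectors.toList());
+//             }
+//         }
+//     }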
+
+package org.apache.seatunnel.connectors.seatunnel.file.sink.spi;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.List;
+
+public interface FileSystem extends Serializable {
+
+    void deleteFile(String path) throws IOException;
+
+    List<String> dirList(String dirPath) throws IOException;
+}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/spi/FileSystemCommitter.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/spi/FileSystemCommitter.java
new file mode 100644
index 00000000000..4dcba5b21c9
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/spi/FileSystemCommitter.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.file.sink.spi;
+
+import org.apache.seatunnel.connectors.seatunnel.file.sink.FileAggregatedCommitInfo;
+
+import lombok.NonNull;
+
+import java.io.IOException;
+import java.io.Serializable;
+
+public interface FileSystemCommitter extends Serializable {
+
+    void commitTransaction(@NonNull FileAggregatedCommitInfo fileAggregatedCommitInfo) throws IOException;
+
+    void abortTransaction(@NonNull FileAggregatedCommitInfo fileAggregatedCommitInfo) throws IOException;
+}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/spi/SinkFileSystemPlugin.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/spi/SinkFileSystemPlugin.java
new file mode 100644
index 00000000000..97c6ab9904a
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/spi/SinkFileSystemPlugin.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.file.sink.spi;
+
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.transaction.TransactionFileNameGenerator;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.transaction.TransactionStateFileWriter;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.PartitionDirNameGenerator;
+
+import lombok.NonNull;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Optional;
+
+public interface SinkFileSystemPlugin extends Serializable {
+
+    String getPluginName();
+
+    /**
+     * Implement this method and return an implementation of the interface {@link TransactionStateFileWriter}
+     *
+     * @return the writer used to write files inside a transaction directory
+     */
+    Optional<TransactionStateFileWriter> getTransactionStateFileWriter(@NonNull SeaTunnelRowType seaTunnelRowTypeInfo,
+                                                                       @NonNull TransactionFileNameGenerator transactionFileNameGenerator,
+                                                                       @NonNull PartitionDirNameGenerator partitionDirNameGenerator,
+                                                                       @NonNull List<Integer> sinkColumnsIndexInRow,
+                                                                       @NonNull String tmpPath,
+                                                                       @NonNull String targetPath,
+                                                                       @NonNull String jobId,
+                                                                       int subTaskIndex,
+                                                                       @NonNull String fieldDelimiter,
+                                                                       @NonNull String rowDelimiter,
+                                                                       @NonNull FileSystem fileSystem);
+
+    Optional<FileSystemCommitter> getFileSystemCommitter();
+
+    Optional<FileSystem> getFileSystem();
+}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/transaction/Transaction.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/transaction/Transaction.java
new file mode 100644
index 00000000000..d9a39c5df3b
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/transaction/Transaction.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.file.sink.transaction;
+
+import org.apache.seatunnel.api.sink.SinkWriter;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.AbstractFileSink;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.FileCommitInfo;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.FileSinkAggregatedCommitter;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.FileSinkState;
+
+import lombok.NonNull;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Optional;
+
+public interface Transaction extends Serializable {
+    /**
+     * A new transaction needs to be started after each checkpoint is completed.
+     *
+     * @param checkpointId the id of the checkpoint; a checkpoint indicates that every task has taken a snapshot of its state
+     * @return transactionId
+     */
+    String beginTransaction(@NonNull Long checkpointId);
+
+    /**
+     * Abort the current transaction. Called when {@link org.apache.seatunnel.connectors.seatunnel.file.sink.TransactionStateFileSinkWriter#prepareCommit()} or {@link org.apache.seatunnel.connectors.seatunnel.file.sink.TransactionStateFileSinkWriter#snapshotState(long)} fails
+     */
+    void abortTransaction();
+
+    /**
+     * Get all transactionIds after the given transactionId.
+     * This method is called by {@link AbstractFileSink#restoreWriter(SinkWriter.Context, List)}.
+     * We get the transactionId of the last successful commit from {@link FileSinkState}, and
+     * all transactionIds after it are dirty transactions that need to be rolled back.
+     *
+     * @param transactionId the transactionId of the last successful commit, obtained from {@link FileSinkState}
+     * @return transactionId list
+     */
+    List<String> getTransactionAfter(@NonNull String transactionId);
+
+    /**
+     * Called by {@link org.apache.seatunnel.connectors.seatunnel.file.sink.TransactionStateFileSinkWriter#prepareCommit()}.
+     * We should end the transaction in this method. After this method is called, the transaction no longer accepts data writes.
+     *
+     * @return the commit information that can be committed by {@link FileSinkAggregatedCommitter#commit(List)}
+     */
+    Optional<FileCommitInfo> prepareCommit();
+
+    /**
+     * Roll back the transactions that have not been committed.
+     *
+     * @param transactionIds transactionIds
+     */
+    void abortTransactions(List<String> transactionIds);
+}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/transaction/TransactionFileNameGenerator.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/transaction/TransactionFileNameGenerator.java
new file mode 100644
index 00000000000..e976910bebe
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/transaction/TransactionFileNameGenerator.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
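+ */
+
+// Since this is a single-method Serializable interface, an implementation can
+// be as small as a lambda (a minimal sketch, not part of this patch):
+//
+//     TransactionFileNameGenerator generator = transactionId -> transactionId + ".txt";
+//     generator.generateFileName("T_12345678_1_0");   // -> "T_12345678_1_0.txt"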
+
+package org.apache.seatunnel.connectors.seatunnel.file.sink.transaction;
+
+import java.io.Serializable;
+
+public interface TransactionFileNameGenerator extends Serializable {
+    String generateFileName(String transactionId);
+}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/transaction/TransactionStateFileWriter.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/transaction/TransactionStateFileWriter.java
new file mode 100644
index 00000000000..a1a66ec20a4
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/transaction/TransactionStateFileWriter.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.file.sink.transaction;
+
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.FileSinkState;
+
+import lombok.NonNull;
+
+import java.util.List;
+
+public interface TransactionStateFileWriter extends Transaction {
+    void write(@NonNull SeaTunnelRow seaTunnelRow);
+
+    /**
+     * In this method we need to finish writing the file. The following operations are often required:
+     * 1. Flush memory to disk.
+     * 2. Close the output stream.
+     * 3. Add the mapping between the transaction file path and the target file path to needMoveFiles.
+     */
+    void finishAndCloseWriteFile();
+
+    /**
+     * snapshotState
+     *
+     * @param checkpointId checkpointId
+     * @return the states to store in the checkpoint
+     */
+    List<FileSinkState> snapshotState(long checkpointId);
+}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/writer/AbstractTransactionStateFileWriter.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/writer/AbstractTransactionStateFileWriter.java
new file mode 100644
index 00000000000..b14827b5671
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/writer/AbstractTransactionStateFileWriter.java
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.file.sink.writer;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.connectors.seatunnel.file.config.Constant;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.FileCommitInfo;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.FileSinkState;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.FileSystem;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.transaction.TransactionFileNameGenerator;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.transaction.TransactionStateFileWriter;
+
+import com.google.common.collect.Lists;
+import lombok.NonNull;
+import org.apache.commons.collections4.CollectionUtils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+public abstract class AbstractTransactionStateFileWriter implements TransactionStateFileWriter {
+    // transaction file path -> target file path, moved by the committer on commit
+    protected Map<String, String> needMoveFiles;
+    protected SeaTunnelRowType seaTunnelRowTypeInfo;
+    protected String jobId;
+    protected int subTaskIndex;
+
+    // partition dir -> file path currently being written for that partition
+    protected Map<String, String> beingWrittenFile;
+
+    protected String transactionId;
+
+    protected String transactionDir;
+
+    private long checkpointId;
+
+    private TransactionFileNameGenerator transactionFileNameGenerator;
+
+    protected List<Integer> sinkColumnsIndexInRow;
+
+    private String targetPath;
+
+    private String tmpPath;
+
+    private PartitionDirNameGenerator partitionDirNameGenerator;
+
+    private FileSystem fileSystem;
+
+    private Map<String, List<String>> partitionDirAndValsMap;
+
+    public AbstractTransactionStateFileWriter(@NonNull SeaTunnelRowType seaTunnelRowTypeInfo,
+                                              @NonNull TransactionFileNameGenerator transactionFileNameGenerator,
+                                              @NonNull PartitionDirNameGenerator partitionDirNameGenerator,
+                                              @NonNull List<Integer> sinkColumnsIndexInRow,
+                                              @NonNull String tmpPath,
+                                              @NonNull String targetPath,
+                                              @NonNull String jobId,
+                                              int subTaskIndex,
+                                              @NonNull FileSystem fileSystem) {
+        checkArgument(subTaskIndex > -1);
+
+        this.seaTunnelRowTypeInfo = seaTunnelRowTypeInfo;
+        this.transactionFileNameGenerator = transactionFileNameGenerator;
+        this.sinkColumnsIndexInRow = sinkColumnsIndexInRow;
+        this.tmpPath = tmpPath;
+        this.targetPath = targetPath;
+        this.jobId = jobId;
+        this.subTaskIndex = subTaskIndex;
+        this.partitionDirNameGenerator = partitionDirNameGenerator;
+        this.fileSystem = fileSystem;
+    }
+
+    public String getOrCreateFilePathBeingWritten(@NonNull SeaTunnelRow seaTunnelRow) {
+        Map<String, List<String>> dataPartitionDirAndValsMap = this.partitionDirNameGenerator.generatorPartitionDir(seaTunnelRow);
+        String beingWrittenFileKey = dataPartitionDirAndValsMap.keySet().toArray()[0].toString();
+        // get filePath from beingWrittenFile
+        String beingWrittenFilePath = beingWrittenFile.get(beingWrittenFileKey);
+        if (beingWrittenFilePath != null) {
+            return beingWrittenFilePath;
+        } else {
+            StringBuilder sbf = new StringBuilder(this.transactionDir);
+            sbf.append("/").append(beingWrittenFileKey).append("/").append(transactionFileNameGenerator.generateFileName(this.transactionId));
+            String newBeingWrittenFilePath = sbf.toString();
+            beingWrittenFile.put(beingWrittenFileKey, newBeingWrittenFilePath);
+            if (!Constant.NON_PARTITION.equals(beingWrittenFileKey)) {
+                partitionDirAndValsMap.putAll(dataPartitionDirAndValsMap);
+            }
+            return newBeingWrittenFilePath;
+        }
+    }
+
+    public String getTargetLocation(@NonNull String seaTunnelFilePath) {
+        // use literal replacement, the paths must not be treated as regular expressions
+        String tmpPath = seaTunnelFilePath.replace(this.transactionDir, targetPath);
+        return tmpPath.replace(Constant.NON_PARTITION + "/", "");
+    }
+
+    @Override
+    public String beginTransaction(@NonNull Long checkpointId) {
+        this.finishAndCloseWriteFile();
+        this.transactionId = "T" + Constant.TRANSACTION_ID_SPLIT + jobId + Constant.TRANSACTION_ID_SPLIT + subTaskIndex + Constant.TRANSACTION_ID_SPLIT + checkpointId;
+        this.transactionDir = getTransactionDir(this.transactionId);
+        this.needMoveFiles = new HashMap<>();
+        this.partitionDirAndValsMap = new HashMap<>();
+        this.beingWrittenFile = new HashMap<>();
+        this.beginTransaction(this.transactionId);
+        this.checkpointId = checkpointId;
+        return this.transactionId;
+    }
+
+    private String getTransactionDir(@NonNull String transactionId) {
+        StringBuilder sbf = new StringBuilder(this.tmpPath);
+        sbf.append("/").append(Constant.SEATUNNEL).append("/").append(jobId).append("/").append(transactionId);
+        return sbf.toString();
+    }
+
+    public abstract void beginTransaction(String transactionId);
+
+    @Override
+    public void abortTransaction() {
+        this.finishAndCloseWriteFile();
+        // drop the transaction dir
+        try {
+            abortTransaction(this.transactionId);
+            fileSystem.deleteFile(this.transactionDir);
+        } catch (IOException e) {
+            throw new RuntimeException("abort transaction " + this.transactionId + " error.", e);
+        }
+    }
+
+    public abstract void abortTransaction(String transactionId);
+
+    @Override
+    public List<String> getTransactionAfter(@NonNull String transactionId) {
+        StringBuilder sbf = new StringBuilder(this.targetPath);
+        sbf.append("/").append(Constant.SEATUNNEL).append("/").append(jobId).append("/");
+        String jobDir = sbf.toString();
+
+        // get all transaction dirs
+        try {
+            List<String> transactionDirList = fileSystem.dirList(jobDir);
+            return transactionDirList.stream().map(dir -> dir.replace(jobDir, "")).collect(Collectors.toList());
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    @Override
+    public Optional<FileCommitInfo> prepareCommit() {
+        this.finishAndCloseWriteFile();
+        // this.needMoveFiles is cleared by beginTransaction, so we need to copy it
+        Map<String, String> commitMap = new HashMap<>(this.needMoveFiles);
+
+        Map<String, List<String>> copyMap = this.partitionDirAndValsMap.entrySet().stream()
+            .collect(Collectors.toMap(Map.Entry::getKey, e -> new ArrayList<>(e.getValue())));
+        return Optional.of(new FileCommitInfo(commitMap, copyMap, this.transactionDir));
+    }
+
+    @Override
+    public void abortTransactions(List<String> transactionIds) {
+        if (CollectionUtils.isEmpty(transactionIds)) {
+            return;
+        }
+
+        transactionIds.forEach(transactionId -> {
+            try {
+                abortTransaction(transactionId);
+                // drop the whole transaction dir, not just a file named after the id
+                fileSystem.deleteFile(getTransactionDir(transactionId));
+            } catch (IOException e) {
+                throw new RuntimeException("abort transaction " + transactionId + " error.", e);
+            }
+        });
+    }
+
+    @Override
+    public List<FileSinkState> snapshotState(long checkpointId) {
+        return Lists.newArrayList(new FileSinkState(this.transactionId, this.checkpointId));
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/writer/FileSinkPartitionDirNameGenerator.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/writer/FileSinkPartitionDirNameGenerator.java
new file mode 100644
index 00000000000..a9175409fab
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/writer/FileSinkPartitionDirNameGenerator.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
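+ */
+
+// The contract this class implements, sketched on one row (illustrative only;
+// the concrete values are assumptions matching the unit test below): for a row
+// whose partition fields are c3="test" and c4=3 and the expression
+// "${k0}=${v0}/${k1}=${v1}", generatorPartitionDir returns a single-entry map
+// from the generated directory to the partition values, e.g.
+//
+//     {"c3=test/c4=3" -> ["test", "3"]}
+//
+// and a row without partition fields maps Constant.NON_PARTITION to null.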
+
+package org.apache.seatunnel.connectors.seatunnel.file.sink.writer;
+
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.common.utils.VariablesSubstitute;
+import org.apache.seatunnel.connectors.seatunnel.file.config.Constant;
+
+import lombok.Data;
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+@Data
+public class FileSinkPartitionDirNameGenerator implements PartitionDirNameGenerator {
+    private List<String> partitionFieldList;
+
+    private List<Integer> partitionFieldsIndexInRow;
+
+    private String partitionDirExpression;
+
+    private String[] keys;
+
+    private String[] values;
+
+    public FileSinkPartitionDirNameGenerator(List<String> partitionFieldList,
+                                             List<Integer> partitionFieldsIndexInRow,
+                                             String partitionDirExpression) {
+        this.partitionFieldList = partitionFieldList;
+        this.partitionFieldsIndexInRow = partitionFieldsIndexInRow;
+        this.partitionDirExpression = partitionDirExpression;
+
+        if (!CollectionUtils.isEmpty(partitionFieldList)) {
+            keys = new String[partitionFieldList.size()];
+            values = new String[partitionFieldList.size()];
+            for (int i = 0; i < partitionFieldList.size(); i++) {
+                keys[i] = "k" + i;
+                values[i] = "v" + i;
+            }
+        }
+    }
+
+    @Override
+    public Map<String, List<String>> generatorPartitionDir(SeaTunnelRow seaTunnelRow) {
+        Map<String, List<String>> partitionDirAndValsMap = new HashMap<>(1);
+        if (CollectionUtils.isEmpty(this.partitionFieldsIndexInRow)) {
+            partitionDirAndValsMap.put(Constant.NON_PARTITION, null);
+            return partitionDirAndValsMap;
+        }
+
+        List<String> vals = new ArrayList<>(partitionFieldsIndexInRow.size());
+        String partitionDir;
+        if (StringUtils.isBlank(partitionDirExpression)) {
+            StringBuilder sbd = new StringBuilder();
+            for (int i = 0; i < partitionFieldsIndexInRow.size(); i++) {
+                sbd.append(partitionFieldList.get(i))
+                    .append("=")
+                    .append(seaTunnelRow.getFields()[partitionFieldsIndexInRow.get(i)])
+                    .append("/");
+                vals.add(seaTunnelRow.getFields()[partitionFieldsIndexInRow.get(i)].toString());
+            }
+            partitionDir = sbd.toString();
+        } else {
+            Map<String, String> valueMap = new HashMap<>(partitionFieldList.size() * 2);
+            for (int i = 0; i < partitionFieldsIndexInRow.size(); i++) {
+                valueMap.put(keys[i], partitionFieldList.get(i));
+                valueMap.put(values[i], seaTunnelRow.getFields()[partitionFieldsIndexInRow.get(i)].toString());
+                vals.add(seaTunnelRow.getFields()[partitionFieldsIndexInRow.get(i)].toString());
+            }
+            partitionDir = VariablesSubstitute.substitute(partitionDirExpression, valueMap);
+        }
+
+        partitionDirAndValsMap.put(partitionDir, vals);
+        return partitionDirAndValsMap;
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/writer/FileSinkTransactionFileNameGenerator.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/writer/FileSinkTransactionFileNameGenerator.java
new file mode 100644
index 00000000000..ba005c7de49
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/writer/FileSinkTransactionFileNameGenerator.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.file.sink.writer;
+
+import org.apache.seatunnel.common.utils.VariablesSubstitute;
+import org.apache.seatunnel.connectors.seatunnel.file.config.Constant;
+import org.apache.seatunnel.connectors.seatunnel.file.config.FileFormat;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.transaction.TransactionFileNameGenerator;
+
+import lombok.NonNull;
+import org.apache.commons.lang3.StringUtils;
+
+import java.time.ZonedDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.UUID;
+
+public class FileSinkTransactionFileNameGenerator implements TransactionFileNameGenerator {
+    private FileFormat fileFormat;
+
+    private String fileNameExpression;
+
+    private String timeFormat;
+
+    public FileSinkTransactionFileNameGenerator(@NonNull FileFormat fileFormat,
+                                                String fileNameExpression,
+                                                @NonNull String timeFormat) {
+        this.fileFormat = fileFormat;
+        this.fileNameExpression = fileNameExpression;
+        this.timeFormat = timeFormat;
+    }
+
+    @Override
+    public String generateFileName(String transactionId) {
+        if (StringUtils.isBlank(fileNameExpression)) {
+            return transactionId + fileFormat.getSuffix();
+        }
+        DateTimeFormatter df = DateTimeFormatter.ofPattern(timeFormat);
+        final String formattedDate = df.format(ZonedDateTime.now());
+
+        final Map<String, String> valuesMap = new HashMap<>(4);
+        valuesMap.put("uuid", UUID.randomUUID().toString());
+        valuesMap.put("now", formattedDate);
+        valuesMap.put(timeFormat, formattedDate);
+        valuesMap.put(Constant.TRANSACTION_EXPRESSION, transactionId);
+        String substitute = VariablesSubstitute.substitute(fileNameExpression, valuesMap);
+        return substitute + fileFormat.getSuffix();
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/file/writer/FileWriter.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/writer/FileWriter.java
similarity index 76%
rename from seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/file/writer/FileWriter.java
rename to seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/writer/FileWriter.java
index 8ee8777a1e7..276c981fa64 100644
--- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/file/writer/FileWriter.java
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/writer/FileWriter.java
@@ -15,21 +15,18 @@
  * limitations under the License.
  */
 
-package org.apache.seatunnel.connectors.seatunnel.hive.sink.file.writer;
+package org.apache.seatunnel.connectors.seatunnel.file.sink.writer;
 
 import org.apache.seatunnel.api.table.type.SeaTunnelRow;
 
 import lombok.NonNull;
 
-import java.util.Map;
+import java.io.Serializable;
 
-public interface FileWriter {
+public interface FileWriter extends Serializable {
 
     void write(@NonNull SeaTunnelRow seaTunnelRow);
 
-    @NonNull
-    Map<String, String> getNeedMoveFiles();
-
     /**
      * In this method we need finish write the file. The following operations are often required:
      * 1. Flush memory to disk.
@@ -37,13 +34,4 @@ public interface FileWriter {
      * 3. Add the mapping relationship between seatunnel file path and hive file path to needMoveFiles.
      */
     void finishAndCloseWriteFile();
-
-    /**
-     * The writer needs to be reset after each checkpoint is completed
-     *
-     * @param checkpointId checkpointId
-     */
-    void resetFileWriter(@NonNull String checkpointId);
-
-    void abort();
 }
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/writer/PartitionDirNameGenerator.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/writer/PartitionDirNameGenerator.java
new file mode 100644
index 00000000000..05c90256bda
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/writer/PartitionDirNameGenerator.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.file.sink.writer;
+
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Map;
+
+public interface PartitionDirNameGenerator extends Serializable {
+    Map<String, List<String>> generatorPartitionDir(SeaTunnelRow seaTunnelRow);
+}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/TestFileSinkPartitionDirNameGenerator.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/TestFileSinkPartitionDirNameGenerator.java
new file mode 100644
index 00000000000..0867f104ad0
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/TestFileSinkPartitionDirNameGenerator.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.file.writer;
+
+import org.apache.seatunnel.api.table.type.BasicType;
+import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.connectors.seatunnel.file.config.Constant;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.FileSinkPartitionDirNameGenerator;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.PartitionDirNameGenerator;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.powermock.modules.junit4.PowerMockRunner;
+
+import java.util.ArrayList;
+import java.util.List;
+
+@RunWith(PowerMockRunner.class)
+public class TestFileSinkPartitionDirNameGenerator {
+
+    @SuppressWarnings({"checkstyle:MagicNumber", "checkstyle:RegexpSingleline"})
+    @Test
+    public void testPartitionDirNameGenerator() {
+        String[] fieldNames = new String[]{"c1", "c2", "c3", "c4"};
+        SeaTunnelDataType<?>[] seaTunnelDataTypes = new SeaTunnelDataType<?>[]{BasicType.BOOLEAN_TYPE, BasicType.INT_TYPE, BasicType.STRING_TYPE, BasicType.INT_TYPE};
+        SeaTunnelRowType seaTunnelRowTypeInfo = new SeaTunnelRowType(fieldNames, seaTunnelDataTypes);
+
+        Object[] row1 = new Object[]{true, 1, "test", 3};
+        SeaTunnelRow seaTunnelRow = new SeaTunnelRow(row1);
+
+        List<String> partitionFieldList = new ArrayList<>();
+        partitionFieldList.add("c3");
+        partitionFieldList.add("c4");
+
+        List<Integer> partitionFieldsIndexInRow = new ArrayList<>();
+        partitionFieldsIndexInRow.add(2);
+        partitionFieldsIndexInRow.add(3);
+
+        PartitionDirNameGenerator partitionDirNameGenerator = new FileSinkPartitionDirNameGenerator(partitionFieldList, partitionFieldsIndexInRow, "${v0}/${v1}");
+        String partitionDir = partitionDirNameGenerator.generatorPartitionDir(seaTunnelRow).keySet().toArray()[0].toString();
+        Assert.assertEquals("test/3", partitionDir);
+
+        partitionDirNameGenerator = new FileSinkPartitionDirNameGenerator(partitionFieldList, partitionFieldsIndexInRow, "${k0}=${v0}/${k1}=${v1}");
+        partitionDir = partitionDirNameGenerator.generatorPartitionDir(seaTunnelRow).keySet().toArray()[0].toString();
+        Assert.assertEquals("c3=test/c4=3", partitionDir);
+
+        partitionDirNameGenerator = new FileSinkPartitionDirNameGenerator(null, null, "${k0}=${v0}/${k1}=${v1}");
+        partitionDir = partitionDirNameGenerator.generatorPartitionDir(seaTunnelRow).keySet().toArray()[0].toString();
+        Assert.assertEquals(Constant.NON_PARTITION, partitionDir);
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/TestFileSinkTransactionFileNameGenerator.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/TestFileSinkTransactionFileNameGenerator.java
new file mode 100644
index 00000000000..275aee86317
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/TestFileSinkTransactionFileNameGenerator.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.file.writer;
+
+import org.apache.seatunnel.connectors.seatunnel.file.config.FileFormat;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.FileSinkTransactionFileNameGenerator;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.powermock.modules.junit4.PowerMockRunner;
+
+import java.time.ZonedDateTime;
+import java.time.format.DateTimeFormatter;
+
+@RunWith(PowerMockRunner.class)
+public class TestFileSinkTransactionFileNameGenerator {
+
+    @Test
+    public void testGenerateFileName() {
+        FileSinkTransactionFileNameGenerator fileNameGenerator = new FileSinkTransactionFileNameGenerator(FileFormat.TEXT, "test_${transactionId}_${uuid}_${now}", "yyyy.MM.dd");
+        DateTimeFormatter df = DateTimeFormatter.ofPattern("yyyy.MM.dd");
+        final String formattedDate = df.format(ZonedDateTime.now());
+        String fileName = fileNameGenerator.generateFileName("T_12345678_1_0");
+        Assert.assertTrue(fileName.startsWith("test_T_12345678_1_0_"));
+        Assert.assertTrue(fileName.endsWith(formattedDate + ".txt"));
+
+        fileNameGenerator = new FileSinkTransactionFileNameGenerator(FileFormat.TEXT, null, "yyyy.MM.dd");
+        fileName = fileNameGenerator.generateFileName("T_12345678_1_0");
+        Assert.assertEquals("T_12345678_1_0.txt", fileName);
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-hadoop/pom.xml b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/pom.xml
new file mode 100644
index 00000000000..400f47d76d8
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/pom.xml
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements. See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License. You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>connector-file</artifactId>
+        <groupId>org.apache.seatunnel</groupId>
+        <version>${revision}</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>connector-file-hadoop</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.seatunnel</groupId>
+            <artifactId>connector-file-base</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-shaded-hadoop-2</artifactId>
+            <version>${flink-shaded-hadoop-2.version}</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.powermock</groupId>
+            <artifactId>powermock-module-junit4</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.powermock</groupId>
+            <artifactId>powermock-api-mockito2</artifactId>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+</project>
\ No newline at end of file
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/HdfsFileSink.java b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/HdfsFileSink.java
new file mode 100644
index 00000000000..240a4bbc753
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/HdfsFileSink.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.file.sink.hdfs;
+
+import org.apache.seatunnel.api.sink.SeaTunnelSink;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.AbstractFileSink;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.SinkFileSystemPlugin;
+
+import com.google.auto.service.AutoService;
+
+@AutoService(SeaTunnelSink.class)
+public class HdfsFileSink extends AbstractFileSink {
+    @Override
+    public SinkFileSystemPlugin getSinkFileSystemPlugin() {
+        return new HdfsFileSinkPlugin();
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/HdfsFileSinkPlugin.java b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/HdfsFileSinkPlugin.java
new file mode 100644
index 00000000000..6e6c9380cf0
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/HdfsFileSinkPlugin.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
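+ */
+
+// The plugin is looked up by the name exposed through getPluginName(), which
+// resolves to FileSystemType.HDFS.getSinkFileSystemPluginName() ("HdfsFile").
+// A minimal usage sketch (illustrative only, not part of this patch):
+//
+//     SinkFileSystemPlugin plugin = new HdfsFileSinkPlugin();
+//     assert "HdfsFile".equals(plugin.getPluginName());
+//     FileSystem fs = plugin.getFileSystem().orElseThrow(IllegalStateException::new);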
+ */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink.hdfs; + +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.seatunnel.file.sink.config.FileSystemType; +import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.FileSystem; +import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.FileSystemCommitter; +import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.SinkFileSystemPlugin; +import org.apache.seatunnel.connectors.seatunnel.file.sink.transaction.TransactionFileNameGenerator; +import org.apache.seatunnel.connectors.seatunnel.file.sink.transaction.TransactionStateFileWriter; +import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.PartitionDirNameGenerator; + +import lombok.NonNull; + +import java.util.List; +import java.util.Optional; + +public class HdfsFileSinkPlugin implements SinkFileSystemPlugin { + @Override + public String getPluginName() { + return FileSystemType.HDFS.getSinkFileSystemPluginName(); + } + + @Override + public Optional getTransactionStateFileWriter(@NonNull SeaTunnelRowType seaTunnelRowTypeInfo, + @NonNull TransactionFileNameGenerator transactionFileNameGenerator, + @NonNull PartitionDirNameGenerator partitionDirNameGenerator, + @NonNull List sinkColumnsIndexInRow, + @NonNull String tmpPath, + @NonNull String targetPath, + @NonNull String jobId, + int subTaskIndex, + @NonNull String fieldDelimiter, + @NonNull String rowDelimiter, + @NonNull FileSystem fileSystem) { + return Optional.of(new HdfsTxtTransactionStateFileWriter(seaTunnelRowTypeInfo, + transactionFileNameGenerator, + partitionDirNameGenerator, + sinkColumnsIndexInRow, + tmpPath, + targetPath, + jobId, + subTaskIndex, + fieldDelimiter, + rowDelimiter, + fileSystem)); + } + + @Override + public Optional getFileSystemCommitter() { + return Optional.of(new HdfsFileSystemCommitter()); + } + + @Override + public Optional getFileSystem() { + return Optional.of(new HdfsFileSystem()); + } +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/HdfsFileSystem.java b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/HdfsFileSystem.java new file mode 100644 index 00000000000..00d7c6f064f --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/HdfsFileSystem.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink.hdfs; + +import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.FileSystem; + +import org.apache.hadoop.fs.Path; + +import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; + +public class HdfsFileSystem implements FileSystem { + @Override + public void deleteFile(String path) throws IOException { + HdfsUtils.deleteFile(path); + } + + @Override + public List dirList(String dirPath) throws IOException { + List paths = HdfsUtils.dirList(dirPath); + return paths.stream().map(dir -> dir.getName()).collect(Collectors.toList()); + } +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/HdfsFileSystemCommitter.java b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/HdfsFileSystemCommitter.java new file mode 100644 index 00000000000..69884720636 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/HdfsFileSystemCommitter.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink.hdfs; + +import org.apache.seatunnel.connectors.seatunnel.file.sink.FileAggregatedCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.FileSystemCommitter; + +import lombok.NonNull; + +import java.io.IOException; +import java.util.Map; + +public class HdfsFileSystemCommitter implements FileSystemCommitter { + @Override + public void commitTransaction(@NonNull FileAggregatedCommitInfo aggregateCommitInfo) throws IOException { + for (Map.Entry> entry : aggregateCommitInfo.getTransactionMap().entrySet()) { + for (Map.Entry mvFileEntry : entry.getValue().entrySet()) { + HdfsUtils.renameFile(mvFileEntry.getKey(), mvFileEntry.getValue(), true); + } + // delete the transaction dir + HdfsUtils.deleteFile(entry.getKey()); + } + } + + @Override + public void abortTransaction(@NonNull FileAggregatedCommitInfo aggregateCommitInfo) throws IOException { + for (Map.Entry> entry : aggregateCommitInfo.getTransactionMap().entrySet()) { + // rollback the file + for (Map.Entry mvFileEntry : entry.getValue().entrySet()) { + if (HdfsUtils.fileExist(mvFileEntry.getValue()) && !HdfsUtils.fileExist(mvFileEntry.getKey())) { + HdfsUtils.renameFile(mvFileEntry.getValue(), mvFileEntry.getKey(), true); + } + } + // delete the transaction dir + HdfsUtils.deleteFile(entry.getKey()); + } + } +} diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/file/writer/HdfsTxtFileWriter.java b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/HdfsTxtTransactionStateFileWriter.java similarity index 57% rename from seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/file/writer/HdfsTxtFileWriter.java rename to seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/HdfsTxtTransactionStateFileWriter.java index 71b26568fd7..81882c414c6 100644 --- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/file/writer/HdfsTxtFileWriter.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/HdfsTxtTransactionStateFileWriter.java @@ -15,98 +15,75 @@ * limitations under the License. 
*/ -package org.apache.seatunnel.connectors.seatunnel.hive.sink.file.writer; +package org.apache.seatunnel.connectors.seatunnel.file.sink.hdfs; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; -import org.apache.seatunnel.connectors.seatunnel.hive.sink.HiveSinkConfig; +import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.FileSystem; +import org.apache.seatunnel.connectors.seatunnel.file.sink.transaction.TransactionFileNameGenerator; +import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.AbstractTransactionStateFileWriter; +import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.PartitionDirNameGenerator; -import lombok.Lombok; import lombok.NonNull; import org.apache.hadoop.fs.FSDataOutputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; -import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; -import java.util.stream.IntStream; -public class HdfsTxtFileWriter extends AbstractFileWriter { - private static final Logger LOGGER = LoggerFactory.getLogger(HdfsTxtFileWriter.class); +public class HdfsTxtTransactionStateFileWriter extends AbstractTransactionStateFileWriter { + private static final Logger LOGGER = LoggerFactory.getLogger(HdfsTxtTransactionStateFileWriter.class); private Map beingWrittenOutputStream; - protected final int[] sinkColumnIndexes; - public HdfsTxtFileWriter(SeaTunnelRowType seaTunnelRowType, - HiveSinkConfig hiveSinkConfig, - long sinkId, - int subTaskIndex) { - super(seaTunnelRowType, hiveSinkConfig, sinkId, subTaskIndex); + private String fieldDelimiter; + private String rowDelimiter; + + public HdfsTxtTransactionStateFileWriter(@NonNull SeaTunnelRowType seaTunnelRowTypeInfo, + @NonNull TransactionFileNameGenerator transactionFileNameGenerator, + @NonNull PartitionDirNameGenerator partitionDirNameGenerator, + @NonNull List sinkColumnsIndexInRow, + @NonNull String tmpPath, + @NonNull String targetPath, + @NonNull String jobId, + int subTaskIndex, + @NonNull String fieldDelimiter, + @NonNull String rowDelimiter, + @NonNull FileSystem fileSystem) { + super(seaTunnelRowTypeInfo, transactionFileNameGenerator, partitionDirNameGenerator, sinkColumnsIndexInRow, tmpPath, targetPath, jobId, subTaskIndex, fileSystem); + + this.fieldDelimiter = fieldDelimiter; + this.rowDelimiter = rowDelimiter; beingWrittenOutputStream = new HashMap<>(); - List sinkColumns = hiveSinkConfig.getSinkColumns(); - if (sinkColumns == null || sinkColumns.size() == 0) { - this.sinkColumnIndexes = IntStream.range(0, seaTunnelRowType.getTotalFields()).toArray(); - } else { - this.sinkColumnIndexes = IntStream.range(0, seaTunnelRowType.getTotalFields()) - .filter(i -> sinkColumns.contains(seaTunnelRowType.getFieldName(i))) - .toArray(); - } - } - - @Override - @NonNull - public String getFileSuffix() { - return "txt"; } @Override - public void resetMoreFileWriter(@NonNull String checkpointId) { + public void beginTransaction(String transactionId) { this.beingWrittenOutputStream = new HashMap<>(); } @Override - public void abortMore() { - // delete files - beingWrittenOutputStream.keySet().stream().forEach(file -> { - try { - boolean deleted = HdfsUtils.deleteFile(file); - if (!deleted) { - LOGGER.error("delete file {} error", file); - throw new IOException(String.format("delete file {} error", file)); - } - } catch (IOException e) { - LOGGER.error("delete file {} error", file); - throw new 
RuntimeException(e); - } - }); - + public void abortTransaction(String transactionId) { this.beingWrittenOutputStream = new HashMap<>(); } @Override public void write(@NonNull SeaTunnelRow seaTunnelRow) { - Lombok.checkNotNull(seaTunnelRow, "seaTunnelRow is null"); String filePath = getOrCreateFilePathBeingWritten(seaTunnelRow); FSDataOutputStream fsDataOutputStream = getOrCreateOutputStream(filePath); String line = transformRowToLine(seaTunnelRow); try { fsDataOutputStream.write(line.getBytes()); - fsDataOutputStream.write(hiveSinkConfig.getHiveTxtFileLineDelimiter().getBytes()); + fsDataOutputStream.write(rowDelimiter.getBytes()); } catch (IOException e) { LOGGER.error("write data to file {} error", filePath); throw new RuntimeException(e); } } - @NonNull - @Override - public Map getNeedMoveFiles() { - return this.needMoveFiles; - } - @Override public void finishAndCloseWriteFile() { beingWrittenOutputStream.entrySet().forEach(entry -> { @@ -123,7 +100,7 @@ public void finishAndCloseWriteFile() { } } - needMoveFiles.put(entry.getKey(), getHiveLocation(entry.getKey())); + needMoveFiles.put(entry.getKey(), getTargetLocation(entry.getKey())); }); } @@ -142,10 +119,6 @@ private FSDataOutputStream getOrCreateOutputStream(@NonNull String filePath) { } private String transformRowToLine(@NonNull SeaTunnelRow seaTunnelRow) { - return Arrays.stream(sinkColumnIndexes) - .boxed() - .map(seaTunnelRow::getField) - .map(value -> value == null ? "" : value.toString()) - .collect(Collectors.joining(hiveSinkConfig.getHiveTxtFileFieldDelimiter())); + return this.sinkColumnsIndexInRow.stream().map(index -> seaTunnelRow.getFields()[index] == null ? "" : seaTunnelRow.getFields()[index].toString()).collect(Collectors.joining(fieldDelimiter)); } } diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/file/writer/HdfsUtils.java b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/HdfsUtils.java similarity index 60% rename from seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/file/writer/HdfsUtils.java rename to seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/HdfsUtils.java index 23b1e5843f2..421c7f7ebbd 100644 --- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/file/writer/HdfsUtils.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/HdfsUtils.java @@ -15,17 +15,22 @@ * limitations under the License. 
*/ -package org.apache.seatunnel.connectors.seatunnel.hive.sink.file.writer; +package org.apache.seatunnel.connectors.seatunnel.file.sink.hdfs; import lombok.NonNull; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.FileNotFoundException; import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.List; public class HdfsUtils { private static final Logger LOGGER = LoggerFactory.getLogger(HdfsUtils.class); @@ -35,9 +40,11 @@ public class HdfsUtils { public static FileSystem getHdfsFs(@NonNull String path) throws IOException { Configuration conf = new Configuration(); + LOGGER.info(System.getenv("HADOOP_CONF_DIR")); + conf.addResource(new Path(System.getenv("HADOOP_CONF_DIR") + "/core-site.xml")); + conf.addResource(new Path(System.getenv("HADOOP_CONF_DIR") + "/hdfs-site.xml")); conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem"); - conf.set("fs.defaultFs", path); - return FileSystem.get(conf); + return FileSystem.get(URI.create(path), conf); } public static FSDataOutputStream getOutputStream(@NonNull String outFilePath) throws IOException { @@ -47,9 +54,19 @@ public static FSDataOutputStream getOutputStream(@NonNull String outFilePath) th return fsDataOutputStream; } - public static boolean deleteFile(@NonNull String file) throws IOException { + public static void createFile(@NonNull String filePath) throws IOException { + FileSystem hdfsFs = getHdfsFs(filePath); + Path path = new Path(filePath); + if (!hdfsFs.createNewFile(path)) { + throw new IOException("create file " + filePath + " error"); + } + } + + public static void deleteFile(@NonNull String file) throws IOException { FileSystem hdfsFs = getHdfsFs(file); - return hdfsFs.delete(new Path(file), true); + if (!hdfsFs.delete(new Path(file), true)) { + throw new IOException("delete file " + file + " error"); + } } /** @@ -74,17 +91,22 @@ public static void renameFile(@NonNull String oldName, @NonNull String newName, if (!fileExist(newName.substring(0, newName.lastIndexOf("/")))) { createDir(newName.substring(0, newName.lastIndexOf("/"))); } - LOGGER.info("rename file :[" + oldPath + "] to [" + newPath + "] finish"); - hdfsFs.rename(oldPath, newPath); + if (hdfsFs.rename(oldPath, newPath)) { + LOGGER.info("rename file :[" + oldPath + "] to [" + newPath + "] finish"); + } else { + throw new IOException("rename file :[" + oldPath + "] to [" + newPath + "] error"); + } } - public static boolean createDir(@NonNull String filePath) + public static void createDir(@NonNull String filePath) throws IOException { FileSystem hdfsFs = getHdfsFs(filePath); Path dfs = new Path(filePath); - return hdfsFs.mkdirs(dfs); + if (!hdfsFs.mkdirs(dfs)) { + throw new IOException("create dir " + filePath + " error"); + } } public static boolean fileExist(@NonNull String filePath) @@ -93,4 +115,23 @@ public static boolean fileExist(@NonNull String filePath) Path fileName = new Path(filePath); return hdfsFs.exists(fileName); } + + /** + * get the dir in filePath + */ + public static List dirList(@NonNull String filePath) + throws FileNotFoundException, IOException { + FileSystem hdfsFs = getHdfsFs(filePath); + List pathList = new ArrayList(); + Path fileName = new Path(filePath); + FileStatus[] status = hdfsFs.listStatus(fileName); + if (status != null && status.length > 0) { + 
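+                // only sub-directories are returned here; plain files directly under the path are skipped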
for (FileStatus fileStatus : status) { + if (fileStatus.isDirectory()) { + pathList.add(fileStatus.getPath()); + } + } + } + return pathList; + } } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/FileSinkAggregatedCommitterTest.java b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/FileSinkAggregatedCommitterTest.java new file mode 100644 index 00000000000..3262ccd1cb2 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/FileSinkAggregatedCommitterTest.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink.hdfs; + +import org.apache.seatunnel.connectors.seatunnel.file.sink.FileAggregatedCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.file.sink.FileCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.file.sink.FileSinkAggregatedCommitter; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.stream.Collectors; + +public class FileSinkAggregatedCommitterTest { + @SuppressWarnings("checkstyle:UnnecessaryParentheses") + @Test + public void testCommit() throws Exception { + FileSinkAggregatedCommitter fileSinkAggregatedCommitter = new FileSinkAggregatedCommitter(new HdfsFileSystemCommitter()); + Map> transactionFiles = new HashMap<>(); + Random random = new Random(); + Long jobId = random.nextLong(); + String transactionDir = String.format("/tmp/seatunnel/seatunnel/%s/T_%s_0_1", jobId, jobId); + String targetDir = String.format("/tmp/hive/warehouse/%s", jobId); + Map needMoveFiles = new HashMap<>(); + needMoveFiles.put(transactionDir + "/c3=4/c4=rrr/test1.txt", targetDir + "/c3=4/c4=rrr/test1.txt"); + needMoveFiles.put(transactionDir + "/c3=4/c4=bbb/test1.txt", targetDir + "/c3=4/c4=bbb/test1.txt"); + HdfsUtils.createFile(transactionDir + "/c3=4/c4=rrr/test1.txt"); + HdfsUtils.createFile(transactionDir + "/c3=4/c4=bbb/test1.txt"); + + transactionFiles.put(transactionDir, needMoveFiles); + + Map> partitionDirAndVals = new HashMap<>(); + partitionDirAndVals.put("/c3=4/c4=rrr", Arrays.stream((new String[]{"4", "rrr"})).collect(Collectors.toList())); + partitionDirAndVals.put("/c3=4/c4=bbb", Arrays.stream((new String[]{"4", "bbb"})).collect(Collectors.toList())); + + FileAggregatedCommitInfo fileAggregatedCommitInfo = new FileAggregatedCommitInfo(transactionFiles, partitionDirAndVals); + List 
fileAggregatedCommitInfoList = new ArrayList<>();
+        fileAggregatedCommitInfoList.add(fileAggregatedCommitInfo);
+        fileSinkAggregatedCommitter.commit(fileAggregatedCommitInfoList);
+
+        Assert.assertTrue(HdfsUtils.fileExist(targetDir + "/c3=4/c4=bbb/test1.txt"));
+        Assert.assertTrue(HdfsUtils.fileExist(targetDir + "/c3=4/c4=rrr/test1.txt"));
+        Assert.assertFalse(HdfsUtils.fileExist(transactionDir));
+    }
+
+    @SuppressWarnings("checkstyle:UnnecessaryParentheses")
+    @Test
+    public void testCombine() throws Exception {
+        FileSinkAggregatedCommitter fileSinkAggregatedCommitter = new FileSinkAggregatedCommitter(new HdfsFileSystemCommitter());
+        Map<String, Map<String, String>> transactionFiles = new HashMap<>();
+        Random random = new Random();
+        Long jobId = random.nextLong();
+        String transactionDir = String.format("/tmp/seatunnel/seatunnel/%s/T_%s_0_1", jobId, jobId);
+        String targetDir = String.format("/tmp/hive/warehouse/%s", jobId);
+        Map<String, String> needMoveFiles = new HashMap<>();
+        needMoveFiles.put(transactionDir + "/c3=3/c4=rrr/test1.txt", targetDir + "/c3=3/c4=rrr/test1.txt");
+        needMoveFiles.put(transactionDir + "/c3=4/c4=bbb/test1.txt", targetDir + "/c3=4/c4=bbb/test1.txt");
+        Map<String, List<String>> partitionDirAndVals = new HashMap<>();
+        partitionDirAndVals.put("/c3=3/c4=rrr", Arrays.stream((new String[]{"3", "rrr"})).collect(Collectors.toList()));
+        partitionDirAndVals.put("/c3=4/c4=bbb", Arrays.stream((new String[]{"4", "bbb"})).collect(Collectors.toList()));
+        FileCommitInfo fileCommitInfo = new FileCommitInfo(needMoveFiles, partitionDirAndVals, transactionDir);
+        HdfsUtils.createFile(transactionDir + "/c3=3/c4=rrr/test1.txt");
+        HdfsUtils.createFile(transactionDir + "/c3=4/c4=bbb/test1.txt");
+
+        Map<String, String> needMoveFiles1 = new HashMap<>();
+        needMoveFiles1.put(transactionDir + "/c3=4/c4=rrr/test2.txt", targetDir + "/c3=4/c4=rrr/test2.txt");
+        needMoveFiles1.put(transactionDir + "/c3=4/c4=bbb/test2.txt", targetDir + "/c3=4/c4=bbb/test2.txt");
+        Map<String, List<String>> partitionDirAndVals1 = new HashMap<>();
+        partitionDirAndVals1.put("/c3=4/c4=rrr", Arrays.stream((new String[]{"4", "rrr"})).collect(Collectors.toList()));
+        partitionDirAndVals1.put("/c3=4/c4=bbb", Arrays.stream((new String[]{"4", "bbb"})).collect(Collectors.toList()));
+        FileCommitInfo fileCommitInfo1 = new FileCommitInfo(needMoveFiles1, partitionDirAndVals1, transactionDir);
+        List<FileCommitInfo> fileCommitInfoList = new ArrayList<>();
+        fileCommitInfoList.add(fileCommitInfo);
+        fileCommitInfoList.add(fileCommitInfo1);
+
+        FileAggregatedCommitInfo combine = fileSinkAggregatedCommitter.combine(fileCommitInfoList);
+        Assert.assertEquals(1, combine.getTransactionMap().size());
+        Assert.assertEquals(4, combine.getTransactionMap().get(transactionDir).size());
+        Assert.assertEquals(targetDir + "/c3=3/c4=rrr/test1.txt", combine.getTransactionMap().get(transactionDir).get(transactionDir + "/c3=3/c4=rrr/test1.txt"));
+        Assert.assertEquals(targetDir + "/c3=4/c4=bbb/test1.txt", combine.getTransactionMap().get(transactionDir).get(transactionDir + "/c3=4/c4=bbb/test1.txt"));
+        Assert.assertEquals(targetDir + "/c3=4/c4=rrr/test2.txt", combine.getTransactionMap().get(transactionDir).get(transactionDir + "/c3=4/c4=rrr/test2.txt"));
+        Assert.assertEquals(targetDir + "/c3=4/c4=bbb/test2.txt", combine.getTransactionMap().get(transactionDir).get(transactionDir + "/c3=4/c4=bbb/test2.txt"));
+        Assert.assertEquals(3, combine.getPartitionDirAndValsMap().keySet().size());
+    }
+
+    @SuppressWarnings("checkstyle:UnnecessaryParentheses")
+    @Test
+    public void testAbort() throws Exception {
+        FileSinkAggregatedCommitter fileSinkAggregatedCommitter = new FileSinkAggregatedCommitter(new HdfsFileSystemCommitter());
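+        // abort() after a successful commit should move the committed files back out of the target dir and remove the transaction dir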
+        Map<String, Map<String, String>> transactionFiles = new HashMap<>();
+        Random random = new Random();
+        Long jobId = random.nextLong();
+        String transactionDir = String.format("/tmp/seatunnel/seatunnel/%s/T_%s_0_1", jobId, jobId);
+        String targetDir = String.format("/tmp/hive/warehouse/%s", jobId);
+        Map<String, String> needMoveFiles = new HashMap<>();
+        needMoveFiles.put(transactionDir + "/c3=4/c4=rrr/test1.txt", targetDir + "/c3=4/c4=rrr/test1.txt");
+        needMoveFiles.put(transactionDir + "/c3=4/c4=bbb/test1.txt", targetDir + "/c3=4/c4=bbb/test1.txt");
+        Map<String, List<String>> partitionDirAndVals = new HashMap<>();
+        partitionDirAndVals.put("/c3=4/c4=rrr", Arrays.stream((new String[]{"4", "rrr"})).collect(Collectors.toList()));
+        partitionDirAndVals.put("/c3=4/c4=bbb", Arrays.stream((new String[]{"4", "bbb"})).collect(Collectors.toList()));
+        HdfsUtils.createFile(transactionDir + "/c3=4/c4=rrr/test1.txt");
+        HdfsUtils.createFile(transactionDir + "/c3=4/c4=bbb/test1.txt");
+
+        transactionFiles.put(transactionDir, needMoveFiles);
+        FileAggregatedCommitInfo fileAggregatedCommitInfo = new FileAggregatedCommitInfo(transactionFiles, partitionDirAndVals);
+        List<FileAggregatedCommitInfo> fileAggregatedCommitInfoList = new ArrayList<>();
+        fileAggregatedCommitInfoList.add(fileAggregatedCommitInfo);
+        fileSinkAggregatedCommitter.commit(fileAggregatedCommitInfoList);
+
+        Assert.assertTrue(HdfsUtils.fileExist(targetDir + "/c3=4/c4=bbb/test1.txt"));
+        Assert.assertTrue(HdfsUtils.fileExist(targetDir + "/c3=4/c4=rrr/test1.txt"));
+        Assert.assertFalse(HdfsUtils.fileExist(transactionDir));
+
+        fileSinkAggregatedCommitter.abort(fileAggregatedCommitInfoList);
+        Assert.assertFalse(HdfsUtils.fileExist(targetDir + "/c3=4/c4=bbb/test1.txt"));
+        Assert.assertFalse(HdfsUtils.fileExist(targetDir + "/c3=4/c4=rrr/test1.txt"));
+
+        // the transaction dir is deleted when the transaction is aborted
+        Assert.assertFalse(HdfsUtils.fileExist(transactionDir));
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/TestHdfsTxtTransactionStateFileWriter.java b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/TestHdfsTxtTransactionStateFileWriter.java
new file mode 100644
index 00000000000..d8633e86d16
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/sink/hdfs/TestHdfsTxtTransactionStateFileWriter.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink.hdfs; + +import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.seatunnel.file.config.FileFormat; +import org.apache.seatunnel.connectors.seatunnel.file.sink.FileCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.file.sink.transaction.TransactionStateFileWriter; +import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.FileSinkPartitionDirNameGenerator; +import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.FileSinkTransactionFileNameGenerator; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +@RunWith(JUnit4.class) +public class TestHdfsTxtTransactionStateFileWriter { + + @SuppressWarnings("checkstyle:MagicNumber") + @Test + public void testHdfsTextTransactionStateFileWriter() throws Exception { + String[] fieldNames = new String[]{"c1", "c2", "c3", "c4"}; + SeaTunnelDataType[] seaTunnelDataTypes = new SeaTunnelDataType[]{BasicType.BOOLEAN_TYPE, BasicType.INT_TYPE, BasicType.STRING_TYPE, BasicType.INT_TYPE}; + SeaTunnelRowType seaTunnelRowTypeInfo = new SeaTunnelRowType(fieldNames, seaTunnelDataTypes); + + List sinkColumnIndexInRow = new ArrayList<>(); + sinkColumnIndexInRow.add(0); + sinkColumnIndexInRow.add(1); + + List hivePartitionFieldList = new ArrayList<>(); + hivePartitionFieldList.add("c3"); + hivePartitionFieldList.add("c4"); + + List partitionFieldIndexInRow = new ArrayList<>(); + partitionFieldIndexInRow.add(2); + partitionFieldIndexInRow.add(3); + + String jobId = System.currentTimeMillis() + ""; + String targetPath = "/tmp/hive/warehouse/seatunnel.db/test1"; + String tmpPath = "/tmp/seatunnel"; + + TransactionStateFileWriter fileWriter = new HdfsTxtTransactionStateFileWriter(seaTunnelRowTypeInfo, + new FileSinkTransactionFileNameGenerator(FileFormat.TEXT, null, "yyyy.MM.dd"), + new FileSinkPartitionDirNameGenerator(hivePartitionFieldList, partitionFieldIndexInRow, "${k0}=${v0}/${k1}=${v1}"), + sinkColumnIndexInRow, + tmpPath, + targetPath, + jobId, + 0, + String.valueOf('\001'), + "\n", + new HdfsFileSystem()); + + String transactionId = fileWriter.beginTransaction(1L); + + SeaTunnelRow seaTunnelRow = new SeaTunnelRow(new Object[]{true, 1, "str1", "str2"}); + fileWriter.write(seaTunnelRow); + + SeaTunnelRow seaTunnelRow1 = new SeaTunnelRow(new Object[]{true, 1, "str1", "str3"}); + fileWriter.write(seaTunnelRow1); + + Optional fileCommitInfoOptional = fileWriter.prepareCommit(); + //check file exists and file content + Assert.assertTrue(fileCommitInfoOptional.isPresent()); + FileCommitInfo fileCommitInfo = fileCommitInfoOptional.get(); + String transactionDir = tmpPath + "/seatunnel/" + jobId + "/" + transactionId; + Assert.assertEquals(transactionDir, fileCommitInfo.getTransactionDir()); + Assert.assertEquals(2, fileCommitInfo.getNeedMoveFiles().size()); + Map needMoveFiles = fileCommitInfo.getNeedMoveFiles(); + Assert.assertEquals(targetPath + "/c3=str1/c4=str2/" + transactionId + ".txt", needMoveFiles.get(transactionDir + "/c3=str1/c4=str2/" + transactionId + ".txt")); + Assert.assertEquals(targetPath + "/c3=str1/c4=str3/" + transactionId + ".txt", needMoveFiles.get(transactionDir 
+ "/c3=str1/c4=str3/" + transactionId + ".txt")); + + Map> partitionDirAndValsMap = fileCommitInfo.getPartitionDirAndValsMap(); + Assert.assertEquals(2, partitionDirAndValsMap.size()); + Assert.assertTrue(partitionDirAndValsMap.keySet().contains("c3=str1/c4=str2")); + Assert.assertTrue(partitionDirAndValsMap.keySet().contains("c3=str1/c4=str3")); + Assert.assertTrue(partitionDirAndValsMap.get("c3=str1/c4=str2").size() == 2); + Assert.assertEquals("str1", partitionDirAndValsMap.get("c3=str1/c4=str2").get(0)); + Assert.assertEquals("str2", partitionDirAndValsMap.get("c3=str1/c4=str2").get(1)); + Assert.assertEquals("str1", partitionDirAndValsMap.get("c3=str1/c4=str3").get(0)); + Assert.assertEquals("str3", partitionDirAndValsMap.get("c3=str1/c4=str3").get(1)); + } +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-local/pom.xml b/seatunnel-connectors-v2/connector-file/connector-file-local/pom.xml new file mode 100644 index 00000000000..1ac5bb77b55 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-local/pom.xml @@ -0,0 +1,54 @@ + + + + + connector-file + org.apache.seatunnel + ${revision} + + 4.0.0 + + connector-file-local + + + + org.apache.seatunnel + connector-file-base + ${project.version} + + + junit + junit + test + + + org.powermock + powermock-module-junit4 + test + + + org.powermock + powermock-api-mockito2 + test + + + \ No newline at end of file diff --git a/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/FileUtils.java b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/FileUtils.java new file mode 100644 index 00000000000..b951ff8eab9 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/FileUtils.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink.local; + +import lombok.NonNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; + +public class FileUtils { + private static final Logger LOGGER = LoggerFactory.getLogger(FileUtils.class); + public static File createDir(@NonNull String dirPath) { + if (dirPath == null || "".equals(dirPath)) { + return null; + } + File file = new File(dirPath); + if (!file.exists() || !file.isDirectory()) { + file.mkdirs(); + } + return file; + } + + public static File createFile(@NonNull String filePath) throws IOException { + if (filePath == null || "".equals(filePath)) { + return null; + } + File file = new File(filePath); + if (!file.getParentFile().exists()) { + file.getParentFile().mkdirs(); + } + + if (!file.exists() || !file.isFile()) { + file.createNewFile(); + } + return file; + } + + public static boolean fileExist(@NonNull String filePath) { + File file = new File(filePath); + return file.exists(); + } + + public static void renameFile(@NonNull String oldName, @NonNull String newName) throws IOException { + LOGGER.info("begin rename file oldName :[" + oldName + "] to newName :[" + newName + "]"); + File oldPath = new File(oldName); + File newPath = new File(newName); + + if (!newPath.getParentFile().exists()) { + newPath.getParentFile().mkdirs(); + } + + if (oldPath.renameTo(newPath)) { + LOGGER.info("rename file :[" + oldPath + "] to [" + newPath + "] finish"); + } else { + throw new IOException("rename file :[" + oldPath + "] to [" + newPath + "] error"); + } + } + + public static void deleteFile(@NonNull String filePath) throws IOException { + File file = new File(filePath); + if (file.exists()) { + if (file.isDirectory()) { + deleteFiles(file); + } + file.delete(); + } + } + + private static boolean deleteFiles(@NonNull File file) { + try { + File[] files = file.listFiles(); + for (int i = 0; i < files.length; i++) { + File thisFile = files[i]; + if (thisFile.isDirectory()) { + deleteFiles(thisFile); + } + thisFile.delete(); + } + file.delete(); + + } catch (Exception e) { + LOGGER.error("delete file [" + file.getPath() + "] error"); + return false; + } + return true; + } +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/LocalFileSink.java b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/LocalFileSink.java new file mode 100644 index 00000000000..6e4b503e946 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/LocalFileSink.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink.local; + +import org.apache.seatunnel.api.sink.SeaTunnelSink; +import org.apache.seatunnel.connectors.seatunnel.file.sink.AbstractFileSink; +import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.SinkFileSystemPlugin; + +import com.google.auto.service.AutoService; + +@AutoService(SeaTunnelSink.class) +public class LocalFileSink extends AbstractFileSink { + @Override + public SinkFileSystemPlugin getSinkFileSystemPlugin() { + return new LocalFileSinkPlugin(); + } +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/LocalFileSinkPlugin.java b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/LocalFileSinkPlugin.java new file mode 100644 index 00000000000..1d4bc43e57f --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/LocalFileSinkPlugin.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink.local; + +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.seatunnel.file.sink.config.FileSystemType; +import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.FileSystem; +import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.FileSystemCommitter; +import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.SinkFileSystemPlugin; +import org.apache.seatunnel.connectors.seatunnel.file.sink.transaction.TransactionFileNameGenerator; +import org.apache.seatunnel.connectors.seatunnel.file.sink.transaction.TransactionStateFileWriter; +import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.PartitionDirNameGenerator; + +import lombok.NonNull; + +import java.util.List; +import java.util.Optional; + +public class LocalFileSinkPlugin implements SinkFileSystemPlugin { + @Override + public String getPluginName() { + return FileSystemType.LOCAL.getSinkFileSystemPluginName(); + } + + @Override + public Optional getTransactionStateFileWriter(@NonNull SeaTunnelRowType seaTunnelRowTypeInfo, + @NonNull TransactionFileNameGenerator transactionFileNameGenerator, + @NonNull PartitionDirNameGenerator partitionDirNameGenerator, + @NonNull List sinkColumnsIndexInRow, + @NonNull String tmpPath, + @NonNull String targetPath, + @NonNull String jobId, + int subTaskIndex, + @NonNull String fieldDelimiter, + @NonNull String rowDelimiter, + @NonNull FileSystem fileSystem) { + return Optional.of(new LocalTxtTransactionStateFileWriter(seaTunnelRowTypeInfo, + transactionFileNameGenerator, + partitionDirNameGenerator, + sinkColumnsIndexInRow, + tmpPath, + targetPath, + jobId, + subTaskIndex, + fieldDelimiter, + rowDelimiter, + fileSystem)); + } + + @Override + public Optional getFileSystemCommitter() { + return Optional.of(new LocalFileSystemCommitter()); + } + + @Override + public Optional getFileSystem() { + return Optional.of(new LocalFileSystem()); + } +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/LocalFileSystem.java b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/LocalFileSystem.java new file mode 100644 index 00000000000..6f68c2305fe --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/LocalFileSystem.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink.local; + +import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.FileSystem; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +public class LocalFileSystem implements FileSystem { + @Override + public void deleteFile(String path) throws IOException { + File file = new File(path); + file.delete(); + } + + @Override + public List dirList(String dirPath) throws IOException { + File file = new File(dirPath); + String[] list = file.list(); + if (list == null) { + return null; + } + return Arrays.asList(list); + } +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/LocalFileSystemCommitter.java b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/LocalFileSystemCommitter.java new file mode 100644 index 00000000000..38e1d06d623 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/LocalFileSystemCommitter.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink.local; + +import org.apache.seatunnel.connectors.seatunnel.file.sink.FileAggregatedCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.FileSystemCommitter; + +import lombok.NonNull; + +import java.io.File; +import java.io.IOException; +import java.util.Map; + +public class LocalFileSystemCommitter implements FileSystemCommitter { + @Override + public void commitTransaction(@NonNull FileAggregatedCommitInfo aggregateCommitInfo) throws IOException { + for (Map.Entry> entry : aggregateCommitInfo.getTransactionMap().entrySet()) { + for (Map.Entry mvFileEntry : entry.getValue().entrySet()) { + FileUtils.renameFile(mvFileEntry.getKey(), mvFileEntry.getValue()); + } + // delete the transaction dir + FileUtils.deleteFile(entry.getKey()); + } + } + + @Override + public void abortTransaction(@NonNull FileAggregatedCommitInfo aggregateCommitInfo) throws IOException { + for (Map.Entry> entry : aggregateCommitInfo.getTransactionMap().entrySet()) { + // rollback the file + for (Map.Entry mvFileEntry : entry.getValue().entrySet()) { + File oldFile = new File(mvFileEntry.getKey()); + File newFile = new File(mvFileEntry.getValue()); + if (newFile.exists() && !oldFile.exists()) { + FileUtils.renameFile(mvFileEntry.getValue(), mvFileEntry.getKey()); + } + } + // delete the transaction dir + FileUtils.deleteFile(entry.getKey()); + } + } +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/LocalTxtTransactionStateFileWriter.java b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/LocalTxtTransactionStateFileWriter.java new file mode 100644 index 00000000000..d04939a7049 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/LocalTxtTransactionStateFileWriter.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.file.sink.local; + +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.FileSystem; +import org.apache.seatunnel.connectors.seatunnel.file.sink.transaction.TransactionFileNameGenerator; +import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.AbstractTransactionStateFileWriter; +import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.PartitionDirNameGenerator; + +import lombok.NonNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class LocalTxtTransactionStateFileWriter extends AbstractTransactionStateFileWriter { + private static final Logger LOGGER = LoggerFactory.getLogger(LocalTxtTransactionStateFileWriter.class); + private Map beingWrittenOutputStream; + + private String fieldDelimiter; + private String rowDelimiter; + + public LocalTxtTransactionStateFileWriter(@NonNull SeaTunnelRowType seaTunnelRowTypeInfo, + @NonNull TransactionFileNameGenerator transactionFileNameGenerator, + @NonNull PartitionDirNameGenerator partitionDirNameGenerator, + @NonNull List sinkColumnsIndexInRow, + @NonNull String tmpPath, + @NonNull String targetPath, + @NonNull String jobId, + int subTaskIndex, + @NonNull String fieldDelimiter, + @NonNull String rowDelimiter, + @NonNull FileSystem fileSystem) { + super(seaTunnelRowTypeInfo, transactionFileNameGenerator, partitionDirNameGenerator, sinkColumnsIndexInRow, tmpPath, targetPath, jobId, subTaskIndex, fileSystem); + + this.fieldDelimiter = fieldDelimiter; + this.rowDelimiter = rowDelimiter; + beingWrittenOutputStream = new HashMap<>(); + } + + @Override + public void beginTransaction(String transactionId) { + this.beingWrittenOutputStream = new HashMap<>(); + } + + @Override + public void abortTransaction(String transactionId) { + this.beingWrittenOutputStream = new HashMap<>(); + } + + @Override + public void write(@NonNull SeaTunnelRow seaTunnelRow) { + String filePath = getOrCreateFilePathBeingWritten(seaTunnelRow); + FileOutputStream fileOutputStream = getOrCreateOutputStream(filePath); + String line = transformRowToLine(seaTunnelRow); + try { + fileOutputStream.write(line.getBytes()); + fileOutputStream.write(rowDelimiter.getBytes()); + } catch (IOException e) { + LOGGER.error("write data to file {} error", filePath); + throw new RuntimeException(e); + } + } + + @Override + public void finishAndCloseWriteFile() { + beingWrittenOutputStream.entrySet().forEach(entry -> { + try { + entry.getValue().flush(); + } catch (IOException e) { + LOGGER.error("error when flush file {}", entry.getKey()); + throw new RuntimeException(e); + } finally { + try { + entry.getValue().close(); + } catch (IOException e) { + LOGGER.error("error when close output stream {}", entry.getKey()); + } + } + + needMoveFiles.put(entry.getKey(), getTargetLocation(entry.getKey())); + }); + } + + private FileOutputStream getOrCreateOutputStream(@NonNull String filePath) { + FileOutputStream fileOutputStream = beingWrittenOutputStream.get(filePath); + if (fileOutputStream == null) { + try { + FileUtils.createFile(filePath); + fileOutputStream = new FileOutputStream(new File(filePath)); + beingWrittenOutputStream.put(filePath, fileOutputStream); + } catch (IOException 
e) { + LOGGER.error("can not get output file stream"); + throw new RuntimeException(e); + } + } + return fileOutputStream; + } + + private String transformRowToLine(@NonNull SeaTunnelRow seaTunnelRow) { + return this.sinkColumnsIndexInRow.stream().map(index -> seaTunnelRow.getFields()[index] == null ? "" : seaTunnelRow.getFields()[index].toString()).collect(Collectors.joining(fieldDelimiter)); + } +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-local/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/FileSinkAggregatedCommitterTest.java b/seatunnel-connectors-v2/connector-file/connector-file-local/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/FileSinkAggregatedCommitterTest.java new file mode 100644 index 00000000000..720ad3eba78 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-local/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/FileSinkAggregatedCommitterTest.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.file.sink.local;
+
+import org.apache.seatunnel.connectors.seatunnel.file.sink.FileAggregatedCommitInfo;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.FileCommitInfo;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.FileSinkAggregatedCommitter;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.stream.Collectors;
+
+public class FileSinkAggregatedCommitterTest {
+    @SuppressWarnings("checkstyle:UnnecessaryParentheses")
+    @Test
+    public void testCommit() throws Exception {
+        FileSinkAggregatedCommitter fileSinkAggregatedCommitter = new FileSinkAggregatedCommitter(new LocalFileSystemCommitter());
+        Map<String, Map<String, String>> transactionFiles = new HashMap<>();
+        Random random = new Random();
+        Long jobIdLong = random.nextLong();
+        String jobId = "Job_" + jobIdLong;
+        String transactionDir = String.format("/tmp/seatunnel/seatunnel/%s/T_%s_0_1", jobId, jobId);
+        String targetDir = String.format("/tmp/hive/warehouse/%s", jobId);
+        Map<String, String> needMoveFiles = new HashMap<>();
+        needMoveFiles.put(transactionDir + "/c3=4/c4=rrr/test1.txt", targetDir + "/c3=4/c4=rrr/test1.txt");
+        needMoveFiles.put(transactionDir + "/c3=4/c4=bbb/test1.txt", targetDir + "/c3=4/c4=bbb/test1.txt");
+        FileUtils.createFile(transactionDir + "/c3=4/c4=rrr/test1.txt");
+        FileUtils.createFile(transactionDir + "/c3=4/c4=bbb/test1.txt");
+
+        transactionFiles.put(transactionDir, needMoveFiles);
+
+        Map<String, List<String>> partitionDirAndVals = new HashMap<>();
+        partitionDirAndVals.put("/c3=4/c4=rrr", Arrays.stream((new String[]{"4", "rrr"})).collect(Collectors.toList()));
+        partitionDirAndVals.put("/c3=4/c4=bbb", Arrays.stream((new String[]{"4", "bbb"})).collect(Collectors.toList()));
+
+        FileAggregatedCommitInfo fileAggregatedCommitInfo = new FileAggregatedCommitInfo(transactionFiles, partitionDirAndVals);
+        List<FileAggregatedCommitInfo> fileAggregatedCommitInfoList = new ArrayList<>();
+        fileAggregatedCommitInfoList.add(fileAggregatedCommitInfo);
+        fileSinkAggregatedCommitter.commit(fileAggregatedCommitInfoList);
+
+        Assert.assertTrue(FileUtils.fileExist(targetDir + "/c3=4/c4=bbb/test1.txt"));
+        Assert.assertTrue(FileUtils.fileExist(targetDir + "/c3=4/c4=rrr/test1.txt"));
+        Assert.assertFalse(FileUtils.fileExist(transactionDir));
+    }
+
+    @SuppressWarnings("checkstyle:UnnecessaryParentheses")
+    @Test
+    public void testCombine() throws Exception {
+        FileSinkAggregatedCommitter fileSinkAggregatedCommitter = new FileSinkAggregatedCommitter(new LocalFileSystemCommitter());
+        Map<String, Map<String, String>> transactionFiles = new HashMap<>();
+        Random random = new Random();
+        Long jobIdLong = random.nextLong();
+        String jobId = "Job_" + jobIdLong;
+        String transactionDir = String.format("/tmp/seatunnel/seatunnel/%s/T_%s_0_1", jobId, jobId);
+        String targetDir = String.format("/tmp/hive/warehouse/%s", jobId);
+        Map<String, String> needMoveFiles = new HashMap<>();
+        needMoveFiles.put(transactionDir + "/c3=3/c4=rrr/test1.txt", targetDir + "/c3=3/c4=rrr/test1.txt");
+        needMoveFiles.put(transactionDir + "/c3=4/c4=bbb/test1.txt", targetDir + "/c3=4/c4=bbb/test1.txt");
+        Map<String, List<String>> partitionDirAndVals = new HashMap<>();
+        partitionDirAndVals.put("/c3=3/c4=rrr", Arrays.stream((new String[]{"3", "rrr"})).collect(Collectors.toList()));
+        partitionDirAndVals.put("/c3=4/c4=bbb", Arrays.stream((new String[]{"4", "bbb"})).collect(Collectors.toList()));
+        FileCommitInfo fileCommitInfo = new FileCommitInfo(needMoveFiles, partitionDirAndVals, transactionDir);
+        FileUtils.createFile(transactionDir + "/c3=3/c4=rrr/test1.txt");
+        FileUtils.createFile(transactionDir + "/c3=4/c4=bbb/test1.txt");
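+        // build a second commit info against the same transaction dir; combine() should merge both into one aggregated commit info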
+    @SuppressWarnings("checkstyle:UnnecessaryParentheses")
+    @Test
+    public void testCombine() throws Exception {
+        FileSinkAggregatedCommitter fileSinkAggregatedCommitter = new FileSinkAggregatedCommitter(new LocalFileSystemCommitter());
+        Random random = new Random();
+        Long jobIdLong = random.nextLong();
+        String jobId = "Job_" + jobIdLong;
+        String transactionDir = String.format("/tmp/seatunnel/seatunnel/%s/T_%s_0_1", jobId, jobId);
+        String targetDir = String.format("/tmp/hive/warehouse/%s", jobId);
+        Map<String, String> needMoveFiles = new HashMap<>();
+        needMoveFiles.put(transactionDir + "/c3=3/c4=rrr/test1.txt", targetDir + "/c3=3/c4=rrr/test1.txt");
+        needMoveFiles.put(transactionDir + "/c3=4/c4=bbb/test1.txt", targetDir + "/c3=4/c4=bbb/test1.txt");
+        Map<String, List<String>> partitionDirAndVals = new HashMap<>();
+        partitionDirAndVals.put("/c3=3/c4=rrr", Arrays.stream((new String[]{"3", "rrr"})).collect(Collectors.toList()));
+        partitionDirAndVals.put("/c3=4/c4=bbb", Arrays.stream((new String[]{"4", "bbb"})).collect(Collectors.toList()));
+        FileCommitInfo fileCommitInfo = new FileCommitInfo(needMoveFiles, partitionDirAndVals, transactionDir);
+        FileUtils.createFile(transactionDir + "/c3=3/c4=rrr/test1.txt");
+        FileUtils.createFile(transactionDir + "/c3=4/c4=bbb/test1.txt");
+
+        Map<String, String> needMoveFiles1 = new HashMap<>();
+        needMoveFiles1.put(transactionDir + "/c3=4/c4=rrr/test2.txt", targetDir + "/c3=4/c4=rrr/test2.txt");
+        needMoveFiles1.put(transactionDir + "/c3=4/c4=bbb/test2.txt", targetDir + "/c3=4/c4=bbb/test2.txt");
+        Map<String, List<String>> partitionDirAndVals1 = new HashMap<>();
+        partitionDirAndVals1.put("/c3=4/c4=rrr", Arrays.stream((new String[]{"4", "rrr"})).collect(Collectors.toList()));
+        partitionDirAndVals1.put("/c3=4/c4=bbb", Arrays.stream((new String[]{"4", "bbb"})).collect(Collectors.toList()));
+        FileCommitInfo fileCommitInfo1 = new FileCommitInfo(needMoveFiles1, partitionDirAndVals1, transactionDir);
+        List<FileCommitInfo> fileCommitInfoList = new ArrayList<>();
+        fileCommitInfoList.add(fileCommitInfo);
+        fileCommitInfoList.add(fileCommitInfo1);
+
+        FileAggregatedCommitInfo combine = fileSinkAggregatedCommitter.combine(fileCommitInfoList);
+        Assert.assertEquals(1, combine.getTransactionMap().size());
+        Assert.assertEquals(4, combine.getTransactionMap().get(transactionDir).size());
+        Assert.assertEquals(targetDir + "/c3=3/c4=rrr/test1.txt", combine.getTransactionMap().get(transactionDir).get(transactionDir + "/c3=3/c4=rrr/test1.txt"));
+        Assert.assertEquals(targetDir + "/c3=4/c4=bbb/test1.txt", combine.getTransactionMap().get(transactionDir).get(transactionDir + "/c3=4/c4=bbb/test1.txt"));
+        Assert.assertEquals(targetDir + "/c3=4/c4=rrr/test2.txt", combine.getTransactionMap().get(transactionDir).get(transactionDir + "/c3=4/c4=rrr/test2.txt"));
+        Assert.assertEquals(targetDir + "/c3=4/c4=bbb/test2.txt", combine.getTransactionMap().get(transactionDir).get(transactionDir + "/c3=4/c4=bbb/test2.txt"));
+        Assert.assertEquals(3, combine.getPartitionDirAndValsMap().keySet().size());
+    }
+
+    @SuppressWarnings("checkstyle:UnnecessaryParentheses")
+    @Test
+    public void testAbort() throws Exception {
+        FileSinkAggregatedCommitter fileSinkAggregatedCommitter = new FileSinkAggregatedCommitter(new LocalFileSystemCommitter());
+        Map<String, Map<String, String>> transactionFiles = new HashMap<>();
+        Random random = new Random();
+        Long jobIdLong = random.nextLong();
+        String jobId = "Job_" + jobIdLong;
+        String transactionDir = String.format("/tmp/seatunnel/seatunnel/%s/T_%s_0_1", jobId, jobId);
+        String targetDir = String.format("/tmp/hive/warehouse/%s", jobId);
+        Map<String, String> needMoveFiles = new HashMap<>();
+        needMoveFiles.put(transactionDir + "/c3=4/c4=rrr/test1.txt", targetDir + "/c3=4/c4=rrr/test1.txt");
+        needMoveFiles.put(transactionDir + "/c3=4/c4=bbb/test1.txt", targetDir + "/c3=4/c4=bbb/test1.txt");
+        Map<String, List<String>> partitionDirAndVals = new HashMap<>();
+        partitionDirAndVals.put("/c3=4/c4=rrr", Arrays.stream((new String[]{"4", "rrr"})).collect(Collectors.toList()));
+        partitionDirAndVals.put("/c3=4/c4=bbb", Arrays.stream((new String[]{"4", "bbb"})).collect(Collectors.toList()));
+        FileUtils.createFile(transactionDir + "/c3=4/c4=rrr/test1.txt");
+        FileUtils.createFile(transactionDir + "/c3=4/c4=bbb/test1.txt");
+
+        transactionFiles.put(transactionDir, needMoveFiles);
+        FileAggregatedCommitInfo fileAggregatedCommitInfo = new FileAggregatedCommitInfo(transactionFiles, partitionDirAndVals);
+        List<FileAggregatedCommitInfo> fileAggregatedCommitInfoList = new ArrayList<>();
+        fileAggregatedCommitInfoList.add(fileAggregatedCommitInfo);
+        fileSinkAggregatedCommitter.commit(fileAggregatedCommitInfoList);
+
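+        // after a successful commit the data files live under targetDir and the
+        // transaction dir is gone; abort() below must undo exactly that state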
+        Assert.assertTrue(FileUtils.fileExist(targetDir + "/c3=4/c4=bbb/test1.txt"));
+        Assert.assertTrue(FileUtils.fileExist(targetDir + "/c3=4/c4=rrr/test1.txt"));
+        Assert.assertFalse(FileUtils.fileExist(transactionDir));
+
+        fileSinkAggregatedCommitter.abort(fileAggregatedCommitInfoList);
+        Assert.assertFalse(FileUtils.fileExist(targetDir + "/c3=4/c4=bbb/test1.txt"));
+        Assert.assertFalse(FileUtils.fileExist(targetDir + "/c3=4/c4=rrr/test1.txt"));
+
+        // the transaction dir is deleted on abort as well
+        Assert.assertFalse(FileUtils.fileExist(transactionDir));
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-local/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/TestLocalTxtTransactionStateFileWriter.java b/seatunnel-connectors-v2/connector-file/connector-file-local/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/TestLocalTxtTransactionStateFileWriter.java
new file mode 100644
index 00000000000..d739ac41c2d
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-local/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/sink/local/TestLocalTxtTransactionStateFileWriter.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.file.sink.local;
+
+import org.apache.seatunnel.api.table.type.BasicType;
+import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.connectors.seatunnel.file.config.FileFormat;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.FileCommitInfo;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.transaction.TransactionStateFileWriter;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.FileSinkPartitionDirNameGenerator;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.FileSinkTransactionFileNameGenerator;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+@RunWith(JUnit4.class)
+public class TestLocalTxtTransactionStateFileWriter {
+
+    @SuppressWarnings("checkstyle:MagicNumber")
+    @Test
+    public void testLocalTxtTransactionStateFileWriter() throws Exception {
+        String[] fieldNames = new String[]{"c1", "c2", "c3", "c4"};
+        SeaTunnelDataType<?>[] seaTunnelDataTypes = new SeaTunnelDataType<?>[]{BasicType.BOOLEAN_TYPE, BasicType.INT_TYPE, BasicType.STRING_TYPE, BasicType.INT_TYPE};
+        SeaTunnelRowType seaTunnelRowTypeInfo = new SeaTunnelRowType(fieldNames, seaTunnelDataTypes);
+
+        List<Integer> sinkColumnIndexInRow = new ArrayList<>();
+        sinkColumnIndexInRow.add(0);
+        sinkColumnIndexInRow.add(1);
+
+        List<String> hivePartitionFieldList = new ArrayList<>();
+        hivePartitionFieldList.add("c3");
+        hivePartitionFieldList.add("c4");
+
+        List<Integer> partitionFieldIndexInRow = new ArrayList<>();
+        partitionFieldIndexInRow.add(2);
+        partitionFieldIndexInRow.add(3);
+
+        String jobId = String.valueOf(System.currentTimeMillis());
+        String targetPath = "/tmp/hive/warehouse/seatunnel.db/test1";
+        String tmpPath = "/tmp/seatunnel";
+
+        TransactionStateFileWriter fileWriter = new LocalTxtTransactionStateFileWriter(seaTunnelRowTypeInfo,
+            new FileSinkTransactionFileNameGenerator(FileFormat.TEXT, null, "yyyy.MM.dd"),
+            new FileSinkPartitionDirNameGenerator(hivePartitionFieldList, partitionFieldIndexInRow, "${k0}=${v0}/${k1}=${v1}"),
+            sinkColumnIndexInRow,
+            tmpPath,
+            targetPath,
+            jobId,
+            0,
+            String.valueOf('\001'),
+            "\n",
+            new LocalFileSystem());
+
+        String transactionId = fileWriter.beginTransaction(1L);
+
+        SeaTunnelRow seaTunnelRow = new SeaTunnelRow(new Object[]{true, 1, "str1", "str2"});
+        fileWriter.write(seaTunnelRow);
+
+        SeaTunnelRow seaTunnelRow1 = new SeaTunnelRow(new Object[]{true, 1, "str1", "str3"});
+        fileWriter.write(seaTunnelRow1);
+
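+        // prepareCommit() closes the open files and hands back a FileCommitInfo
+        // describing where each file under the transaction dir must end up; the
+        // actual move is performed later by the aggregated committer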
+        Optional<FileCommitInfo> fileCommitInfoOptional = fileWriter.prepareCommit();
+        // check that the files exist and the commit info content is correct
+        Assert.assertTrue(fileCommitInfoOptional.isPresent());
+        FileCommitInfo fileCommitInfo = fileCommitInfoOptional.get();
+        String transactionDir = tmpPath + "/seatunnel/" + jobId + "/" + transactionId;
+        Assert.assertEquals(transactionDir, fileCommitInfo.getTransactionDir());
+        Assert.assertEquals(2, fileCommitInfo.getNeedMoveFiles().size());
+        Map<String, String> needMoveFiles = fileCommitInfo.getNeedMoveFiles();
+        Assert.assertEquals(targetPath + "/c3=str1/c4=str2/" + transactionId + ".txt", needMoveFiles.get(transactionDir + "/c3=str1/c4=str2/" + transactionId + ".txt"));
+        Assert.assertEquals(targetPath + "/c3=str1/c4=str3/" + transactionId + ".txt", needMoveFiles.get(transactionDir + "/c3=str1/c4=str3/" + transactionId + ".txt"));
+
+        Map<String, List<String>> partitionDirAndValsMap = fileCommitInfo.getPartitionDirAndValsMap();
+        Assert.assertEquals(2, partitionDirAndValsMap.size());
+        Assert.assertTrue(partitionDirAndValsMap.containsKey("c3=str1/c4=str2"));
+        Assert.assertTrue(partitionDirAndValsMap.containsKey("c3=str1/c4=str3"));
+        Assert.assertEquals(2, partitionDirAndValsMap.get("c3=str1/c4=str2").size());
+        Assert.assertEquals("str1", partitionDirAndValsMap.get("c3=str1/c4=str2").get(0));
+        Assert.assertEquals("str2", partitionDirAndValsMap.get("c3=str1/c4=str2").get(1));
+        Assert.assertEquals("str1", partitionDirAndValsMap.get("c3=str1/c4=str3").get(0));
+        Assert.assertEquals("str3", partitionDirAndValsMap.get("c3=str1/c4=str3").get(1));
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-file/pom.xml b/seatunnel-connectors-v2/connector-file/pom.xml
new file mode 100644
index 00000000000..b84010b8aa8
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/pom.xml
@@ -0,0 +1,37 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements. See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License. You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>seatunnel-connectors-v2</artifactId>
+        <groupId>org.apache.seatunnel</groupId>
+        <version>${revision}</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>connector-file</artifactId>
+    <packaging>pom</packaging>
+
+    <modules>
+        <module>connector-file-base</module>
+        <module>connector-file-hadoop</module>
+        <module>connector-file-local</module>
+    </modules>
+</project>
\ No newline at end of file
diff --git a/seatunnel-connectors-v2/connector-hive/pom.xml b/seatunnel-connectors-v2/connector-hive/pom.xml
index eb2ffa797de..a4842e0ed6b 100644
--- a/seatunnel-connectors-v2/connector-hive/pom.xml
+++ b/seatunnel-connectors-v2/connector-hive/pom.xml
@@ -30,6 +30,12 @@
     <artifactId>connector-hive</artifactId>

+        <dependency>
+            <groupId>org.apache.hive</groupId>
+            <artifactId>hive-exec</artifactId>
+            <scope>provided</scope>
+        </dependency>
+
         <dependency>
             <groupId>org.apache.seatunnel</groupId>
             <artifactId>seatunnel-api</artifactId>
@@ -43,14 +49,62 @@
             <scope>provided</scope>
         </dependency>

+        <dependency>
+            <groupId>org.apache.seatunnel</groupId>
+            <artifactId>seatunnel-core-base</artifactId>
+            <version>${project.version}</version>
+            <scope>test</scope>
+        </dependency>
+
         <dependency>
             <groupId>org.apache.commons</groupId>
             <artifactId>commons-lang3</artifactId>
         </dependency>
+
         <dependency>
             <groupId>org.apache.orc</groupId>
             <artifactId>orc-core</artifactId>
         </dependency>
+
         <dependency>
             <groupId>org.apache.hadoop</groupId>
             <artifactId>hadoop-common</artifactId>
         </dependency>
+
         <dependency>
             <groupId>org.apache.hadoop</groupId>
             <artifactId>hadoop-hdfs</artifactId>
         </dependency>
+
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-collections4</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.seatunnel</groupId>
+            <artifactId>connector-file-hadoop</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.codehaus.janino</groupId>
+            <artifactId>janino</artifactId>
+            <version>${janino.version}</version>
+        </dependency>
+
         <dependency>
             <groupId>junit</groupId>
             <artifactId>junit</artifactId>
         </dependency>
+
+        <dependency>
+            <groupId>org.powermock</groupId>
+            <artifactId>powermock-module-junit4</artifactId>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.powermock</groupId>
+            <artifactId>powermock-api-mockito2</artifactId>
+            <scope>test</scope>
+        </dependency>
diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/config/Constant.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/config/Constant.java
new file mode 100644
index 00000000000..dd0b2ab5465
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/config/Constant.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.hive.config;
+
+public class Constant {
+    public static final String HIVE_RESULT_TABLE_NAME = "hive_table_name";
+    public static final String HIVE_METASTORE_URIS = "hive_metastore_uris";
+}
diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/config/SourceConfig.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/config/SourceConfig.java
new file mode 100644
index 00000000000..7054403164b
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/config/SourceConfig.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.hive.config;
+
+import static org.apache.hadoop.fs.FileSystem.FS_DEFAULT_NAME_KEY;
+
+public class SourceConfig {
+
+    public static final String FILE_TYPE = "file.type";
+
+    public static final String DEFAULT_FS = FS_DEFAULT_NAME_KEY;
+
+    public static final String FILE_PATH = "file.path";
+}
diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/exception/HivePluginException.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/exception/HivePluginException.java
new file mode 100644
index 00000000000..8e1c22548e3
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/exception/HivePluginException.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.hive.exception;
+
+public class HivePluginException extends Exception {
+
+    public HivePluginException(String message) {
+        super(message);
+    }
+
+    public HivePluginException(String message, Throwable cause) {
+        super(message, cause);
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveAggregatedCommitInfo.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveAggregatedCommitInfo.java
index 025fbefbfa2..6259389c4c6 100644
--- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveAggregatedCommitInfo.java
+++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveAggregatedCommitInfo.java
@@ -17,20 +17,18 @@
 package org.apache.seatunnel.connectors.seatunnel.hive.sink;

+import org.apache.seatunnel.connectors.seatunnel.file.sink.FileAggregatedCommitInfo;
+
 import lombok.AllArgsConstructor;
 import lombok.Data;
+import org.apache.hadoop.hive.metastore.api.Table;

 import java.io.Serializable;
-import java.util.Map;

 @Data
 @AllArgsConstructor
 public class HiveAggregatedCommitInfo implements Serializable {
-
-    /**
-     * Storage the commit info in map.
-     * K is the file path need to be moved to hive data dir.
-     * V is the target file path of the data file.
-     */
-    private Map<String, String> needMoveFiles;
+    private FileAggregatedCommitInfo fileAggregatedCommitInfo;
+    private String hiveMetastoreUris;
+    private Table table;
 }
diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveCommitInfo.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveCommitInfo.java
index 0dd58f8f49f..002beea322f 100644
--- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveCommitInfo.java
+++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveCommitInfo.java
@@ -17,20 +17,22 @@
 package org.apache.seatunnel.connectors.seatunnel.hive.sink;

+import org.apache.seatunnel.connectors.seatunnel.file.sink.FileCommitInfo;
+
 import lombok.AllArgsConstructor;
 import lombok.Data;
+import org.apache.hadoop.hive.metastore.api.Table;

 import java.io.Serializable;
-import java.util.Map;

 @Data
 @AllArgsConstructor
 public class HiveCommitInfo implements Serializable {

-    /**
-     * Storage the commit info in map.
-     * K is the file path need to be moved to hive data dir.
-     * V is the target file path of the data file.
-     */
-    private Map<String, String> needMoveFiles;
+    private FileCommitInfo fileCommitInfo;
+
+    private String hiveMetastoreUris;
+
+    private Table table;
+
 }
diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSink.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSink.java
index f1ba12edd95..4df91b1a535 100644
--- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSink.java
+++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSink.java
@@ -18,6 +18,7 @@
 package org.apache.seatunnel.connectors.seatunnel.hive.sink;

 import org.apache.seatunnel.api.common.PrepareFailException;
+import org.apache.seatunnel.api.common.SeaTunnelContext;
 import org.apache.seatunnel.api.serialization.DefaultSerializer;
 import org.apache.seatunnel.api.serialization.Serializer;
 import org.apache.seatunnel.api.sink.SeaTunnelSink;
@@ -26,6 +27,8 @@
 import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
 import org.apache.seatunnel.api.table.type.SeaTunnelRow;
 import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.common.constants.JobMode;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.config.SaveMode;

 import org.apache.seatunnel.shade.com.typesafe.config.Config;

@@ -43,8 +46,11 @@ public class HiveSink implements SeaTunnelSink<SeaTunnelRow, HiveSinkState, HiveCommitInfo, HiveAggregatedCommitInfo> {

     private Config config;
-    private long jobId;
-    private SeaTunnelRowType seaTunnelRowType;
+    private String jobId;
+    private Long checkpointId;
+    private SeaTunnelRowType seaTunnelRowTypeInfo;
+    private SeaTunnelContext seaTunnelContext;
+    private HiveSinkConfig hiveSinkConfig;

     @Override
     public String getPluginName() {
@@ -52,34 +58,47 @@
     }

     @Override
-    public void setTypeInfo(SeaTunnelRowType seaTunnelRowType) {
-        this.seaTunnelRowType = seaTunnelRowType;
+    public void setTypeInfo(SeaTunnelRowType seaTunnelRowTypeInfo) {
+        this.seaTunnelRowTypeInfo = seaTunnelRowTypeInfo;
+        this.hiveSinkConfig = new HiveSinkConfig(config, seaTunnelRowTypeInfo);
     }

     @Override
     public SeaTunnelDataType<SeaTunnelRow> getConsumedType() {
-        return this.seaTunnelRowType;
+        return this.seaTunnelRowTypeInfo;
     }

     @Override
     public void prepare(Config pluginConfig) throws PrepareFailException {
         this.config = pluginConfig;
-        this.jobId = System.currentTimeMillis();
+        this.checkpointId = 1L;
     }

     @Override
     public SinkWriter<SeaTunnelRow, HiveCommitInfo, HiveSinkState> createWriter(SinkWriter.Context context) throws IOException {
-        return new HiveSinkWriter(seaTunnelRowType, config, context, System.currentTimeMillis());
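+        // OVERWRITE replaces the table contents, which only makes sense for a
+        // bounded (batch) run; the sink also depends on the transactional file
+        // writer, so both preconditions are checked before creating a writer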
+        if (!seaTunnelContext.getJobMode().equals(JobMode.BATCH) && hiveSinkConfig.getTextFileSinkConfig().getSaveMode().equals(SaveMode.OVERWRITE)) {
+            throw new RuntimeException("only batch jobs can overwrite the hive table");
+        }
+
+        if (!this.getSinkConfig().getTextFileSinkConfig().isEnableTransaction()) {
+            throw new RuntimeException("Hive Sink Connector only supports transactions now");
+        }
+        return new HiveSinkWriter(seaTunnelRowTypeInfo,
+            config,
+            context,
+            getSinkConfig(),
+            jobId);
     }

     @Override
     public SinkWriter<SeaTunnelRow, HiveCommitInfo, HiveSinkState> restoreWriter(SinkWriter.Context context, List<HiveSinkState> states) throws IOException {
-        return new HiveSinkWriter(seaTunnelRowType, config, context, System.currentTimeMillis());
+        return new HiveSinkWriter(seaTunnelRowTypeInfo, config, context, hiveSinkConfig, jobId, states);
     }

     @Override
-    public Optional<Serializer<HiveCommitInfo>> getCommitInfoSerializer() {
-        return Optional.of(new DefaultSerializer<>());
+    public void setSeaTunnelContext(SeaTunnelContext seaTunnelContext) {
+        this.seaTunnelContext = seaTunnelContext;
+        this.jobId = seaTunnelContext.getJobId();
     }

@@ -87,8 +106,25 @@
+    public Optional<Serializer<HiveSinkState>> getWriterStateSerializer() {
+        return Optional.of(new DefaultSerializer<>());
+    }
+
     @Override
     public Optional<Serializer<HiveAggregatedCommitInfo>> getAggregatedCommitInfoSerializer() {
         return Optional.of(new DefaultSerializer<>());
     }
+
+    @Override
+    public Optional<Serializer<HiveCommitInfo>> getCommitInfoSerializer() {
+        return Optional.of(new DefaultSerializer<>());
+    }
+
+    private HiveSinkConfig getSinkConfig() {
+        if (this.hiveSinkConfig == null && (this.seaTunnelRowTypeInfo != null && this.config != null)) {
+            this.hiveSinkConfig = new HiveSinkConfig(config, seaTunnelRowTypeInfo);
+        }
+        return this.hiveSinkConfig;
+    }
 }
diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkAggregatedCommitter.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkAggregatedCommitter.java
index 673923a51b1..3a0448179a7 100644
--- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkAggregatedCommitter.java
+++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkAggregatedCommitter.java
@@ -18,8 +18,14 @@
 package org.apache.seatunnel.connectors.seatunnel.hive.sink;

 import org.apache.seatunnel.api.sink.SinkAggregatedCommitter;
-import org.apache.seatunnel.connectors.seatunnel.hive.sink.file.writer.HdfsUtils;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.FileAggregatedCommitInfo;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.hdfs.HdfsUtils;
+import org.apache.seatunnel.connectors.seatunnel.hive.utils.HiveMetaStoreProxy;

+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.thrift.TException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@@ -28,41 +34,82 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;

 public class HiveSinkAggregatedCommitter implements SinkAggregatedCommitter<HiveCommitInfo, HiveAggregatedCommitInfo> {
     private static final Logger LOGGER = LoggerFactory.getLogger(HiveSinkAggregatedCommitter.class);

     @Override
     public List<HiveAggregatedCommitInfo> commit(List<HiveAggregatedCommitInfo> aggregatedCommitInfoList) throws IOException {
-        if (aggregatedCommitInfoList == null || aggregatedCommitInfoList.size() == 0) {
+        LOGGER.info("=============================agg commit=================================");
+        if (CollectionUtils.isEmpty(aggregatedCommitInfoList)) {
             return null;
         }
         List<HiveAggregatedCommitInfo> errorAggregatedCommitInfoList = new ArrayList<>();
-        aggregatedCommitInfoList.stream().forEach(aggregateCommitInfo -> {
-            try {
-                Map<String, String> needMoveFiles = aggregateCommitInfo.getNeedMoveFiles();
-                for (Map.Entry<String, String> entry : needMoveFiles.entrySet()) {
-                    HdfsUtils.renameFile(entry.getKey(), entry.getValue(), true);
+        HiveMetaStoreProxy hiveMetaStoreProxy = new HiveMetaStoreProxy(aggregatedCommitInfoList.get(0).getHiveMetastoreUris());
+        HiveMetaStoreClient hiveMetaStoreClient = hiveMetaStoreProxy.getHiveMetaStoreClient();
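+        // second phase of the two-phase commit: move every staged file out of
+        // its transaction dir, drop the dir, then register the new partitions
+        // with the hive metastore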
+        try {
+            aggregatedCommitInfoList.stream().forEach(aggregateCommitInfo -> {
+                try {
+                    for (Map.Entry<String, Map<String, String>> entry : aggregateCommitInfo.getFileAggregatedCommitInfo().getTransactionMap().entrySet()) {
+                        // move the staged files to their final locations
+                        for (Map.Entry<String, String> mvFileEntry : entry.getValue().entrySet()) {
+                            HdfsUtils.renameFile(mvFileEntry.getKey(), mvFileEntry.getValue(), true);
+                        }
+                        // delete the transaction dir
+                        HdfsUtils.deleteFile(entry.getKey());
+                    }
+                    // add hive partitions
+                    aggregateCommitInfo.getFileAggregatedCommitInfo().getPartitionDirAndValsMap().entrySet().forEach(entry -> {
+                        Partition part = new Partition();
+                        part.setDbName(aggregateCommitInfo.getTable().getDbName());
+                        part.setTableName(aggregateCommitInfo.getTable().getTableName());
+                        part.setValues(entry.getValue());
+                        part.setParameters(new HashMap<>());
+                        part.setSd(aggregateCommitInfo.getTable().getSd().deepCopy());
+                        part.getSd().setSerdeInfo(aggregateCommitInfo.getTable().getSd().getSerdeInfo());
+                        part.getSd().setLocation(aggregateCommitInfo.getTable().getSd().getLocation() + "/" + entry.getKey());
+                        try {
+                            hiveMetaStoreClient.add_partition(part);
+                        } catch (TException e) {
+                            throw new RuntimeException(e);
+                        }
+                    });
+                } catch (Exception e) {
+                    LOGGER.error("commit aggregateCommitInfo error ", e);
+                    errorAggregatedCommitInfoList.add(aggregateCommitInfo);
                 }
-            } catch (IOException e) {
-                LOGGER.error("commit aggregateCommitInfo error ", e);
-                errorAggregatedCommitInfoList.add(aggregateCommitInfo);
-            }
-        });
+            });
+        } finally {
+            hiveMetaStoreClient.close();
+        }
         return errorAggregatedCommitInfoList;
     }

     @Override
     public HiveAggregatedCommitInfo combine(List<HiveCommitInfo> commitInfos) {
-        if (commitInfos == null || commitInfos.size() == 0) {
+        if (CollectionUtils.isEmpty(commitInfos)) {
             return null;
         }
-        Map<String, String> aggregateCommitInfo = new HashMap<>();
+        Map<String, Map<String, String>> aggregateCommitInfo = new HashMap<>();
+        Map<String, List<String>> partitionDirAndValsMap = new HashMap<>();
         commitInfos.stream().forEach(commitInfo -> {
-            aggregateCommitInfo.putAll(commitInfo.getNeedMoveFiles());
+            Map<String, String> needMoveFileMap = aggregateCommitInfo.get(commitInfo.getFileCommitInfo().getTransactionDir());
+            if (needMoveFileMap == null) {
+                needMoveFileMap = new HashMap<>();
+                aggregateCommitInfo.put(commitInfo.getFileCommitInfo().getTransactionDir(), needMoveFileMap);
+            }
+            needMoveFileMap.putAll(commitInfo.getFileCommitInfo().getNeedMoveFiles());
+            Set<Map.Entry<String, List<String>>> entries = commitInfo.getFileCommitInfo().getPartitionDirAndValsMap().entrySet();
+            if (!CollectionUtils.isEmpty(entries)) {
+                partitionDirAndValsMap.putAll(commitInfo.getFileCommitInfo().getPartitionDirAndValsMap());
+            }
         });
-        return new HiveAggregatedCommitInfo(aggregateCommitInfo);
+        return new HiveAggregatedCommitInfo(
+            new FileAggregatedCommitInfo(aggregateCommitInfo, partitionDirAndValsMap),
+            commitInfos.get(0).getHiveMetastoreUris(),
+            commitInfos.get(0).getTable());
     }
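+    // abort() compensates a (partially) committed aggregate: files that already
+    // reached the target dir are moved back into the transaction dir, which is
+    // then deleted; partitions added to the metastore are intentionally kept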
error ", e); diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkConfig.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkConfig.java index 2e08862f7b2..a37dd3a51a2 100644 --- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkConfig.java +++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkConfig.java @@ -17,90 +17,132 @@ package org.apache.seatunnel.connectors.seatunnel.hive.sink; +import static org.apache.seatunnel.connectors.seatunnel.file.config.Constant.FIELD_DELIMITER; +import static org.apache.seatunnel.connectors.seatunnel.file.config.Constant.FILE_FORMAT; +import static org.apache.seatunnel.connectors.seatunnel.file.config.Constant.FILE_NAME_EXPRESSION; +import static org.apache.seatunnel.connectors.seatunnel.file.config.Constant.IS_PARTITION_FIELD_WRITE_IN_FILE; +import static org.apache.seatunnel.connectors.seatunnel.file.config.Constant.PATH; +import static org.apache.seatunnel.connectors.seatunnel.file.config.Constant.ROW_DELIMITER; +import static org.apache.seatunnel.connectors.seatunnel.file.config.Constant.SAVE_MODE; +import static org.apache.seatunnel.connectors.seatunnel.hive.config.Constant.HIVE_METASTORE_URIS; +import static org.apache.seatunnel.connectors.seatunnel.hive.config.Constant.HIVE_RESULT_TABLE_NAME; +import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.seatunnel.file.config.Constant; +import org.apache.seatunnel.connectors.seatunnel.file.config.FileFormat; +import org.apache.seatunnel.connectors.seatunnel.file.sink.config.SaveMode; +import org.apache.seatunnel.connectors.seatunnel.file.sink.config.TextFileSinkConfig; +import org.apache.seatunnel.connectors.seatunnel.hive.utils.HiveMetaStoreProxy; + import org.apache.seatunnel.shade.com.typesafe.config.Config; +import org.apache.seatunnel.shade.com.typesafe.config.ConfigValueFactory; import lombok.Data; import lombok.NonNull; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.thrift.TException; +import java.io.Serializable; +import java.util.Arrays; import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; @Data -public class HiveSinkConfig { - - private static final String HIVE_SAVE_MODE = "save_mode"; - - private static final String HIVE_SINK_COLUMNS = "sink_columns"; - - private static final String HIVE_PARTITION_BY = "partition_by"; - - private static final String HIVE_RESULT_TABLE_NAME = "result_table_name"; +public class HiveSinkConfig implements Serializable { + private String hiveTableName; + private List hivePartitionFieldList; + private String hiveMetaUris; - private static final String SINK_TMP_FS_ROOT_PATH = "sink_tmp_fs_root_path"; + private String dbName; - private static final String HIVE_TABLE_FS_PATH = "hive_table_fs_path"; + private String tableName; - private static final String HIVE_TXT_FILE_FIELD_DELIMITER = "hive_txt_file_field_delimiter"; + private Table table; - private static 

-    private static final String HIVE_TXT_FILE_LINE_DELIMITER = "hive_txt_file_line_delimiter";
+    private TextFileSinkConfig textFileSinkConfig;

-    private SaveMode saveMode = SaveMode.APPEND;
+    public HiveSinkConfig(@NonNull Config config, @NonNull SeaTunnelRowType seaTunnelRowTypeInfo) {
+        checkArgument(!CollectionUtils.isEmpty(Arrays.asList(seaTunnelRowTypeInfo.getFieldNames())));

-    private String sinkTmpFsRootPath = "/tmp/seatunnel";
+        if (config.hasPath(HIVE_RESULT_TABLE_NAME) && !StringUtils.isBlank(config.getString(HIVE_RESULT_TABLE_NAME))) {
+            this.hiveTableName = config.getString(HIVE_RESULT_TABLE_NAME);
+        }
+        checkNotNull(hiveTableName);

-    private List<String> partitionFieldNames;
+        if (config.hasPath(HIVE_METASTORE_URIS) && !StringUtils.isBlank(config.getString(HIVE_METASTORE_URIS))) {
+            this.hiveMetaUris = config.getString(HIVE_METASTORE_URIS);
+        }
+        checkNotNull(hiveMetaUris);

-    private String hiveTableName;
+        String[] dbAndTableName = hiveTableName.split("\\.");
+        if (dbAndTableName == null || dbAndTableName.length != 2) {
+            throw new RuntimeException("Please config " + HIVE_RESULT_TABLE_NAME + " as db.table format");
+        }
+        this.dbName = dbAndTableName[0];
+        this.tableName = dbAndTableName[1];
+        HiveMetaStoreProxy hiveMetaStoreProxy = new HiveMetaStoreProxy(hiveMetaUris);
+        HiveMetaStoreClient hiveMetaStoreClient = hiveMetaStoreProxy.getHiveMetaStoreClient();
+
+        try {
+            table = hiveMetaStoreClient.getTable(dbName, tableName);
+            String inputFormat = table.getSd().getInputFormat();
+            if ("org.apache.hadoop.mapred.TextInputFormat".equals(inputFormat)) {
+                config = config.withValue(FILE_FORMAT, ConfigValueFactory.fromAnyRef(FileFormat.TEXT.toString()));
+            } else {
+                throw new RuntimeException("Only text file tables are supported now");
+            }

-    private List<String> sinkColumns;
+            Map<String, String> parameters = table.getSd().getSerdeInfo().getParameters();
+            config = config.withValue(IS_PARTITION_FIELD_WRITE_IN_FILE, ConfigValueFactory.fromAnyRef(false))
+                .withValue(FIELD_DELIMITER, ConfigValueFactory.fromAnyRef(parameters.get("field.delim")))
+                .withValue(ROW_DELIMITER, ConfigValueFactory.fromAnyRef(parameters.get("line.delim")))
+                .withValue(FILE_NAME_EXPRESSION, ConfigValueFactory.fromAnyRef("${transactionId}"))
+                .withValue(PATH, ConfigValueFactory.fromAnyRef(table.getSd().getLocation()));

-    private String hiveTableFsPath;
+            if (!config.hasPath(SAVE_MODE) || StringUtils.isBlank(config.getString(Constant.SAVE_MODE))) {
+                config = config.withValue(SAVE_MODE, ConfigValueFactory.fromAnyRef(SaveMode.APPEND.toString()));
+            }

-    private String hiveTxtFileFieldDelimiter = String.valueOf('\001');
+            this.textFileSinkConfig = new TextFileSinkConfig(config, seaTunnelRowTypeInfo);
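+            // everything below cross-checks the derived TextFileSinkConfig
+            // against the live table metadata so that schema mismatches fail
+            // at prepare time instead of surfacing while data is being written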

-    private String hiveTxtFileLineDelimiter = "\n";
+            // --------------------Check textFileSinkConfig with the hive table info-------------------
+            List<FieldSchema> fields = hiveMetaStoreClient.getFields(dbAndTableName[0], dbAndTableName[1]);
+            List<FieldSchema> partitionKeys = table.getPartitionKeys();

-    public enum SaveMode {
-        APPEND(),
-        OVERWRITE();
+            // Remove partitionKeys from table fields
+            List<FieldSchema> fieldNotContainPartitionKey = fields.stream().filter(field -> !partitionKeys.contains(field)).collect(Collectors.toList());

-        public static SaveMode fromStr(String str) {
-            if ("overwrite".equals(str)) {
-                return OVERWRITE;
-            } else {
-                return APPEND;
+            // the field count must match the sinkColumnList size
+            if (fieldNotContainPartitionKey.size() != textFileSinkConfig.getSinkColumnList().size()) {
+                throw new RuntimeException("sink columns size must be the same as the hive table field size");
             }
-        }
-    }

-    public HiveSinkConfig(@NonNull Config pluginConfig) {
-        checkNotNull(pluginConfig.getString(HIVE_RESULT_TABLE_NAME));
-        checkNotNull(pluginConfig.getString(HIVE_TABLE_FS_PATH));
-        this.hiveTableName = pluginConfig.getString(HIVE_RESULT_TABLE_NAME);
-        this.hiveTableFsPath = pluginConfig.getString(HIVE_TABLE_FS_PATH);
-        this.saveMode = StringUtils.isBlank(pluginConfig.getString(HIVE_SAVE_MODE)) ? SaveMode.APPEND : SaveMode.fromStr(pluginConfig.getString(HIVE_SAVE_MODE));
-        if (!StringUtils.isBlank(pluginConfig.getString(SINK_TMP_FS_ROOT_PATH))) {
-            this.sinkTmpFsRootPath = pluginConfig.getString(SINK_TMP_FS_ROOT_PATH);
-        }
-
-        this.partitionFieldNames = pluginConfig.getStringList(HIVE_PARTITION_BY);
-        this.sinkColumns = pluginConfig.getStringList(HIVE_SINK_COLUMNS);
+            // the partition-by column count must match the hive partition key count
+            if (partitionKeys.size() != textFileSinkConfig.getPartitionFieldList().size()) {
+                throw new RuntimeException("partition by columns size must be the same as the hive table partition columns size");
+            }

-        if (!StringUtils.isBlank(pluginConfig.getString(HIVE_TXT_FILE_FIELD_DELIMITER))) {
-            this.hiveTxtFileFieldDelimiter = pluginConfig.getString(HIVE_TXT_FILE_FIELD_DELIMITER);
+            // --------------------Check textFileSinkConfig with the hive table info end----------------
+        } catch (TException e) {
+            throw new RuntimeException(e);
+        } finally {
+            hiveMetaStoreClient.close();
         }

-        if (!StringUtils.isBlank(pluginConfig.getString(HIVE_TXT_FILE_LINE_DELIMITER))) {
-            this.hiveTxtFileLineDelimiter = pluginConfig.getString(HIVE_TXT_FILE_LINE_DELIMITER);
+        // hive only supports the append or overwrite save mode
+        if (!this.textFileSinkConfig.getSaveMode().equals(SaveMode.APPEND) && !this.textFileSinkConfig.getSaveMode().equals(SaveMode.OVERWRITE)) {
+            throw new RuntimeException("hive only supports the append or overwrite save mode");
         }
+    }

-        // partition fields must in sink columns
-        if (!CollectionUtils.isEmpty(this.sinkColumns) && !CollectionUtils.isEmpty(this.partitionFieldNames) && !this.sinkColumns.containsAll(this.partitionFieldNames)) {
-            throw new RuntimeException("partition fields must in sink columns");
-        }
+    public TextFileSinkConfig getTextFileSinkConfig() {
+        return textFileSinkConfig;
     }
 }
diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkState.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkState.java
index 4f9f5d12e74..a104151c36a 100644
--- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkState.java
+++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkState.java
@@ -25,5 +25,6 @@
 @Data
 @AllArgsConstructor
 public class HiveSinkState implements Serializable {
-    private HiveSinkConfig hiveSinkConfig;
+    private String transactionId;
+    private Long checkpointId;
 }
diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkWriter.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkWriter.java
index e04f577290a..4bdeae47860 100644
--- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkWriter.java
+++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkWriter.java
@@ -20,48 +20,120 @@
 import org.apache.seatunnel.api.sink.SinkWriter;
 import org.apache.seatunnel.api.table.type.SeaTunnelRow;
 import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
-import org.apache.seatunnel.connectors.seatunnel.hive.sink.file.writer.FileWriter;
-import org.apache.seatunnel.connectors.seatunnel.hive.sink.file.writer.HdfsTxtFileWriter;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.FileCommitInfo;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.FileSinkState;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.hdfs.HdfsFileSinkPlugin;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.spi.SinkFileSystemPlugin;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.transaction.TransactionStateFileWriter;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.FileSinkPartitionDirNameGenerator;
+import org.apache.seatunnel.connectors.seatunnel.file.sink.writer.FileSinkTransactionFileNameGenerator;

 import org.apache.seatunnel.shade.com.typesafe.config.Config;

-import com.google.common.collect.Lists;
 import lombok.NonNull;
+import org.apache.commons.collections4.CollectionUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import java.io.IOException;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import java.util.Optional;
+import java.util.stream.Collectors;

 public class HiveSinkWriter implements SinkWriter<SeaTunnelRow, HiveCommitInfo, HiveSinkState> {
     private static final Logger LOGGER = LoggerFactory.getLogger(HiveSinkWriter.class);

-    private SeaTunnelRowType seaTunnelRowType;
+    private SeaTunnelRowType seaTunnelRowTypeInfo;
     private Config pluginConfig;
-    private SinkWriter.Context context;
-    private long jobId;
+    private Context context;
+    private String jobId;

-    private FileWriter fileWriter;
+    private TransactionStateFileWriter fileWriter;

     private HiveSinkConfig hiveSinkConfig;

-    public HiveSinkWriter(@NonNull SeaTunnelRowType seaTunnelRowType,
+    public HiveSinkWriter(@NonNull SeaTunnelRowType seaTunnelRowTypeInfo,
                           @NonNull Config pluginConfig,
                           @NonNull SinkWriter.Context context,
-                          long jobId) {
-        this.seaTunnelRowType = seaTunnelRowType;
+                          @NonNull HiveSinkConfig hiveSinkConfig,
+                          @NonNull String jobId) {
+        this.seaTunnelRowTypeInfo = seaTunnelRowTypeInfo;
         this.pluginConfig = pluginConfig;
         this.context = context;
         this.jobId = jobId;
+        this.hiveSinkConfig = hiveSinkConfig;
+
+        SinkFileSystemPlugin sinkFileSystemPlugin = new HdfsFileSinkPlugin();
+        Optional<TransactionStateFileWriter> transactionStateFileWriter = sinkFileSystemPlugin.getTransactionStateFileWriter(this.seaTunnelRowTypeInfo,
+            new FileSinkTransactionFileNameGenerator(
+                this.hiveSinkConfig.getTextFileSinkConfig().getFileFormat(),
+                this.hiveSinkConfig.getTextFileSinkConfig().getFileNameExpression(),
+                this.hiveSinkConfig.getTextFileSinkConfig().getFileNameTimeFormat()),
+            new FileSinkPartitionDirNameGenerator(
+                this.hiveSinkConfig.getTextFileSinkConfig().getPartitionFieldList(),
+                this.hiveSinkConfig.getTextFileSinkConfig().getPartitionFieldsIndexInRow(),
+                this.hiveSinkConfig.getTextFileSinkConfig().getPartitionDirExpression()),
+            this.hiveSinkConfig.getTextFileSinkConfig().getSinkColumnsIndexInRow(),
+            this.hiveSinkConfig.getTextFileSinkConfig().getTmpPath(),
+            this.hiveSinkConfig.getTextFileSinkConfig().getPath(),
+            this.jobId,
+            this.context.getIndexOfSubtask(),
+            this.hiveSinkConfig.getTextFileSinkConfig().getFieldDelimiter(),
+            this.hiveSinkConfig.getTextFileSinkConfig().getRowDelimiter(),
+            sinkFileSystemPlugin.getFileSystem().get());
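+        // the file-system plugin is not guaranteed to supply a transactional
+        // writer, so fail fast rather than write files that cannot be committed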
+        if (!transactionStateFileWriter.isPresent()) {
+            throw new RuntimeException("A TransactionStateFileWriter is needed");
+        }
+
+        this.fileWriter = transactionStateFileWriter.get();
+
+        fileWriter.beginTransaction(1L);
+    }

-        hiveSinkConfig = new HiveSinkConfig(this.pluginConfig);
-        fileWriter = new HdfsTxtFileWriter(this.seaTunnelRowType,
-            hiveSinkConfig,
+    public HiveSinkWriter(@NonNull SeaTunnelRowType seaTunnelRowTypeInfo,
+                          @NonNull Config pluginConfig,
+                          @NonNull SinkWriter.Context context,
+                          @NonNull HiveSinkConfig hiveSinkConfig,
+                          @NonNull String jobId,
+                          @NonNull List<HiveSinkState> hiveSinkStates) {
+        this.seaTunnelRowTypeInfo = seaTunnelRowTypeInfo;
+        this.pluginConfig = pluginConfig;
+        this.context = context;
+        this.jobId = jobId;
+        this.hiveSinkConfig = hiveSinkConfig;
+
+        SinkFileSystemPlugin sinkFileSystemPlugin = new HdfsFileSinkPlugin();
+        Optional<TransactionStateFileWriter> transactionStateFileWriter = sinkFileSystemPlugin.getTransactionStateFileWriter(this.seaTunnelRowTypeInfo,
+            new FileSinkTransactionFileNameGenerator(
+                this.hiveSinkConfig.getTextFileSinkConfig().getFileFormat(),
+                this.hiveSinkConfig.getTextFileSinkConfig().getFileNameExpression(),
+                this.hiveSinkConfig.getTextFileSinkConfig().getFileNameTimeFormat()),
+            new FileSinkPartitionDirNameGenerator(
+                this.hiveSinkConfig.getTextFileSinkConfig().getPartitionFieldList(),
+                this.hiveSinkConfig.getTextFileSinkConfig().getPartitionFieldsIndexInRow(),
+                this.hiveSinkConfig.getTextFileSinkConfig().getPartitionDirExpression()),
+            this.hiveSinkConfig.getTextFileSinkConfig().getSinkColumnsIndexInRow(),
+            this.hiveSinkConfig.getTextFileSinkConfig().getTmpPath(),
+            this.hiveSinkConfig.getTextFileSinkConfig().getPath(),
             this.jobId,
-            this.context.getIndexOfSubtask());
+            this.context.getIndexOfSubtask(),
+            this.hiveSinkConfig.getTextFileSinkConfig().getFieldDelimiter(),
+            this.hiveSinkConfig.getTextFileSinkConfig().getRowDelimiter(),
+            sinkFileSystemPlugin.getFileSystem().get());
+
+        if (!transactionStateFileWriter.isPresent()) {
+            throw new RuntimeException("A TransactionStateFileWriter is needed");
+        }
+
+        this.fileWriter = transactionStateFileWriter.get();
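+        // restore path: abort any transaction opened after the checkpointed one
+        // (it was never committed), then resume from checkpointId + 1; with no
+        // prior state, start a fresh transaction instead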
+        // Rollback dirty transactions
+        if (!hiveSinkStates.isEmpty()) {
+            List<String> transactionAfter = fileWriter.getTransactionAfter(hiveSinkStates.get(0).getTransactionId());
+            fileWriter.abortTransactions(transactionAfter);
+            fileWriter.beginTransaction(hiveSinkStates.get(0).getCheckpointId() + 1);
+        } else {
+            fileWriter.beginTransaction(1L);
+        }
     }

@@ -71,18 +143,12 @@ public void write(SeaTunnelRow element) throws IOException {

     @Override
     public Optional<HiveCommitInfo> prepareCommit() throws IOException {
-        fileWriter.finishAndCloseWriteFile();
-        /**
-         * We will clear the needMoveFiles in {@link #snapshotState()}, So we need copy the needMoveFiles map here.
-         */
-        Map<String, String> commitInfoMap = new HashMap<>(fileWriter.getNeedMoveFiles().size());
-        commitInfoMap.putAll(fileWriter.getNeedMoveFiles());
-        return Optional.of(new HiveCommitInfo(commitInfoMap));
-    }
-
-    @Override
-    public void abortPrepare() {
-        fileWriter.abort();
+        Optional<FileCommitInfo> fileCommitInfoOptional = fileWriter.prepareCommit();
+        if (fileCommitInfoOptional.isPresent()) {
+            FileCommitInfo fileCommitInfo = fileCommitInfoOptional.get();
+            return Optional.of(new HiveCommitInfo(fileCommitInfo, hiveSinkConfig.getHiveMetaUris(), this.hiveSinkConfig.getTable()));
+        }
+        return Optional.empty();
     }

     @Override
@@ -92,8 +158,17 @@ public void close() throws IOException {
     }

     @Override
     public List<HiveSinkState> snapshotState(long checkpointId) throws IOException {
-        //reset FileWriter
-        fileWriter.resetFileWriter(System.currentTimeMillis() + "");
-        return Lists.newArrayList(new HiveSinkState(hiveSinkConfig));
+        List<FileSinkState> fileSinkStates = fileWriter.snapshotState(checkpointId);
+        if (!CollectionUtils.isEmpty(fileSinkStates)) {
+            return fileSinkStates.stream().map(state ->
+                new HiveSinkState(state.getTransactionId(), state.getCheckpointId()))
+                .collect(Collectors.toList());
+        }
+        return null;
+    }
+
+    @Override
+    public void abortPrepare() {
+        fileWriter.abortTransaction();
     }
 }
diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/file/writer/AbstractFileWriter.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/file/writer/AbstractFileWriter.java
deleted file mode 100644
index 57ca3c90173..00000000000
--- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/file/writer/AbstractFileWriter.java
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.seatunnel.connectors.seatunnel.hive.sink.file.writer;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import org.apache.seatunnel.api.table.type.SeaTunnelRow;
-import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
-import org.apache.seatunnel.connectors.seatunnel.hive.sink.HiveSinkConfig;
-
-import lombok.NonNull;
-
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
-
-public abstract class AbstractFileWriter implements FileWriter {
-    protected Map<String, String> needMoveFiles;
-    protected SeaTunnelRowType seaTunnelRowType;
-    protected long jobId;
-    protected int subTaskIndex;
-    protected HiveSinkConfig hiveSinkConfig;
-
-    private static final String SEATUNNEL = "seatunnel";
-    private static final String NON_PARTITION = "NON_PARTITION";
-
-    protected Map<String, String> beingWrittenFile;
-
-    protected String checkpointId;
-    protected final int[] partitionKeyIndexes;
-
-    public AbstractFileWriter(@NonNull SeaTunnelRowType seaTunnelRowType,
-                              @NonNull HiveSinkConfig hiveSinkConfig,
-                              long jobId,
-                              int subTaskIndex) {
-        checkArgument(jobId > 0);
-        checkArgument(subTaskIndex > -1);
-
-        this.needMoveFiles = new HashMap<>();
-        this.seaTunnelRowType = seaTunnelRowType;
-        this.jobId = jobId;
-        this.subTaskIndex = subTaskIndex;
-        this.hiveSinkConfig = hiveSinkConfig;
-
-        this.beingWrittenFile = new HashMap<>();
-        if (this.hiveSinkConfig.getPartitionFieldNames() == null) {
-            this.partitionKeyIndexes = new int[0];
-        } else {
-            this.partitionKeyIndexes = IntStream.range(0, seaTunnelRowType.getTotalFields())
-                .filter(i -> hiveSinkConfig.getPartitionFieldNames().contains(seaTunnelRowType.getFieldName(i)))
-                .toArray();
-        }
-    }
-
-    public String getOrCreateFilePathBeingWritten(@NonNull SeaTunnelRow seaTunnelRow) {
-        String beingWrittenFileKey = getBeingWrittenFileKey(seaTunnelRow);
-        // get filePath from beingWrittenFile
-        String beingWrittenFilePath = beingWrittenFile.get(beingWrittenFileKey);
-        if (beingWrittenFilePath != null) {
-            return beingWrittenFilePath;
-        } else {
-            StringBuilder sbf = new StringBuilder(hiveSinkConfig.getSinkTmpFsRootPath());
-            sbf.append("/")
-                .append(SEATUNNEL)
-                .append("/")
-                .append(jobId)
-                .append("/")
-                .append(checkpointId)
-                .append("/")
-                .append(hiveSinkConfig.getHiveTableName())
-                .append("/")
-                .append(beingWrittenFileKey)
-                .append("/")
-                .append(jobId)
-                .append("_")
-                .append(subTaskIndex)
-                .append(".")
-                .append(getFileSuffix());
-            String newBeingWrittenFilePath = sbf.toString();
-            beingWrittenFile.put(beingWrittenFileKey, newBeingWrittenFilePath);
-            return newBeingWrittenFilePath;
-        }
-    }
-
-    private String getBeingWrittenFileKey(@NonNull SeaTunnelRow seaTunnelRow) {
-        if (partitionKeyIndexes.length > 0) {
-            return Arrays.stream(partitionKeyIndexes)
-                .boxed()
-                .map(i -> seaTunnelRowType.getFieldName(i) + "=" + seaTunnelRow.getField(i))
-                .collect(Collectors.joining("/"));
-        } else {
-            // If there is no partition field in data, We use the fixed value NON_PARTITION as the partition directory
-            return NON_PARTITION;
-        }
-    }
-
-    /**
-     * FileWriter need return the file suffix.
-     * eg: tex, orc, parquet
-     *
-     * @return
-     */
-    @NonNull
-    public abstract String getFileSuffix();
-
-    public String getHiveLocation(@NonNull String seaTunnelFilePath) {
-        StringBuilder sbf = new StringBuilder(hiveSinkConfig.getSinkTmpFsRootPath());
-        sbf.append("/")
-            .append(SEATUNNEL)
-            .append("/")
-            .append(jobId)
-            .append("/")
-            .append(checkpointId)
-            .append("/")
-            .append(hiveSinkConfig.getHiveTableName());
-        String seaTunnelPath = sbf.toString();
-        String tmpPath = seaTunnelFilePath.replaceAll(seaTunnelPath, hiveSinkConfig.getHiveTableFsPath());
-        return tmpPath.replaceAll(NON_PARTITION + "/", "");
-    }
-
-    @Override
-    public void resetFileWriter(@NonNull String checkpointId) {
-        this.checkpointId = checkpointId;
-        this.needMoveFiles = new HashMap<>();
-        this.beingWrittenFile = new HashMap<>();
-        this.resetMoreFileWriter(checkpointId);
-    }
-
-    public abstract void resetMoreFileWriter(@NonNull String checkpointId);
-
-    @Override
-    public void abort() {
-        this.needMoveFiles = new HashMap<>();
-        this.beingWrittenFile = new HashMap<>();
-        this.abortMore();
-    }
-
-    public abstract void abortMore();
-}
diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HadoopConf.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HadoopConf.java
new file mode 100644
index 00000000000..31dddf7b49a
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HadoopConf.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.hive.source;
+
+import lombok.Data;
+
+import java.io.Serializable;
+
+@Data
+public class HadoopConf implements Serializable {
+
+    private String hdfsNameKey;
+
+    private String fsHdfsImpl = "org.apache.hadoop.hdfs.DistributedFileSystem";
+
+    public HadoopConf(String hdfsNameKey) {
+        this.hdfsNameKey = hdfsNameKey;
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSource.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSource.java
new file mode 100644
index 00000000000..ebaf2c51b74
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSource.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.hive.source;
+
+import static org.apache.seatunnel.connectors.seatunnel.hive.config.SourceConfig.FILE_PATH;
+import static org.apache.hadoop.fs.FileSystem.FS_DEFAULT_NAME_KEY;
+
+import org.apache.seatunnel.api.common.PrepareFailException;
+import org.apache.seatunnel.api.common.SeaTunnelContext;
+import org.apache.seatunnel.api.source.Boundedness;
+import org.apache.seatunnel.api.source.SeaTunnelSource;
+import org.apache.seatunnel.api.source.SourceReader;
+import org.apache.seatunnel.api.source.SourceSplitEnumerator;
+import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.common.config.CheckConfigUtil;
+import org.apache.seatunnel.common.config.CheckResult;
+import org.apache.seatunnel.common.constants.PluginType;
+import org.apache.seatunnel.connectors.seatunnel.hive.config.SourceConfig;
+import org.apache.seatunnel.connectors.seatunnel.hive.exception.HivePluginException;
+import org.apache.seatunnel.connectors.seatunnel.hive.source.file.reader.format.OrcReadStrategy;
+import org.apache.seatunnel.connectors.seatunnel.hive.source.file.reader.format.ReadStrategy;
+import org.apache.seatunnel.connectors.seatunnel.hive.source.file.reader.format.TextReadStrategy;
+
+import org.apache.seatunnel.shade.com.typesafe.config.Config;
+
+import com.google.auto.service.AutoService;
+
+import java.io.IOException;
+import java.util.List;
+
+@AutoService(SeaTunnelSource.class)
+public class HiveSource implements SeaTunnelSource<SeaTunnelRow, HiveSourceSplit, HiveSourceState> {
+
+    private SeaTunnelContext seaTunnelContext;
+
+    private SeaTunnelRowType typeInfo;
+
+    private ReadStrategy readStrategy;
+
+    private HadoopConf hadoopConf;
+
+    private List<String> filesPath;
+
+    @Override
+    public String getPluginName() {
+        return "Hive";
+    }
+
+    @Override
+    public void prepare(Config pluginConfig) {
+        CheckResult result = CheckConfigUtil.checkAllExists(pluginConfig, FILE_PATH, FS_DEFAULT_NAME_KEY);
+        if (!result.isSuccess()) {
+            throw new PrepareFailException(getPluginName(), PluginType.SOURCE, result.getMsg());
+        }
+        // the default file type is text
+        if ("orc".equals(pluginConfig.getString(SourceConfig.FILE_TYPE))) {
+            readStrategy = new OrcReadStrategy();
+        } else {
+            readStrategy = new TextReadStrategy();
+        }
+        String path = pluginConfig.getString(FILE_PATH);
+        hadoopConf = new HadoopConf(pluginConfig.getString(FS_DEFAULT_NAME_KEY));
+        try {
+            filesPath = readStrategy.getFileNamesByPath(hadoopConf, path);
+        } catch (IOException e) {
+            throw new PrepareFailException(getPluginName(), PluginType.SOURCE, "Check file path failed.");
+        }
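+        // each data file becomes one split, so the full file list is resolved
+        // up front here and handed to the enumerator below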
PrepareFailException(getPluginName(), PluginType.SOURCE, "Read hive file type error.", e); + } + } + + @Override + public void setSeaTunnelContext(SeaTunnelContext seaTunnelContext) { + this.seaTunnelContext = seaTunnelContext; + } + + @Override + public SeaTunnelDataType getProducedType() { + return this.typeInfo; + } + + @Override + public SourceReader createReader(SourceReader.Context readerContext) throws Exception { + return new HiveSourceReader(this.readStrategy, this.hadoopConf, readerContext); + } + + @Override + public Boundedness getBoundedness() { + return Boundedness.BOUNDED; + } + + @Override + public SourceSplitEnumerator createEnumerator(SourceSplitEnumerator.Context enumeratorContext) throws Exception { + return new HiveSourceSplitEnumerator(enumeratorContext, filesPath); + } + + @Override + public SourceSplitEnumerator restoreEnumerator(SourceSplitEnumerator.Context enumeratorContext, HiveSourceState checkpointState) throws Exception { + return new HiveSourceSplitEnumerator(enumeratorContext, filesPath, checkpointState); + } + +} diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSourceReader.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSourceReader.java new file mode 100644 index 00000000000..570f48e35da --- /dev/null +++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSourceReader.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
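The `prepare` method above dispatches on the `file_type` string with an if/else, defaulting to text. As more formats arrive, a small table-driven factory keeps that dispatch in one place; this is a hypothetical sketch, not part of this patch, and it assumes the read-strategy classes above are in the same package:

```java
package org.apache.seatunnel.connectors.seatunnel.hive.source.file.reader.format;

import java.util.Locale;
import java.util.function.Supplier;

// hypothetical helper; preserves the patch's "default is text" behavior
public enum FileFormat {
    ORC(OrcReadStrategy::new),
    TEXT(TextReadStrategy::new);

    private final Supplier<ReadStrategy> factory;

    FileFormat(Supplier<ReadStrategy> factory) {
        this.factory = factory;
    }

    public static ReadStrategy strategyFor(String fileType) {
        if (fileType == null) {
            return new TextReadStrategy(); // mirrors the patch: default is text
        }
        try {
            return valueOf(fileType.toUpperCase(Locale.ROOT)).factory.get();
        } catch (IllegalArgumentException e) {
            return new TextReadStrategy(); // anything unrecognized falls back to text
        }
    }
}
```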
+ */ + +package org.apache.seatunnel.connectors.seatunnel.hive.source; + +import org.apache.seatunnel.api.source.Collector; +import org.apache.seatunnel.api.source.SourceReader; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.connectors.seatunnel.hive.source.file.reader.format.ReadStrategy; + +import lombok.extern.slf4j.Slf4j; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +@Slf4j +public class HiveSourceReader implements SourceReader { + + private static final long THREAD_WAIT_TIME = 500L; + + private ReadStrategy readStrategy; + + private HadoopConf hadoopConf; + + private Set sourceSplits; + + private final SourceReader.Context context; + + public HiveSourceReader(ReadStrategy readStrategy, HadoopConf hadoopConf, SourceReader.Context context) { + this.readStrategy = readStrategy; + this.hadoopConf = hadoopConf; + this.context = context; + this.sourceSplits = new HashSet<>(); + } + + @Override + public void open() { + readStrategy.init(hadoopConf); + } + + @Override + public void close() { + + } + + @Override + public void pollNext(Collector output) throws Exception { + if (sourceSplits.isEmpty()) { + Thread.sleep(THREAD_WAIT_TIME); + return; + } + sourceSplits.forEach(source -> { + try { + readStrategy.read(source.splitId(), output); + } catch (Exception e) { + throw new RuntimeException("Hive source read error", e); + } + + }); + context.signalNoMoreElement(); + } + + @Override + public List snapshotState(long checkpointId) { + return new ArrayList<>(sourceSplits); + } + + @Override + public void addSplits(List splits) { + sourceSplits.addAll(splits); + } + + @Override + public void handleNoMoreSplits() { + + } + + @Override + public void notifyCheckpointComplete(long checkpointId) { + + } +} diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSourceSplit.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSourceSplit.java new file mode 100644 index 00000000000..44e062e54c3 --- /dev/null +++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSourceSplit.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
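The reader above is a one-shot bounded reader: each `pollNext` call drains every split it has been handed and then signals that no more elements will come. The same pattern, stripped of the Hive specifics into plain Java (the `Consumer`/`Runnable` pair below stands in for SeaTunnel's `Collector` and `context.signalNoMoreElement()`, and is only a sketch):

```java
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;

class BoundedReaderSketch {
    private final List<String> splits = new ArrayList<>();
    private boolean finished;

    void addSplits(List<String> newSplits) {
        splits.addAll(newSplits);
    }

    // mirrors pollNext above: drain every split, then mark the stream finished
    void pollNext(Consumer<String> output, Runnable signalNoMoreElement) {
        if (finished || splits.isEmpty()) {
            return; // the real reader sleeps 500 ms and waits for splits instead
        }
        splits.forEach(output);    // "read" each split
        signalNoMoreElement.run(); // tell the engine this bounded source is done
        finished = true;
    }
}
```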
+ */ + +package org.apache.seatunnel.connectors.seatunnel.hive.source; + +import org.apache.seatunnel.api.source.SourceSplit; + +public class HiveSourceSplit implements SourceSplit { + + private static final long serialVersionUID = -1L; + + private String splitId; + + public HiveSourceSplit(String splitId) { + this.splitId = splitId; + } + + @Override + public String splitId() { + return this.splitId; + } +} diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSourceSplitEnumerator.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSourceSplitEnumerator.java new file mode 100644 index 00000000000..301b1506fe6 --- /dev/null +++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSourceSplitEnumerator.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.hive.source; + +import org.apache.seatunnel.api.source.SourceSplitEnumerator; +import org.apache.seatunnel.common.config.Common; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class HiveSourceSplitEnumerator implements SourceSplitEnumerator { + + private final Context context; + private Set pendingSplit; + private Set assignedSplit; + private List filePaths; + + public HiveSourceSplitEnumerator(SourceSplitEnumerator.Context context, List filePaths) { + this.context = context; + this.filePaths = filePaths; + } + + public HiveSourceSplitEnumerator(SourceSplitEnumerator.Context context, List filePaths, + HiveSourceState sourceState) { + this(context, filePaths); + this.assignedSplit = sourceState.getAssignedSplit(); + } + + @Override + public void open() { + this.assignedSplit = new HashSet<>(); + this.pendingSplit = new HashSet<>(); + } + + @Override + public void run() { + pendingSplit = getHiveFileSplit(); + assignSplit(context.registeredReaders()); + } + + private Set getHiveFileSplit() { + Set hiveSourceSplits = new HashSet<>(); + filePaths.forEach(k -> hiveSourceSplits.add(new HiveSourceSplit(k))); + return hiveSourceSplits; + + } + + @Override + public void close() throws IOException { + + } + + @Override + public void addSplitsBack(List splits, int subtaskId) { + if (!splits.isEmpty()) { + pendingSplit.addAll(splits); + assignSplit(Collections.singletonList(subtaskId)); + } + } + + private void assignSplit(Collection taskIDList) { + Map> readySplit = new HashMap<>(Common.COLLECTION_SIZE); + for (int taskID : taskIDList) 
{ + readySplit.computeIfAbsent(taskID, id -> new ArrayList<>()); + } + + pendingSplit.forEach(s -> readySplit.get(getSplitOwner(s.splitId(), taskIDList.size())) + .add(s)); + readySplit.forEach(context::assignSplit); + assignedSplit.addAll(pendingSplit); + pendingSplit.clear(); + } + + private static int getSplitOwner(String tp, int numReaders) { + return tp.hashCode() % numReaders; + } + + @Override + public int currentUnassignedSplitSize() { + return pendingSplit.size(); + } + + @Override + public void registerReader(int subtaskId) { + if (!pendingSplit.isEmpty()) { + assignSplit(Collections.singletonList(subtaskId)); + } + } + + @Override + public HiveSourceState snapshotState(long checkpointId) { + return new HiveSourceState(assignedSplit); + } + + @Override + public void notifyCheckpointComplete(long checkpointId) { + + } + + @Override + public void handleSplitRequest(int subtaskId) { + + } +} diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSourceState.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSourceState.java new file mode 100644 index 00000000000..f982a71cb22 --- /dev/null +++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSourceState.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.hive.source; + +import java.io.Serializable; +import java.util.Set; + +public class HiveSourceState implements Serializable { + + + private Set assignedSplit; + + public HiveSourceState(Set assignedSplit) { + this.assignedSplit = assignedSplit; + } + + public Set getAssignedSplit() { + return assignedSplit; + } +} diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/file/reader/format/AbstractReadStrategy.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/file/reader/format/AbstractReadStrategy.java new file mode 100644 index 00000000000..2df0a21f73d --- /dev/null +++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/file/reader/format/AbstractReadStrategy.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.hive.source.file.reader.format; + +import static org.apache.hadoop.fs.FileSystem.FS_DEFAULT_NAME_KEY; + +import org.apache.seatunnel.connectors.seatunnel.hive.exception.HivePluginException; +import org.apache.seatunnel.connectors.seatunnel.hive.source.HadoopConf; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public abstract class AbstractReadStrategy implements ReadStrategy { + + HadoopConf hadoopConf; + + @Override + public void init(HadoopConf conf) { + this.hadoopConf = conf; + } + + @Override + public Configuration getConfiguration(HadoopConf hadoopConf) { + Configuration configuration = new Configuration(); + configuration.set(FS_DEFAULT_NAME_KEY, hadoopConf.getHdfsNameKey()); + configuration.set("fs.hdfs.impl", hadoopConf.getFsHdfsImpl()); + return configuration; + } + + Configuration getConfiguration() throws HivePluginException { + if (null == hadoopConf) { + throw new HivePluginException("Not init read config"); + } + return getConfiguration(hadoopConf); + } + + boolean checkFileType(String path) { + return true; + } + + @Override + public List getFileNamesByPath(HadoopConf hadoopConf, String path) throws IOException { + Configuration configuration = getConfiguration(hadoopConf); + List fileNames = new ArrayList<>(); + FileSystem hdfs = FileSystem.get(configuration); + Path listFiles = new Path(path); + FileStatus[] stats = hdfs.listStatus(listFiles); + for (FileStatus fileStatus : stats) { + if (fileStatus.isDirectory()) { + fileNames.addAll(getFileNamesByPath(hadoopConf, fileStatus.getPath().toString())); + continue; + } + if (fileStatus.isFile()) { + // filter '_SUCCESS' file + if (!fileStatus.getPath().getName().equals("_SUCCESS")) { + fileNames.add(fileStatus.getPath().toString()); + } + } + } + return fileNames; + } + +} diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/file/reader/format/OrcReadStrategy.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/file/reader/format/OrcReadStrategy.java new file mode 100644 index 00000000000..325c1aaccb9 --- /dev/null +++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/file/reader/format/OrcReadStrategy.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.hive.source.file.reader.format; + +import org.apache.seatunnel.api.source.Collector; +import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.seatunnel.hive.exception.HivePluginException; +import org.apache.seatunnel.connectors.seatunnel.hive.source.HadoopConf; + +import lombok.extern.slf4j.Slf4j; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.orc.OrcFile; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcSerde; +import org.apache.hadoop.hive.ql.io.orc.OrcStruct; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.Reporter; +import org.apache.orc.Reader; +import org.apache.orc.TypeDescription; + +import java.io.IOException; +import java.util.List; +import java.util.Properties; + +@Slf4j +public class OrcReadStrategy extends AbstractReadStrategy { + + private SeaTunnelRowType seaTunnelRowTypeInfo; + + @Override + public void read(String path, Collector output) throws Exception { + if (Boolean.FALSE.equals(checkFileType(path))) { + throw new Exception("please check file type"); + } + JobConf conf = new JobConf(); + Path filePath = new Path(path); + Properties p = new Properties(); + OrcSerde serde = new OrcSerde(); + String columns = String.join(",", seaTunnelRowTypeInfo.getFieldNames()); + p.setProperty("columns", columns); + //support types + serde.initialize(conf, p); + StructObjectInspector inspector = (StructObjectInspector) serde.getObjectInspector(); + InputFormat in = new OrcInputFormat(); + FileInputFormat.setInputPaths(conf, filePath); + InputSplit[] splits = in.getSplits(conf, 1); + + conf.set("hive.io.file.readcolumn.ids", "1"); + RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL); + NullWritable key = reader.createKey(); + OrcStruct value = reader.createValue(); + List fields = inspector.getAllStructFieldRefs(); + while (reader.next(key, value)) { + Object[] datas = new Object[fields.size()]; + for (int i = 0; i < fields.size(); i++) { + Object data = inspector.getStructFieldData(value, fields.get(i)); + if (null != data) { + datas[i] = String.valueOf(data); + } else { + datas[i] = null; + } + } + output.collect(new SeaTunnelRow(datas)); + } + reader.close(); + } + + @Override + public SeaTunnelRowType getSeaTunnelRowTypeInfo(HadoopConf hadoopConf, String path) throws HivePluginException { + + if (null != 
seaTunnelRowTypeInfo) { + return seaTunnelRowTypeInfo; + } + Configuration configuration = getConfiguration(hadoopConf); + Path dstDir = new Path(path); + Reader reader; + try { + reader = OrcFile.createReader(FileSystem.get(configuration), dstDir); + } catch (IOException e) { + throw new HivePluginException("Create OrcReader Fail", e); + } + + TypeDescription schema = reader.getSchema(); + String[] fields = new String[schema.getFieldNames().size()]; + SeaTunnelDataType[] types = new SeaTunnelDataType[schema.getFieldNames().size()]; + + for (int i = 0; i < schema.getFieldNames().size(); i++) { + fields[i] = schema.getFieldNames().get(i); + types[i] = BasicType.STRING_TYPE; + } + seaTunnelRowTypeInfo = new SeaTunnelRowType(fields, types); + return seaTunnelRowTypeInfo; + } + +} + diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/file/reader/format/ReadStrategy.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/file/reader/format/ReadStrategy.java new file mode 100644 index 00000000000..5e73019147a --- /dev/null +++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/file/reader/format/ReadStrategy.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
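Note that `getSeaTunnelRowTypeInfo` in the ORC strategy above maps every column to `STRING_TYPE` regardless of its declared type (values are stringified in `read` as well). The same schema walk can be checked without HDFS against an in-memory ORC schema; the struct literal below is made up, and an `orc-core` dependency is assumed:

```java
import org.apache.orc.TypeDescription;

public class OrcSchemaSketch {
    public static void main(String[] args) {
        TypeDescription schema =
                TypeDescription.fromString("struct<name:string,age:int,score:double>");
        int n = schema.getFieldNames().size();
        for (int i = 0; i < n; i++) {
            // the strategy above always yields STRING_TYPE, whatever the ORC type is
            System.out.println(schema.getFieldNames().get(i) + " -> STRING");
        }
        // prints: name -> STRING, age -> STRING, score -> STRING
    }
}
```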
+ */ + +package org.apache.seatunnel.connectors.seatunnel.hive.source.file.reader.format; + +import org.apache.seatunnel.api.source.Collector; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.seatunnel.hive.exception.HivePluginException; +import org.apache.seatunnel.connectors.seatunnel.hive.source.HadoopConf; + +import org.apache.hadoop.conf.Configuration; + +import java.io.IOException; +import java.io.Serializable; +import java.util.List; + +public interface ReadStrategy extends Serializable { + void init(HadoopConf conf); + + Configuration getConfiguration(HadoopConf conf); + + void read(String path, Collector output) throws Exception; + + SeaTunnelRowType getSeaTunnelRowTypeInfo(HadoopConf hadoopConf, String path) throws HivePluginException; + + List getFileNamesByPath(HadoopConf hadoopConf, String path) throws IOException; +} diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/file/reader/format/TextReadStrategy.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/file/reader/format/TextReadStrategy.java new file mode 100644 index 00000000000..6b014d73799 --- /dev/null +++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/file/reader/format/TextReadStrategy.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.hive.source.file.reader.format; + +import org.apache.seatunnel.api.source.Collector; +import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.seatunnel.hive.exception.HivePluginException; +import org.apache.seatunnel.connectors.seatunnel.hive.source.HadoopConf; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; + +public class TextReadStrategy extends AbstractReadStrategy { + + private static final String TEXT_FIELD_NAME = "lines"; + + @Override + public void read(String path, Collector output) throws IOException, HivePluginException { + Configuration conf = getConfiguration(); + FileSystem fs = FileSystem.get(conf); + Path filePath = new Path(path); + try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(filePath), StandardCharsets.UTF_8))) { + reader.lines().forEach(line -> output.collect(new SeaTunnelRow(new String[]{line}))); + } + } + + @Override + public SeaTunnelRowType getSeaTunnelRowTypeInfo(HadoopConf hadoopConf, String path) { + return new SeaTunnelRowType(new String[]{TEXT_FIELD_NAME}, + new SeaTunnelDataType[]{BasicType.STRING_TYPE}); + } +} diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/utils/HiveMetaStoreProxy.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/utils/HiveMetaStoreProxy.java new file mode 100644 index 00000000000..30c9a2eba5b --- /dev/null +++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/utils/HiveMetaStoreProxy.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
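`TextReadStrategy` above emits each line of the file as a single-column row, and any line-based format can follow the same template through the `ReadStrategy` extension point. A hypothetical JSON-lines variant, shown only to illustrate the contract (it is not part of this patch, reuses `AbstractReadStrategy` from the same package, and leaves JSON parsing to a downstream transform):

```java
package org.apache.seatunnel.connectors.seatunnel.hive.source.file.reader.format;

import org.apache.seatunnel.api.source.Collector;
import org.apache.seatunnel.api.table.type.BasicType;
import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
import org.apache.seatunnel.connectors.seatunnel.hive.exception.HivePluginException;
import org.apache.seatunnel.connectors.seatunnel.hive.source.HadoopConf;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

// hypothetical format strategy, shown only to illustrate the ReadStrategy contract
public class JsonReadStrategy extends AbstractReadStrategy {

    @Override
    public void read(String path, Collector<SeaTunnelRow> output) throws IOException, HivePluginException {
        Configuration conf = getConfiguration();
        FileSystem fs = FileSystem.get(conf);
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(fs.open(new Path(path)), StandardCharsets.UTF_8))) {
            // one JSON document per line, emitted as a single string column
            reader.lines().forEach(line -> output.collect(new SeaTunnelRow(new String[]{line})));
        }
    }

    @Override
    public SeaTunnelRowType getSeaTunnelRowTypeInfo(HadoopConf hadoopConf, String path) {
        return new SeaTunnelRowType(new String[]{"json"},
                new SeaTunnelDataType[]{BasicType.STRING_TYPE});
    }
}
```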
+ */ + +package org.apache.seatunnel.connectors.seatunnel.hive.utils; + +import lombok.NonNull; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.thrift.TException; + +public class HiveMetaStoreProxy { + + private HiveMetaStoreClient hiveMetaStoreClient; + + public HiveMetaStoreProxy(@NonNull String uris) { + HiveConf hiveConf = new HiveConf(); + hiveConf.set("hive.metastore.uris", uris); + try { + hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf); + } catch (MetaException e) { + throw new RuntimeException(e); + } + } + + public Table getTable(@NonNull String dbName, @NonNull String tableName) { + try { + return hiveMetaStoreClient.getTable(dbName, tableName); + } catch (TException e) { + throw new RuntimeException(e); + } + } + + public HiveMetaStoreClient getHiveMetaStoreClient() { + return hiveMetaStoreClient; + } +} diff --git a/seatunnel-connectors-v2/connector-hive/src/test/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/TestHiveSinkConfig.java b/seatunnel-connectors-v2/connector-hive/src/test/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/TestHiveSinkConfig.java new file mode 100644 index 00000000000..92e5a693116 --- /dev/null +++ b/seatunnel-connectors-v2/connector-hive/src/test/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/TestHiveSinkConfig.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
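A usage sketch for the metastore proxy above, fetching the pieces the connector will eventually need (the URI, database and table names are placeholders, and the sketch is assumed to live in the proxy's package):

```java
package org.apache.seatunnel.connectors.seatunnel.hive.utils;

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Table;

public class MetaStoreProxySketch {
    public static void main(String[] args) {
        HiveMetaStoreProxy proxy = new HiveMetaStoreProxy("thrift://localhost:9083");
        Table table = proxy.getTable("default", "test_fake_to_hive");
        // the table's storage descriptor carries the filesystem location and columns
        System.out.println(table.getSd().getLocation());
        for (FieldSchema col : table.getSd().getCols()) {
            System.out.println(col.getName() + " : " + col.getType());
        }
    }
}
```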
+ */ + +package org.apache.seatunnel.connectors.seatunnel.hive.sink; + +import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; + +import org.apache.seatunnel.shade.com.typesafe.config.Config; +import org.apache.seatunnel.shade.com.typesafe.config.ConfigFactory; +import org.apache.seatunnel.shade.com.typesafe.config.ConfigResolveOptions; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.File; +import java.util.List; + +@RunWith(JUnit4.class) +public class TestHiveSinkConfig { + + @Test + public void testCreateHiveSinkConfig() { + String[] fieldNames = new String[]{"name", "age"}; + SeaTunnelDataType[] seaTunnelDataTypes = new SeaTunnelDataType[]{BasicType.STRING_TYPE, BasicType.INT_TYPE}; + SeaTunnelRowType seaTunnelRowTypeInfo = new SeaTunnelRowType(fieldNames, seaTunnelDataTypes); + String configFile = "fakesource_to_hive.conf"; + String configFilePath = System.getProperty("user.dir") + "/src/test/resources/" + configFile; + Config config = ConfigFactory + .parseFile(new File(configFilePath)) + .resolve(ConfigResolveOptions.defaults().setAllowUnresolved(true)) + .resolveWith(ConfigFactory.systemProperties(), + ConfigResolveOptions.defaults().setAllowUnresolved(true)); + List sink = config.getConfigList("sink"); + HiveSinkConfig hiveSinkConfig = new HiveSinkConfig(sink.get(0), seaTunnelRowTypeInfo); + } +} diff --git a/seatunnel-connectors-v2/connector-hive/src/test/resources/fakesource_to_hive.conf b/seatunnel-connectors-v2/connector-hive/src/test/resources/fakesource_to_hive.conf new file mode 100644 index 00000000000..3412ea663c4 --- /dev/null +++ b/seatunnel-connectors-v2/connector-hive/src/test/resources/fakesource_to_hive.conf @@ -0,0 +1,55 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + # You can set flink configuration here + execution.parallelism = 1 + job.mode = "BATCH" + #execution.checkpoint.interval = 10000 + #execution.checkpoint.data-uri = "hdfs://localhost:9000/checkpoint" +} + +source { + # This is a example source plugin **only for test and demonstrate the feature source plugin** + FakeSource { + result_table_name = "fake" + field_name = "name,age" + } + + # If you would like to get more information about how to configure seatunnel and see full list of source plugins, + # please go to https://seatunnel.apache.org/docs/flink/configuration/source-plugins/Fake +} + +transform { + # If you would like to get more information about how to configure seatunnel and see full list of transform plugins, + # please go to https://seatunnel.apache.org/docs/flink/configuration/transform-plugins/Sql +} + +sink { + Hive { + hive_table_name="default.test_fake_to_hive" + hive_metastore_uris="thrift://localhost:9083" + partition_by=["age"] + sink_columns=["name","age"] + } + + # If you would like to get more information about how to configure seatunnel and see full list of sink plugins, + # please go to https://seatunnel.apache.org/docs/flink/configuration/sink-plugins/Console +} \ No newline at end of file diff --git a/seatunnel-connectors-v2/connector-hudi/pom.xml b/seatunnel-connectors-v2/connector-hudi/pom.xml new file mode 100644 index 00000000000..0b2ee9ce8c4 --- /dev/null +++ b/seatunnel-connectors-v2/connector-hudi/pom.xml @@ -0,0 +1,61 @@ + + + + + seatunnel-connectors-v2 + org.apache.seatunnel + ${revision} + + 4.0.0 + + connector-hudi + + + + + org.apache.hive + hive-exec + provided + + + + org.apache.seatunnel + seatunnel-api + ${project.version} + + + + org.apache.hudi + hudi-hadoop-mr-bundle + + + + org.apache.commons + commons-lang3 + + + + junit + junit + + + diff --git a/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/config/HudiSourceConfig.java b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/config/HudiSourceConfig.java new file mode 100644 index 00000000000..9f1b86e94a2 --- /dev/null +++ b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/config/HudiSourceConfig.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.hudi.config; + +public class HudiSourceConfig { + + public static final String TABLE_PATH = "table.path"; + + public static final String TABLE_TYPE = "table.type"; + + public static final String CONF_FILES = "conf.files"; + + public static final String USE_KERBEROS = "use.kerberos"; + + public static final String KERBEROS_PRINCIPAL = "kerberos.principal"; + + public static final String KERBEROS_PRINCIPAL_FILE = "kerberos.principal.file"; + +} diff --git a/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/exception/HudiPluginException.java b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/exception/HudiPluginException.java new file mode 100644 index 00000000000..6971e029e08 --- /dev/null +++ b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/exception/HudiPluginException.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.hudi.exception; + +public class HudiPluginException extends Exception{ + + public HudiPluginException(String message) { + super(message); + } + + public HudiPluginException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/source/HudiSource.java b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/source/HudiSource.java new file mode 100644 index 00000000000..2ca69d784e7 --- /dev/null +++ b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/source/HudiSource.java @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
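The option keys above map one-to-one onto the source block of a SeaTunnel config file. A hypothetical snippet (paths and principal are placeholders): `conf.files` is semicolon-separated, matching the split in `HudiUtil.getConfiguration`, and `table.type` must be `cow`, since the Hudi source below rejects MOR tables:

```conf
source {
  Hudi {
    table.path = "hdfs://nn:8020/hudi/test_table"
    table.type = "cow"
    conf.files = "/etc/hadoop/conf/core-site.xml;/etc/hadoop/conf/hdfs-site.xml"
    use.kerberos = true
    kerberos.principal = "seatunnel@EXAMPLE.COM"
    kerberos.principal.file = "/etc/security/seatunnel.keytab"
  }
}
```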
+ */ + +package org.apache.seatunnel.connectors.seatunnel.hudi.source; + +import static org.apache.seatunnel.connectors.seatunnel.hudi.config.HudiSourceConfig.CONF_FILES; +import static org.apache.seatunnel.connectors.seatunnel.hudi.config.HudiSourceConfig.KERBEROS_PRINCIPAL; +import static org.apache.seatunnel.connectors.seatunnel.hudi.config.HudiSourceConfig.KERBEROS_PRINCIPAL_FILE; +import static org.apache.seatunnel.connectors.seatunnel.hudi.config.HudiSourceConfig.TABLE_PATH; +import static org.apache.seatunnel.connectors.seatunnel.hudi.config.HudiSourceConfig.TABLE_TYPE; +import static org.apache.seatunnel.connectors.seatunnel.hudi.config.HudiSourceConfig.USE_KERBEROS; + +import org.apache.seatunnel.api.common.PrepareFailException; +import org.apache.seatunnel.api.common.SeaTunnelContext; +import org.apache.seatunnel.api.source.Boundedness; +import org.apache.seatunnel.api.source.SeaTunnelSource; +import org.apache.seatunnel.api.source.SourceReader; +import org.apache.seatunnel.api.source.SourceSplitEnumerator; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.common.config.CheckConfigUtil; +import org.apache.seatunnel.common.config.CheckResult; +import org.apache.seatunnel.common.constants.PluginType; +import org.apache.seatunnel.connectors.seatunnel.hudi.exception.HudiPluginException; +import org.apache.seatunnel.connectors.seatunnel.hudi.util.HudiUtil; + +import org.apache.seatunnel.shade.com.typesafe.config.Config; + +import com.google.auto.service.AutoService; + +import java.io.IOException; + +@AutoService(SeaTunnelSource.class) +public class HudiSource implements SeaTunnelSource { + + private SeaTunnelContext seaTunnelContext; + + private SeaTunnelRowType typeInfo; + + private String filePath; + + private String tablePath; + + private String confFiles; + + private boolean useKerberos = false; + + @Override + public String getPluginName() { + return "Hudi"; + } + + @Override + public void prepare(Config pluginConfig) { + CheckResult result = CheckConfigUtil.checkAllExists(pluginConfig, TABLE_PATH, CONF_FILES); + if (!result.isSuccess()) { + throw new PrepareFailException(getPluginName(), PluginType.SOURCE, result.getMsg()); + } + // the default hudi table type is cow + // TODO: support hudi mor table + // TODO: support Incremental Query and Read Optimized Query + if (!"cow".equalsIgnoreCase(pluginConfig.getString(TABLE_TYPE))) { + throw new PrepareFailException(getPluginName(), PluginType.SOURCE, "Hudi MOR tables are not supported yet!"); + } + try { + this.confFiles = pluginConfig.getString(CONF_FILES); + this.tablePath = pluginConfig.getString(TABLE_PATH); + if (CheckConfigUtil.isValidParam(pluginConfig, USE_KERBEROS)) { + this.useKerberos = pluginConfig.getBoolean(USE_KERBEROS); + if (this.useKerberos) { + CheckResult kerberosCheckResult = CheckConfigUtil.checkAllExists(pluginConfig, KERBEROS_PRINCIPAL, KERBEROS_PRINCIPAL_FILE); + if (!kerberosCheckResult.isSuccess()) { + throw new PrepareFailException(getPluginName(), PluginType.SOURCE, kerberosCheckResult.getMsg()); + } + HudiUtil.initKerberosAuthentication(HudiUtil.getConfiguration(this.confFiles), pluginConfig.getString(KERBEROS_PRINCIPAL), pluginConfig.getString(KERBEROS_PRINCIPAL_FILE)); + } + } + this.filePath = HudiUtil.getParquetFileByPath(this.confFiles, tablePath); + if (this.filePath == null) { + throw new HudiPluginException(String.format("%s has no parquet file, please 
check!", tablePath)); + } + // should read from config or read from hudi metadata( wait catlog done) + this.typeInfo = HudiUtil.getSeaTunnelRowTypeInfo(this.confFiles, this.filePath); + + } catch (HudiPluginException | IOException e) { + throw new PrepareFailException(getPluginName(), PluginType.SOURCE, "Prepare HudiSource error.", e); + } + + } + + @Override + public void setSeaTunnelContext(SeaTunnelContext seaTunnelContext) { + this.seaTunnelContext = seaTunnelContext; + } + + @Override + public SeaTunnelDataType getProducedType() { + return this.typeInfo; + } + + @Override + public SourceReader createReader(SourceReader.Context readerContext) throws Exception { + return new HudiSourceReader(this.confFiles, readerContext, typeInfo); + } + + @Override + public Boundedness getBoundedness() { + // Only support Snapshot Query now. + // After support Incremental Query and Read Optimized Query, we should supoort UNBOUNDED. + // TODO: support UNBOUNDED + return Boundedness.BOUNDED; + } + + @Override + public SourceSplitEnumerator createEnumerator(SourceSplitEnumerator.Context enumeratorContext) throws Exception { + return new HudiSourceSplitEnumerator(enumeratorContext, tablePath, this.confFiles); + } + + @Override + public SourceSplitEnumerator restoreEnumerator(SourceSplitEnumerator.Context enumeratorContext, HudiSourceState checkpointState) throws Exception { + return new HudiSourceSplitEnumerator(enumeratorContext, tablePath, this.confFiles, checkpointState); + } + +} diff --git a/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/source/HudiSourceReader.java b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/source/HudiSourceReader.java new file mode 100644 index 00000000000..15127ae5ba1 --- /dev/null +++ b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/source/HudiSourceReader.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.hudi.source; + +import org.apache.seatunnel.api.source.Collector; +import org.apache.seatunnel.api.source.SourceReader; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.seatunnel.hudi.util.HudiUtil; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hudi.hadoop.HoodieParquetInputFormat; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Properties; +import java.util.Set; + +public class HudiSourceReader implements SourceReader { + + private static final long THREAD_WAIT_TIME = 500L; + + private String confPaths; + + private Set sourceSplits; + + private final SourceReader.Context context; + + private SeaTunnelRowType seaTunnelRowType; + + public HudiSourceReader(String confPaths, SourceReader.Context context, SeaTunnelRowType seaTunnelRowType) { + this.confPaths = confPaths; + this.context = context; + this.sourceSplits = new HashSet<>(); + this.seaTunnelRowType = seaTunnelRowType; + } + + @Override + public void open() { + } + + @Override + public void close() { + + } + + @Override + public void pollNext(Collector output) throws Exception { + if (sourceSplits.isEmpty()) { + Thread.sleep(THREAD_WAIT_TIME); + return; + } + Configuration configuration = HudiUtil.getConfiguration(this.confPaths); + JobConf jobConf = HudiUtil.toJobConf(configuration); + sourceSplits.forEach(source -> { + try { + HoodieParquetInputFormat inputFormat = new HoodieParquetInputFormat(); + RecordReader reader = inputFormat.getRecordReader(source.getInputSplit(), jobConf, Reporter.NULL); + ParquetHiveSerDe serde = new ParquetHiveSerDe(); + Properties properties = new Properties(); + List types = new ArrayList<>(); + for (SeaTunnelDataType type: seaTunnelRowType.getFieldTypes()) { + types.add(type.getSqlType().name()); + } + String columns = StringUtils.join(seaTunnelRowType.getFieldNames(), ","); + String columnTypes = StringUtils.join(types, ",").toLowerCase(Locale.ROOT); + properties.setProperty("columns", columns); + properties.setProperty("columns.types", columnTypes); + serde.initialize(jobConf, properties); + StructObjectInspector inspector = (StructObjectInspector) serde.getObjectInspector(); + List fields = inspector.getAllStructFieldRefs(); + NullWritable key = reader.createKey(); + ArrayWritable value = reader.createValue(); + while (reader.next(key, value)) { + Object[] datas = new Object[fields.size()]; + for (int i = 0; i < fields.size(); i++) { + Object data = inspector.getStructFieldData(value, fields.get(i)); + if (null != data) { + datas[i] = String.valueOf(data); + } else { + datas[i] = null; + } + } + output.collect(new SeaTunnelRow(datas)); + } + reader.close(); + } catch (Exception e) { + throw new RuntimeException("Hudi source read error", e); + } + + }); + context.signalNoMoreElement(); + } + + @Override + 
public List snapshotState(long checkpointId) { + return new ArrayList<>(sourceSplits); + } + + @Override + public void addSplits(List splits) { + sourceSplits.addAll(splits); + } + + @Override + public void handleNoMoreSplits() { + + } + + @Override + public void notifyCheckpointComplete(long checkpointId) { + + } +} diff --git a/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/source/HudiSourceSplit.java b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/source/HudiSourceSplit.java new file mode 100644 index 00000000000..b08f6f68e11 --- /dev/null +++ b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/source/HudiSourceSplit.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.hudi.source; + +import org.apache.seatunnel.api.source.SourceSplit; + +import org.apache.hadoop.mapred.InputSplit; + +public class HudiSourceSplit implements SourceSplit { + + private static final long serialVersionUID = -1L; + + private String splitId; + + private InputSplit inputSplit; + + public HudiSourceSplit(String splitId, InputSplit inputSplit) { + this.splitId = splitId; + this.inputSplit = inputSplit; + } + + @Override + public String splitId() { + return this.splitId; + } + + public InputSplit getInputSplit() { + return this.inputSplit; + } +} diff --git a/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/source/HudiSourceSplitEnumerator.java b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/source/HudiSourceSplitEnumerator.java new file mode 100644 index 00000000000..42b072af6c1 --- /dev/null +++ b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/source/HudiSourceSplitEnumerator.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
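The Hudi reader above initializes `ParquetHiveSerDe` from two comma-joined properties, `columns` and `columns.types`. A tiny sketch of how they are derived from a produced row type (the field names and SQL types below are hypothetical; the real code lowercases the type names the same way):

```java
import java.util.Locale;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class SerdePropsSketch {
    public static void main(String[] args) {
        String[] names = {"id", "name", "ts"};          // hypothetical schema
        String[] sqlTypes = {"INT", "STRING", "BIGINT"};

        String columns = String.join(",", names);
        String columnTypes = Stream.of(sqlTypes)
                .map(t -> t.toLowerCase(Locale.ROOT))
                .collect(Collectors.joining(","));

        System.out.println(columns);     // id,name,ts
        System.out.println(columnTypes); // int,string,bigint
    }
}
```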
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.hudi.source; + +import org.apache.seatunnel.api.source.SourceSplitEnumerator; +import org.apache.seatunnel.common.config.Common; +import org.apache.seatunnel.connectors.seatunnel.hudi.util.HudiUtil; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hudi.hadoop.HoodieParquetInputFormat; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class HudiSourceSplitEnumerator implements SourceSplitEnumerator { + + private final Context context; + private Set pendingSplit; + private Set assignedSplit; + private String tablePath; + private String confPaths; + + public HudiSourceSplitEnumerator(SourceSplitEnumerator.Context context, String tablePath, String confPaths) { + this.context = context; + this.tablePath = tablePath; + this.confPaths = confPaths; + } + + public HudiSourceSplitEnumerator(SourceSplitEnumerator.Context context, String tablePath, + String confPaths, + HudiSourceState sourceState) { + this(context, tablePath, confPaths); + this.assignedSplit = sourceState.getAssignedSplit(); + } + + @Override + public void open() { + this.assignedSplit = new HashSet<>(); + this.pendingSplit = new HashSet<>(); + } + + @Override + public void run() throws Exception { + pendingSplit = getHudiSplit(); + assignSplit(context.registeredReaders()); + } + + private Set getHudiSplit() throws IOException { + Set hudiSourceSplits = new HashSet<>(); + Path path = new Path(tablePath); + Configuration configuration = HudiUtil.getConfiguration(confPaths); + JobConf jobConf = HudiUtil.toJobConf(configuration); + FileInputFormat.setInputPaths(jobConf, path); + HoodieParquetInputFormat inputFormat = new HoodieParquetInputFormat(); + inputFormat.setConf(jobConf); + for (InputSplit split: inputFormat.getSplits(jobConf, 0)) { + hudiSourceSplits.add(new HudiSourceSplit(split.toString(), split)); + } + return hudiSourceSplits; + } + + @Override + public void close() throws IOException { + + } + + @Override + public void addSplitsBack(List splits, int subtaskId) { + if (!splits.isEmpty()) { + pendingSplit.addAll(splits); + assignSplit(Collections.singletonList(subtaskId)); + } + } + + private void assignSplit(Collection taskIDList) { + Map> readySplit = new HashMap<>(Common.COLLECTION_SIZE); + for (int taskID : taskIDList) { + readySplit.computeIfAbsent(taskID, id -> new ArrayList<>()); + } + + pendingSplit.forEach(s -> readySplit.get(getSplitOwner(s.splitId(), taskIDList.size())) + .add(s)); + readySplit.forEach(context::assignSplit); + assignedSplit.addAll(pendingSplit); + pendingSplit.clear(); + } + + private static int getSplitOwner(String tp, int numReaders) { + return tp.hashCode() % numReaders; + } + + @Override + public int currentUnassignedSplitSize() { + return pendingSplit.size(); + } + + @Override + public void registerReader(int subtaskId) { + if (!pendingSplit.isEmpty()) { + assignSplit(Collections.singletonList(subtaskId)); + } + } + + @Override + public HudiSourceState snapshotState(long checkpointId) { + return new HudiSourceState(assignedSplit); + } + + 
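One caveat in `getSplitOwner` above (the same helper appears in the Hive enumerator earlier): `String.hashCode` can be negative, so the modulo can yield a negative owner index and the `readySplit.get(...)` lookup would return null. A defensive sketch, assuming the same hash-based round-robin intent:

```java
public class SplitOwnerSketch {
    // masks the sign bit so the owner index is always in [0, numReaders)
    static int getSplitOwner(String splitId, int numReaders) {
        return (splitId.hashCode() & Integer.MAX_VALUE) % numReaders;
    }

    public static void main(String[] args) {
        // "polygenelubricants".hashCode() is negative; the mask keeps the result valid
        System.out.println(getSplitOwner("polygenelubricants", 4));
    }
}
```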
@Override + public void notifyCheckpointComplete(long checkpointId) { + + } + + @Override + public void handleSplitRequest(int subtaskId) { + + } +} diff --git a/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/source/HudiSourceState.java b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/source/HudiSourceState.java new file mode 100644 index 00000000000..6235ca694ae --- /dev/null +++ b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/source/HudiSourceState.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.hudi.source; + +import java.io.Serializable; +import java.util.Set; + +public class HudiSourceState implements Serializable { + + + private Set assignedSplit; + + public HudiSourceState(Set assignedSplit) { + this.assignedSplit = assignedSplit; + } + + public Set getAssignedSplit() { + return assignedSplit; + } +} diff --git a/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/util/HudiUtil.java b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/util/HudiUtil.java new file mode 100644 index 00000000000..346232b9534 --- /dev/null +++ b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/util/HudiUtil.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.hudi.util;
+
+import static org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER;
+
+import org.apache.seatunnel.api.table.type.BasicType;
+import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.connectors.seatunnel.hudi.exception.HudiPluginException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.parquet.hadoop.ParquetFileReader;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.schema.MessageType;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+public class HudiUtil {
+
+    public static Configuration getConfiguration(String confPaths) {
+        Configuration configuration = new Configuration();
+        Arrays.stream(confPaths.split(";")).forEach(file -> configuration.addResource(new Path(file)));
+        return configuration;
+    }
+
+    public static String getParquetFileByPath(String confPaths, String path) throws IOException {
+        Configuration configuration = getConfiguration(confPaths);
+        FileSystem hdfs = FileSystem.get(configuration);
+        Path listFiles = new Path(path);
+        FileStatus[] stats = hdfs.listStatus(listFiles);
+        for (FileStatus fileStatus : stats) {
+            if (fileStatus.isDirectory()) {
+                String filePath = getParquetFileByPath(confPaths, fileStatus.getPath().toString());
+                if (filePath != null) {
+                    return filePath;
+                }
+            }
+            // Match the file extension rather than a bare suffix, so that a file whose
+            // name merely ends in "parquet" is not picked up by mistake.
+            if (fileStatus.isFile() && fileStatus.getPath().toString().endsWith(".parquet")) {
+                return fileStatus.getPath().toString();
+            }
+        }
+        return null;
+    }
+
+    public static SeaTunnelRowType getSeaTunnelRowTypeInfo(String confPaths, String path) throws HudiPluginException {
+        Configuration configuration = getConfiguration(confPaths);
+        Path dstDir = new Path(path);
+        ParquetMetadata footer;
+        try {
+            footer = ParquetFileReader.readFooter(configuration, dstDir, NO_FILTER);
+        } catch (IOException e) {
+            throw new HudiPluginException("Failed to read the parquet file footer!", e);
+        }
+        MessageType schema = footer.getFileMetaData().getSchema();
+        String[] fields = new String[schema.getFields().size()];
+        SeaTunnelDataType<?>[] types = new SeaTunnelDataType<?>[schema.getFields().size()];
+
+        // Note: every field is currently exposed as STRING; a fine-grained parquet to
+        // SeaTunnel type mapping is not implemented yet.
+        for (int i = 0; i < schema.getFields().size(); i++) {
+            fields[i] = schema.getFields().get(i).getName();
+            types[i] = BasicType.STRING_TYPE;
+        }
+        return new SeaTunnelRowType(fields, types);
+    }
+
+    public static JobConf toJobConf(Configuration conf) {
+        if (conf instanceof JobConf) {
+            return (JobConf) conf;
+        }
+        return new JobConf(conf);
+    }
+
+    public static void initKerberosAuthentication(Configuration conf, String principal, String principalFile) throws HudiPluginException {
+        try {
+            UserGroupInformation.setConfiguration(conf);
+            UserGroupInformation.loginUserFromKeytab(principal, principalFile);
+        } catch (IOException e) {
+            throw new HudiPluginException("Kerberos authentication failed!", e);
+        }
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-jdbc/pom.xml b/seatunnel-connectors-v2/connector-jdbc/pom.xml
index 1bb972ac96c..39839eba700 100644
--- a/seatunnel-connectors-v2/connector-jdbc/pom.xml
+++ b/seatunnel-connectors-v2/connector-jdbc/pom.xml
@@ -46,6 +46,7 @@
         <dependency>
             <groupId>org.postgresql</groupId>
             <artifactId>postgresql</artifactId>
+            <scope>provided</scope>
         </dependency>
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/MySqlCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/MySqlCatalog.java
index 4f6e61ce573..c3aa84ae45c 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/MySqlCatalog.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/MySqlCatalog.java
@@ -130,7 +130,7 @@ public CatalogTable getTable(TablePath tablePath) throws CatalogException, Table
             getPrimaryKey(metaData, tablePath.getDatabaseName(), tablePath.getTableName());
 
         PreparedStatement ps =
-            conn.prepareStatement(String.format("SELECT * FROM %s LIMIT 1;", tablePath.getFullName()));
+            conn.prepareStatement(String.format("SELECT * FROM %s WHERE 1 = 0;", tablePath.getFullName()));
 
         ResultSetMetaData tableMetaData = ps.getMetaData();
 
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/xa/SemanticXidGenerator.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/xa/SemanticXidGenerator.java
index 98825a57a59..3d2a82b3d13 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/xa/SemanticXidGenerator.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/xa/SemanticXidGenerator.java
@@ -80,8 +80,7 @@ public boolean belongsToSubtask(Xid xid, SeaTunnelContext context, SinkWriter.Co
             return false;
         }
         int subtaskIndex = readNumber(xid.getGlobalTransactionId(), JOB_ID_BYTES, Integer.BYTES);
-        if (subtaskIndex != sinkContext.getIndexOfSubtask()
-            && subtaskIndex <= sinkContext.getNumberOfParallelSubtasks() - 1) {
+        if (subtaskIndex != sinkContext.getIndexOfSubtask()) {
             return false;
         }
         byte[] jobIdBytes = new byte[JOB_ID_BYTES];
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkAggregatedCommitter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkAggregatedCommitter.java
index f0ee2671eda..d142e95bbe2 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkAggregatedCommitter.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkAggregatedCommitter.java
@@ -83,7 +83,9 @@ public void abort(List<JdbcAggregatedCommitInfo> aggregatedCommitInfo) throws IO
 
     public void close() throws IOException {
         try {
-            xaFacade.close();
+            if (xaFacade.isOpen()) {
+                xaFacade.close();
+            }
         } catch (Exception e) {
             ExceptionUtils.rethrowIOException(e);
         }
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSource.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSource.java
index 73a61b3a386..2717436f194 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSource.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSource.java
@@ -19,7 +19,6 @@
 
 import org.apache.seatunnel.api.common.PrepareFailException;
 import org.apache.seatunnel.api.common.SeaTunnelContext;
-import org.apache.seatunnel.api.serialization.DefaultSerializer;
 import org.apache.seatunnel.api.serialization.Serializer;
 import org.apache.seatunnel.api.source.Boundedness;
 import org.apache.seatunnel.api.source.SeaTunnelSource;
@@ -141,11 +140,6 @@ public SourceSplitEnumerator restoreEnumerator
         return new JdbcSourceSplitEnumerator(enumeratorContext, jdbcSourceOptions, partitionParameter);
     }
 
-    @Override
-    public Serializer<JdbcSourceState> getEnumeratorStateSerializer() {
-        return new DefaultSerializer<>();
-    }
-
     private SeaTunnelRowType initTableField(Connection conn) {
         JdbcDialectTypeMapper jdbcDialectTypeMapper = jdbcDialect.getJdbcDialectTypeMapper();
         ArrayList<SeaTunnelDataType<?>> seaTunnelDataTypes = new ArrayList<>();
diff --git a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSource.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSource.java
index b5a3d42d6b4..a4d534e6775 100644
--- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSource.java
+++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSource.java
@@ -25,8 +25,6 @@
 
 import org.apache.seatunnel.api.common.PrepareFailException;
 import org.apache.seatunnel.api.common.SeaTunnelContext;
-import org.apache.seatunnel.api.serialization.DefaultSerializer;
-import org.apache.seatunnel.api.serialization.Serializer;
 import org.apache.seatunnel.api.source.Boundedness;
 import org.apache.seatunnel.api.source.SeaTunnelSource;
 import org.apache.seatunnel.api.source.SourceReader;
@@ -120,11 +118,6 @@ public SourceSplitEnumerator restoreEnumerat
         return new KafkaSourceSplitEnumerator(this.metadata, enumeratorContext, checkpointState);
     }
 
-    @Override
-    public Serializer<KafkaSourceState> getEnumeratorStateSerializer() {
-        return new DefaultSerializer<>();
-    }
-
     @Override
     public void setSeaTunnelContext(SeaTunnelContext seaTunnelContext) {
         this.seaTunnelContext = seaTunnelContext;
diff --git a/seatunnel-connectors-v2/connector-kudu/pom.xml b/seatunnel-connectors-v2/connector-kudu/pom.xml
new file mode 100644
index 00000000000..c8d3591abb1
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/pom.xml
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>seatunnel-connectors-v2</artifactId>
+        <groupId>org.apache.seatunnel</groupId>
+        <version>${revision}</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>connector-kudu</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.seatunnel</groupId>
+            <artifactId>seatunnel-api</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.kudu</groupId>
+            <artifactId>kudu-client</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+        </dependency>
+    </dependencies>
+</project>
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/config/KuduSinkConfig.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/config/KuduSinkConfig.java
new file mode 100644
index 00000000000..8f4f3ae44bf
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/config/KuduSinkConfig.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kudu.config;
+
+import org.apache.seatunnel.shade.com.typesafe.config.Config;
+
+import lombok.Data;
+import lombok.NonNull;
+import org.apache.commons.lang3.StringUtils;
+
+@Data
+public class KuduSinkConfig {
+
+    private static final String KUDU_SAVE_MODE = "save_mode";
+    private static final String KUDU_MASTER = "kudu_master";
+    private static final String KUDU_TABLE_NAME = "kudu_table";
+
+    private SaveMode saveMode = SaveMode.APPEND;
+
+    private String kuduMaster;
+
+    /**
+     * Specifies the name of the table
+     */
+    private String kuduTableName;
+
+    public enum SaveMode {
+        APPEND,
+        OVERWRITE;
+
+        public static SaveMode fromStr(String str) {
+            if ("overwrite".equalsIgnoreCase(str)) {
+                return OVERWRITE;
+            }
+            return APPEND;
+        }
+    }
+
+    public KuduSinkConfig(@NonNull Config pluginConfig) {
+        this.saveMode = StringUtils.isBlank(pluginConfig.getString(KUDU_SAVE_MODE))
+            ? SaveMode.APPEND
+            : SaveMode.fromStr(pluginConfig.getString(KUDU_SAVE_MODE));
+        this.kuduMaster = pluginConfig.getString(KUDU_MASTER);
+        this.kuduTableName = pluginConfig.getString(KUDU_TABLE_NAME);
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/config/KuduSourceConfig.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/config/KuduSourceConfig.java
new file mode 100644
index 00000000000..9b148140c12
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/config/KuduSourceConfig.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kudu.config;
+
+import java.io.Serializable;
+
+public class KuduSourceConfig implements Serializable {
+
+    // Kudu master address
+    public static final String KUDU_MASTER = "kudu_master";
+
+    public static final String TABLE_NAME = "kudu_table";
+
+    public static final String COLUMNS_LIST = "columnsList";
+}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/kuduclient/KuduInputFormat.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/kuduclient/KuduInputFormat.java
new file mode 100644
index 00000000000..6c9a2e4629c
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/kuduclient/KuduInputFormat.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kudu.kuduclient;
+
+import org.apache.seatunnel.api.common.PrepareFailException;
+import org.apache.seatunnel.api.table.type.BasicType;
+import org.apache.seatunnel.api.table.type.DecimalType;
+import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.common.constants.PluginType;
+
+import org.apache.kudu.ColumnSchema;
+import org.apache.kudu.Schema;
+import org.apache.kudu.client.KuduClient;
+import org.apache.kudu.client.KuduException;
+import org.apache.kudu.client.KuduPredicate;
+import org.apache.kudu.client.KuduScanner;
+import org.apache.kudu.client.RowResult;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Serializable;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+public class KuduInputFormat implements Serializable {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(KuduInputFormat.class);
+
+    /**
+     * The Kudu client used to read the Kudu table.
+     */
+    private KuduClient kuduClient;
+
+    /**
+     * The Kudu master address.
+     */
+    private final String kuduMaster;
+    private final List<String> columnsList;
+    private Schema schema;
+    private String keyColumn;
+
+    /**
+     * Specifies the name of the table.
+     */
+    private final String tableName;
+
+    public KuduInputFormat(String kuduMaster, String tableName, String columnsList) {
+        this.kuduMaster = kuduMaster;
+        this.columnsList = Arrays.asList(columnsList.split(","));
+        this.tableName = tableName;
+    }
+
+    public List<ColumnSchema> getColumnsSchemas() {
+        List<ColumnSchema> columns = null;
+        try {
+            schema = kuduClient.openTable(tableName).getSchema();
+            keyColumn = schema.getPrimaryKeyColumns().get(0).getName();
+            columns = schema.getColumns();
+        } catch (KuduException e) {
+            LOGGER.warn("Failed to read the schema of table {}.", tableName, e);
+        }
+        return columns;
+    }
+
+    public static SeaTunnelRow getSeaTunnelRowData(RowResult rs, SeaTunnelRowType typeInfo) throws SQLException {
+        List<Object> fields = new ArrayList<>();
+        SeaTunnelDataType<?>[] seaTunnelDataTypes = typeInfo.getFieldTypes();
+
+        for (int i = 0; i < seaTunnelDataTypes.length; i++) {
+            Object seatunnelField;
+            SeaTunnelDataType<?> seaTunnelDataType = seaTunnelDataTypes[i];
+            if (null == rs.getObject(i)) {
+                seatunnelField = null;
+            } else if (BasicType.BOOLEAN_TYPE.equals(seaTunnelDataType)) {
+                seatunnelField = rs.getBoolean(i);
+            } else if (BasicType.BYTE_TYPE.equals(seaTunnelDataType)) {
+                seatunnelField = rs.getByte(i);
+            } else if (BasicType.SHORT_TYPE.equals(seaTunnelDataType)) {
+                seatunnelField = rs.getShort(i);
+            } else if (BasicType.INT_TYPE.equals(seaTunnelDataType)) {
+                seatunnelField = rs.getInt(i);
+            } else if (BasicType.LONG_TYPE.equals(seaTunnelDataType)) {
+                seatunnelField = rs.getLong(i);
+            } else if (seaTunnelDataType instanceof DecimalType) {
+                Object value = rs.getObject(i);
+                seatunnelField = value instanceof BigInteger
+                    ? new BigDecimal((BigInteger) value, 0)
+                    : value;
+            } else if (BasicType.FLOAT_TYPE.equals(seaTunnelDataType)) {
+                seatunnelField = rs.getFloat(i);
+            } else if (BasicType.DOUBLE_TYPE.equals(seaTunnelDataType)) {
+                seatunnelField = rs.getDouble(i);
+            } else if (BasicType.STRING_TYPE.equals(seaTunnelDataType)) {
+                seatunnelField = rs.getString(i);
+            } else {
+                throw new IllegalStateException("Unexpected value: " + seaTunnelDataType);
+            }
+            fields.add(seatunnelField);
+        }
+
+        return new SeaTunnelRow(fields.toArray());
+    }
+
+    public SeaTunnelRowType getSeaTunnelRowType(List<ColumnSchema> columnSchemaList) {
+        ArrayList<SeaTunnelDataType<?>> seaTunnelDataTypes = new ArrayList<>();
+        ArrayList<String> fieldNames = new ArrayList<>();
+        try {
+            for (int i = 0; i < columnSchemaList.size(); i++) {
+                fieldNames.add(columnSchemaList.get(i).getName());
+                seaTunnelDataTypes.add(KuduTypeMapper.mapping(columnSchemaList, i));
+            }
+        } catch (Exception e) {
+            LOGGER.warn("get row type info exception", e);
+            throw new PrepareFailException("kudu", PluginType.SOURCE, e.toString());
+        }
+        return new SeaTunnelRowType(fieldNames.toArray(new String[0]),
+            seaTunnelDataTypes.toArray(new SeaTunnelDataType[0]));
+    }
+
+    public void openInputFormat() {
+        KuduClient.KuduClientBuilder kuduClientBuilder =
+            new KuduClient.KuduClientBuilder(kuduMaster);
+        kuduClientBuilder.defaultOperationTimeoutMs(1800000);
+        kuduClient = kuduClientBuilder.build();
+        LOGGER.info("The Kudu client for master {} is initialized successfully.", kuduMaster);
+    }
+
+    /**
+     * @param lowerBound the lower bound of the key range of this split (inclusive)
+     * @param upperBound the upper bound of the key range of this split (exclusive)
+     * @return the KuduScanner that reads the rows of this split
+     */
+    public KuduScanner getKuduBuildSplit(int lowerBound, int upperBound) {
+        KuduScanner kuduScanner = null;
+        try {
+            KuduScanner.KuduScannerBuilder kuduScannerBuilder =
+                kuduClient.newScannerBuilder(kuduClient.openTable(tableName));
+            kuduScannerBuilder.setProjectedColumnNames(columnsList);
+
+            KuduPredicate lowerPred = KuduPredicate.newComparisonPredicate(
+                schema.getColumn(keyColumn),
+                KuduPredicate.ComparisonOp.GREATER_EQUAL,
+                lowerBound);
+
+            KuduPredicate upperPred = KuduPredicate.newComparisonPredicate(
+                schema.getColumn(keyColumn),
+                KuduPredicate.ComparisonOp.LESS,
+                upperBound);
+
+            kuduScanner = kuduScannerBuilder.addPredicate(lowerPred)
+                .addPredicate(upperPred).build();
+        } catch (KuduException e) {
+            LOGGER.warn("Failed to build the KuduScanner for split [{}, {}).", lowerBound, upperBound, e);
+        }
+        return kuduScanner;
+    }
+
+    public void closeInputFormat() {
+        if (kuduClient != null) {
+            try {
+                kuduClient.close();
+            } catch (KuduException e) {
+                LOGGER.warn("Kudu client close failed.", e);
+            } finally {
+                kuduClient = null;
+            }
+        }
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/kuduclient/KuduOutputFormat.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/kuduclient/KuduOutputFormat.java
new file mode 100644
index 00000000000..40cd7f1a4b9
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/kuduclient/KuduOutputFormat.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kudu.kuduclient;
+
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.connectors.seatunnel.kudu.config.KuduSinkConfig;
+
+import org.apache.kudu.ColumnSchema;
+import org.apache.kudu.Schema;
+import org.apache.kudu.client.Insert;
+import org.apache.kudu.client.KuduClient;
+import org.apache.kudu.client.KuduException;
+import org.apache.kudu.client.KuduSession;
+import org.apache.kudu.client.KuduTable;
+import org.apache.kudu.client.PartialRow;
+import org.apache.kudu.client.SessionConfiguration;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Serializable;
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.sql.Timestamp;
+
+/**
+ * A Kudu outputFormat
+ */
+public class KuduOutputFormat implements Serializable {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(KuduOutputFormat.class);
+
+    private final String kuduMaster;
+    private final String kuduTableName;
+    private KuduClient kuduClient;
+    private KuduSession kuduSession;
+    private KuduTable kuduTable;
+
+    public KuduOutputFormat(KuduSinkConfig kuduSinkConfig) {
+        this.kuduMaster = kuduSinkConfig.getKuduMaster();
+        this.kuduTableName = kuduSinkConfig.getKuduTableName();
+        init();
+    }
+
+    public void write(SeaTunnelRow element) {
+        Insert insert = kuduTable.newInsert();
+        Schema schema = kuduTable.getSchema();
+
+        int columnCount = schema.getColumnCount();
+        PartialRow row = insert.getRow();
+        for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
+            ColumnSchema col = schema.getColumnByIndex(columnIndex);
+            try {
+                switch (col.getType()) {
+                    case BOOL:
+                        row.addBoolean(columnIndex, (Boolean) element.getField(columnIndex));
+                        break;
+                    case INT8:
+                        row.addByte(columnIndex, (Byte) element.getField(columnIndex));
+                        break;
+                    case INT16:
+                        row.addShort(columnIndex, (Short) element.getField(columnIndex));
+                        break;
+                    case INT32:
+                        row.addInt(columnIndex, (Integer) element.getField(columnIndex));
+                        break;
+                    case INT64:
+                        row.addLong(columnIndex, (Long) element.getField(columnIndex));
+                        break;
+                    case UNIXTIME_MICROS:
+                        if (element.getField(columnIndex) instanceof Timestamp) {
+                            row.addTimestamp(columnIndex, (Timestamp) element.getField(columnIndex));
+                        } else {
+                            row.addLong(columnIndex, (Long) element.getField(columnIndex));
+                        }
+                        break;
+                    case FLOAT:
+                        row.addFloat(columnIndex, (Float) element.getField(columnIndex));
+                        break;
+                    case DOUBLE:
+                        row.addDouble(columnIndex, (Double) element.getField(columnIndex));
+                        break;
+                    case STRING:
+                        row.addString(columnIndex, element.getField(columnIndex).toString());
+                        break;
+                    case BINARY:
+                        if (element.getField(columnIndex) instanceof byte[]) {
+                            row.addBinary(columnIndex, (byte[]) element.getField(columnIndex));
+                        } else {
+                            row.addBinary(columnIndex, (ByteBuffer) element.getField(columnIndex));
+                        }
+                        break;
+                    case DECIMAL:
+                        row.addDecimal(columnIndex, (BigDecimal) element.getField(columnIndex));
+                        break;
+                    default:
+                        throw new IllegalArgumentException("Unsupported column type: " + col.getType());
+                }
+            } catch (ClassCastException e) {
+                throw new IllegalArgumentException(
+                    "Value type does not match column type " + col.getType()
+                        + " for column " + col.getName(), e);
+            }
+        }
+
+        try {
+            kuduSession.apply(insert);
+        } catch (KuduException e) {
+            LOGGER.error("Failed to write a row to the Kudu session.", e);
+        }
+    }
+
+    public void init() {
+        KuduClient.KuduClientBuilder kuduClientBuilder =
+            new KuduClient.KuduClientBuilder(kuduMaster);
+        kuduClientBuilder.defaultOperationTimeoutMs(1800000);
+
+        this.kuduClient = kuduClientBuilder.build();
+        this.kuduSession = kuduClient.newSession();
+        this.kuduSession.setTimeoutMillis(100000);
+        this.kuduSession.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC);
+
+        try {
+            kuduTable = kuduClient.openTable(kuduTableName);
+        } catch (KuduException e) {
+            LOGGER.error("Failed to open the Kudu table {}.", kuduTableName, e);
+        }
+
+        LOGGER.info("The Kudu client for master {} is initialized successfully.", kuduMaster);
+    }
+
+    public void closeOutputFormat() {
+        if (kuduClient != null) {
+            try {
+                // Close the session first so buffered operations are flushed before
+                // the client that owns it is shut down.
+                kuduSession.close();
+                kuduClient.close();
+            } catch (KuduException e) {
+                LOGGER.warn("Kudu client close failed.", e);
+            } finally {
+                kuduClient = null;
+                kuduSession = null;
+            }
+        }
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/kuduclient/KuduTypeMapper.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/kuduclient/KuduTypeMapper.java
new file mode 100644
index 00000000000..4f63c61359d
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/kuduclient/KuduTypeMapper.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kudu.kuduclient;
+
+import org.apache.seatunnel.api.table.type.BasicType;
+import org.apache.seatunnel.api.table.type.DecimalType;
+import org.apache.seatunnel.api.table.type.LocalTimeType;
+import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType;
+import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
+
+import org.apache.kudu.ColumnSchema;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.SQLException;
+import java.util.List;
+
+public class KuduTypeMapper {
+
+    private static final Logger LOG = LoggerFactory.getLogger(KuduTypeMapper.class);
+
+    // ============================data types=====================
+
+    private static final String KUDU_UNKNOWN = "UNKNOWN";
+    private static final String KUDU_BIT = "BOOL";
+
+    // -------------------------number----------------------------
+    private static final String KUDU_TINYINT = "INT8";
+    private static final String KUDU_SMALLINT = "INT16";
+    private static final String KUDU_INT = "INT32";
+    private static final String KUDU_BIGINT = "INT64";
+
+    private static final String KUDU_FLOAT = "FLOAT";
+    private static final String KUDU_DOUBLE = "DOUBLE";
+    // Kudu's Type#getName() reports the logical name "decimal" for all decimal widths.
+    private static final String KUDU_DECIMAL = "DECIMAL";
+
+    // -------------------------string----------------------------
+    private static final String KUDU_VARCHAR = "STRING";
+
+    // ------------------------------time-------------------------
+    private static final String KUDU_UNIXTIME_MICROS = "UNIXTIME_MICROS";
+
+    // ------------------------------blob-------------------------
+    private static final String KUDU_BINARY = "BINARY";
+
+    public static SeaTunnelDataType<?> mapping(List<ColumnSchema> columnSchemaList, int colIndex) throws SQLException {
+        String kuduType = columnSchemaList.get(colIndex).getType().getName().toUpperCase();
+
+        switch (kuduType) {
+            case KUDU_BIT:
+                return BasicType.BOOLEAN_TYPE;
+            case KUDU_TINYINT:
+            case KUDU_SMALLINT:
+            case KUDU_INT:
+                return BasicType.INT_TYPE;
+            case KUDU_BIGINT:
+                return BasicType.LONG_TYPE;
+            case KUDU_DECIMAL:
+                return new DecimalType(20, 0);
+            case KUDU_FLOAT:
+                return BasicType.FLOAT_TYPE;
+            case KUDU_DOUBLE:
+                return BasicType.DOUBLE_TYPE;
+            case KUDU_VARCHAR:
+                return BasicType.STRING_TYPE;
+            case KUDU_UNIXTIME_MICROS:
+                return LocalTimeType.LOCAL_DATE_TIME_TYPE;
+            case KUDU_BINARY:
+                return PrimitiveByteArrayType.INSTANCE;
+
+            // Doesn't support yet
+            case KUDU_UNKNOWN:
+            default:
+                throw new UnsupportedOperationException(
+                    String.format("Doesn't support KUDU type '%s' yet.", kuduType));
+        }
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduAggregatedCommitInfo.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduAggregatedCommitInfo.java
new file mode 100644
index 00000000000..054bdf8f112
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduAggregatedCommitInfo.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kudu.sink;
+
+import java.io.Serializable;
+
+/**
+ * Placeholder: the Kudu sink currently carries no aggregated commit information,
+ * because rows are flushed synchronously on write.
+ */
+public class KuduAggregatedCommitInfo implements Serializable {
+}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduCommitInfo.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduCommitInfo.java
new file mode 100644
index 00000000000..271139fe88e
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduCommitInfo.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kudu.sink;
+
+import java.io.Serializable;
+
+/**
+ * Placeholder: the Kudu sink currently carries no per-writer commit information.
+ */
+public class KuduCommitInfo implements Serializable {
+}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSink.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSink.java
new file mode 100644
index 00000000000..6c40252f885
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSink.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kudu.sink;
+
+import org.apache.seatunnel.api.common.PrepareFailException;
+import org.apache.seatunnel.api.serialization.DefaultSerializer;
+import org.apache.seatunnel.api.serialization.Serializer;
+import org.apache.seatunnel.api.sink.SeaTunnelSink;
+import org.apache.seatunnel.api.sink.SinkAggregatedCommitter;
+import org.apache.seatunnel.api.sink.SinkWriter;
+import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.shade.com.typesafe.config.Config;
+
+import com.google.auto.service.AutoService;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Kudu Sink implementation by using SeaTunnel sink API.
+ * This class contains the method to create {@link KuduSinkWriter} and {@link KuduSinkAggregatedCommitter}.
+ */
+@AutoService(SeaTunnelSink.class)
+public class KuduSink implements SeaTunnelSink<SeaTunnelRow, KuduSinkState, KuduCommitInfo, KuduAggregatedCommitInfo> {
+
+    private Config config;
+    private SeaTunnelRowType seaTunnelRowType;
+
+    @Override
+    public String getPluginName() {
+        // Must match the plugin name used in the sink block of the job config.
+        return "kuduSink";
+    }
+
+    @Override
+    public void setTypeInfo(SeaTunnelRowType seaTunnelRowType) {
+        this.seaTunnelRowType = seaTunnelRowType;
+    }
+
+    @Override
+    public SeaTunnelDataType<SeaTunnelRow> getConsumedType() {
+        return this.seaTunnelRowType;
+    }
+
+    @Override
+    public void prepare(Config pluginConfig) throws PrepareFailException {
+        this.config = pluginConfig;
+    }
+
+    @Override
+    public SinkWriter<SeaTunnelRow, KuduCommitInfo, KuduSinkState> createWriter(SinkWriter.Context context) throws IOException {
+        return new KuduSinkWriter(seaTunnelRowType, config, context, System.currentTimeMillis());
+    }
+
+    @Override
+    public SinkWriter<SeaTunnelRow, KuduCommitInfo, KuduSinkState> restoreWriter(SinkWriter.Context context, List<KuduSinkState> states) throws IOException {
+        // The writer keeps no state, so restoring simply creates a fresh writer.
+        return new KuduSinkWriter(seaTunnelRowType, config, context, System.currentTimeMillis());
+    }
+
+    @Override
+    public Optional<Serializer<KuduCommitInfo>> getCommitInfoSerializer() {
+        return Optional.of(new DefaultSerializer<>());
+    }
+
+    @Override
+    public Optional<SinkAggregatedCommitter<KuduCommitInfo, KuduAggregatedCommitInfo>> createAggregatedCommitter() throws IOException {
+        return Optional.of(new KuduSinkAggregatedCommitter());
+    }
+
+    @Override
+    public Optional<Serializer<KuduAggregatedCommitInfo>> getAggregatedCommitInfoSerializer() {
+        return Optional.of(new DefaultSerializer<>());
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSinkAggregatedCommitter.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSinkAggregatedCommitter.java
new file mode 100644
index 00000000000..f06d5e89919
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSinkAggregatedCommitter.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kudu.sink;
+
+import org.apache.seatunnel.api.sink.SinkAggregatedCommitter;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+
+public class KuduSinkAggregatedCommitter implements SinkAggregatedCommitter<KuduCommitInfo, KuduAggregatedCommitInfo> {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(KuduSinkAggregatedCommitter.class);
+
+    @Override
+    public List<KuduAggregatedCommitInfo> commit(List<KuduAggregatedCommitInfo> aggregatedCommitInfo) throws IOException {
+        // Rows are flushed synchronously by the writer, so there is nothing left to
+        // commit and no failed commit info to report.
+        return Collections.emptyList();
+    }
+
+    @Override
+    public KuduAggregatedCommitInfo combine(List<KuduCommitInfo> commitInfos) {
+        return null;
+    }
+
+    @Override
+    public void abort(List<KuduAggregatedCommitInfo> aggregatedCommitInfo) throws Exception {
+    }
+
+    @Override
+    public void close() throws IOException {
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSinkState.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSinkState.java
new file mode 100644
index 00000000000..9d32b00354e
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSinkState.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kudu.sink;
+
+import java.io.Serializable;
+
+/**
+ * Placeholder: the Kudu sink writer is currently stateless.
+ */
+public class KuduSinkState implements Serializable {
+}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSinkWriter.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSinkWriter.java
new file mode 100644
index 00000000000..1fd9e560aec
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSinkWriter.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kudu.sink;
+
+import org.apache.seatunnel.api.sink.SinkWriter;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.connectors.seatunnel.kudu.config.KuduSinkConfig;
+import org.apache.seatunnel.connectors.seatunnel.kudu.kuduclient.KuduOutputFormat;
+import org.apache.seatunnel.shade.com.typesafe.config.Config;
+
+import lombok.NonNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Optional;
+
+public class KuduSinkWriter implements SinkWriter<SeaTunnelRow, KuduCommitInfo, KuduSinkState> {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(KuduSinkWriter.class);
+
+    private final SeaTunnelRowType seaTunnelRowType;
+    private final Config pluginConfig;
+    private final Context context;
+    private final long jobId;
+
+    private final KuduOutputFormat kuduOutputFormat;
+
+    private final KuduSinkConfig kuduSinkConfig;
+
+    public KuduSinkWriter(@NonNull SeaTunnelRowType seaTunnelRowType,
+                          @NonNull Config pluginConfig,
+                          @NonNull Context context,
+                          long jobId) {
+        this.seaTunnelRowType = seaTunnelRowType;
+        this.pluginConfig = pluginConfig;
+        this.context = context;
+        this.jobId = jobId;
+
+        kuduSinkConfig = new KuduSinkConfig(this.pluginConfig);
+        kuduOutputFormat = new KuduOutputFormat(kuduSinkConfig);
+    }
+
+    @Override
+    public void write(SeaTunnelRow element) throws IOException {
+        kuduOutputFormat.write(element);
+    }
+
+    @Override
+    public Optional<KuduCommitInfo> prepareCommit() throws IOException {
+        // Rows are flushed synchronously by the output format, so no commit info is needed.
+        return Optional.empty();
+    }
+
+    @Override
+    public void abortPrepare() {
+    }
+
+    @Override
+    public void close() throws IOException {
+        kuduOutputFormat.closeOutputFormat();
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/source/KuduSource.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/source/KuduSource.java
new file mode 100644
index 00000000000..0d6f014a444
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/source/KuduSource.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kudu.source;
+
+import org.apache.seatunnel.api.common.PrepareFailException;
+import org.apache.seatunnel.api.common.SeaTunnelContext;
+import org.apache.seatunnel.api.serialization.DefaultSerializer;
+import org.apache.seatunnel.api.serialization.Serializer;
+import org.apache.seatunnel.api.source.Boundedness;
+import org.apache.seatunnel.api.source.SeaTunnelSource;
+import org.apache.seatunnel.api.source.SourceReader;
+import org.apache.seatunnel.api.source.SourceSplitEnumerator;
+import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.common.constants.PluginType;
+import org.apache.seatunnel.connectors.seatunnel.kudu.config.KuduSourceConfig;
+import org.apache.seatunnel.connectors.seatunnel.kudu.kuduclient.KuduInputFormat;
+import org.apache.seatunnel.connectors.seatunnel.kudu.kuduclient.KuduTypeMapper;
+import org.apache.seatunnel.connectors.seatunnel.kudu.state.KuduSinkState;
+import org.apache.seatunnel.shade.com.typesafe.config.Config;
+
+import com.google.auto.service.AutoService;
+import org.apache.kudu.ColumnSchema;
+import org.apache.kudu.client.KuduClient;
+import org.apache.kudu.client.KuduException;
+import org.apache.kudu.client.KuduScanner;
+import org.apache.kudu.client.RowResult;
+import org.apache.kudu.client.RowResultIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+@AutoService(SeaTunnelSource.class)
+public class KuduSource implements SeaTunnelSource<SeaTunnelRow, KuduSourceSplit, KuduSinkState> {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(KuduSource.class);
+
+    private SeaTunnelContext seaTunnelContext;
+    private SeaTunnelRowType rowTypeInfo;
+    private KuduInputFormat kuduInputFormat;
+    private PartitionParameter partitionParameter;
+
+    @Override
+    public Boundedness getBoundedness() {
+        return Boundedness.BOUNDED;
+    }
+
+    @Override
+    public SeaTunnelRowType getProducedType() {
+        return this.rowTypeInfo;
+    }
+
+    @Override
+    public SourceReader<SeaTunnelRow, KuduSourceSplit> createReader(SourceReader.Context readerContext) {
+        return new KuduSourceReader(kuduInputFormat, readerContext);
+    }
+
+    @Override
+    public SourceSplitEnumerator<KuduSourceSplit, KuduSinkState> createEnumerator(
+        SourceSplitEnumerator.Context<KuduSourceSplit> enumeratorContext) {
+        return new KuduSourceSplitEnumerator(enumeratorContext, partitionParameter);
+    }
+
+    @Override
+    public SourceSplitEnumerator<KuduSourceSplit, KuduSinkState> restoreEnumerator(
+        SourceSplitEnumerator.Context<KuduSourceSplit> enumeratorContext, KuduSinkState checkpointState) {
+        // todo: restore the enumerator from the checkpoint state
+        return new KuduSourceSplitEnumerator(enumeratorContext, partitionParameter);
+    }
+
+    @Override
+    public Serializer<KuduSinkState> getEnumeratorStateSerializer() {
+        return new DefaultSerializer<>();
+    }
+
+    @Override
+    public String getPluginName() {
+        return "KuduSource";
+    }
+
+    @Override
+    public void prepare(Config config) {
+        String kuduMaster = config.getString(KuduSourceConfig.KUDU_MASTER);
+        String tableName = config.getString(KuduSourceConfig.TABLE_NAME);
+        String columnsList = config.getString(KuduSourceConfig.COLUMNS_LIST);
+        kuduInputFormat = new KuduInputFormat(kuduMaster, tableName, columnsList);
+        try {
+            KuduClient.KuduClientBuilder kuduClientBuilder =
+                new KuduClient.KuduClientBuilder(kuduMaster);
+            kuduClientBuilder.defaultOperationTimeoutMs(1800000);
+            KuduClient kuduClient = kuduClientBuilder.build();
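+            // The partition parameter records the first primary-key column together
+            // with its minimum and maximum values; the split enumerator later uses
+            // this range to cut the table into per-reader key ranges.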
+            partitionParameter = initPartitionParameter(kuduClient, tableName);
+            rowTypeInfo = getSeaTunnelRowType(kuduClient.openTable(tableName).getSchema().getColumns());
+        } catch (KuduException e) {
+            // Fail fast: without the schema and the key range the source cannot run.
+            throw new PrepareFailException("kudu", PluginType.SOURCE, e.toString());
+        }
+    }
+
+    private PartitionParameter initPartitionParameter(KuduClient kuduClient, String tableName) throws KuduException {
+        String keyColumn = kuduClient.openTable(tableName).getSchema()
+            .getPrimaryKeyColumns().get(0).getName();
+        KuduScanner.KuduScannerBuilder kuduScannerBuilder =
+            kuduClient.newScannerBuilder(kuduClient.openTable(tableName));
+        kuduScannerBuilder.setProjectedColumnNames(Collections.singletonList(keyColumn));
+        KuduScanner kuduScanner = kuduScannerBuilder.build();
+
+        int maxKey = 0;
+        int minKey = 0;
+        boolean first = true;
+        // Scan the primary-key column once to find its minimum and maximum values.
+        while (kuduScanner.hasMoreRows()) {
+            RowResultIterator rowResults = kuduScanner.nextRows();
+            while (rowResults.hasNext()) {
+                RowResult row = rowResults.next();
+                int id = row.getInt(keyColumn);
+                if (first) {
+                    maxKey = id;
+                    minKey = id;
+                    first = false;
+                } else {
+                    maxKey = Math.max(maxKey, id);
+                    minKey = Math.min(minKey, id);
+                }
+            }
+        }
+        return new PartitionParameter(keyColumn, (long) minKey, (long) maxKey);
+    }
+
+    @Override
+    public void setSeaTunnelContext(SeaTunnelContext seaTunnelContext) {
+        this.seaTunnelContext = seaTunnelContext;
+    }
+
+    public SeaTunnelRowType getSeaTunnelRowType(List<ColumnSchema> columnSchemaList) {
+        ArrayList<SeaTunnelDataType<?>> seaTunnelDataTypes = new ArrayList<>();
+        ArrayList<String> fieldNames = new ArrayList<>();
+        try {
+            for (int i = 0; i < columnSchemaList.size(); i++) {
+                fieldNames.add(columnSchemaList.get(i).getName());
+                seaTunnelDataTypes.add(KuduTypeMapper.mapping(columnSchemaList, i));
+            }
+        } catch (Exception e) {
+            LOGGER.warn("get row type info exception", e);
+            throw new PrepareFailException("kudu", PluginType.SOURCE, e.toString());
+        }
+        return new SeaTunnelRowType(fieldNames.toArray(new String[0]),
+            seaTunnelDataTypes.toArray(new SeaTunnelDataType[0]));
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/source/KuduSourceReader.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/source/KuduSourceReader.java
new file mode 100644
index 00000000000..b235744f216
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/source/KuduSourceReader.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kudu.source;
+
+import org.apache.seatunnel.api.source.Boundedness;
+import org.apache.seatunnel.api.source.Collector;
+import org.apache.seatunnel.api.source.SourceReader;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.connectors.seatunnel.kudu.kuduclient.KuduInputFormat;
+
+import org.apache.kudu.ColumnSchema;
+import org.apache.kudu.client.KuduScanner;
+import org.apache.kudu.client.RowResult;
+import org.apache.kudu.client.RowResultIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Deque;
+import java.util.LinkedList;
+import java.util.List;
+
+public class KuduSourceReader implements SourceReader<SeaTunnelRow, KuduSourceSplit> {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(KuduSourceReader.class);
+
+    private final Context context;
+
+    private final KuduInputFormat kuduInputFormat;
+
+    private final Deque<KuduSourceSplit> splits = new LinkedList<>();
+
+    private boolean noMoreSplit;
+
+    public KuduSourceReader(KuduInputFormat kuduInputFormat, Context context) {
+        this.context = context;
+        this.kuduInputFormat = kuduInputFormat;
+    }
+
+    @Override
+    public void open() {
+        kuduInputFormat.openInputFormat();
+    }
+
+    @Override
+    public void close() {
+        kuduInputFormat.closeInputFormat();
+    }
+
+    @Override
+    public void pollNext(Collector<SeaTunnelRow> output) throws Exception {
+        KuduSourceSplit split = splits.poll();
+        // A poll can arrive before any split is assigned; only read when a split exists.
+        if (split != null) {
+            Object[] parameterValues = split.getParameterValues();
+            int lowerBound = Integer.parseInt(parameterValues[0].toString());
+            int upperBound = Integer.parseInt(parameterValues[1].toString());
+
+            List<ColumnSchema> columnSchemaList = kuduInputFormat.getColumnsSchemas();
+            KuduScanner kuduScanner = kuduInputFormat.getKuduBuildSplit(lowerBound, upperBound);
+            while (kuduScanner.hasMoreRows()) {
+                RowResultIterator rowResults = kuduScanner.nextRows();
+                while (rowResults.hasNext()) {
+                    RowResult rowResult = rowResults.next();
+                    SeaTunnelRow seaTunnelRow = KuduInputFormat.getSeaTunnelRowData(rowResult,
+                        kuduInputFormat.getSeaTunnelRowType(columnSchemaList));
+                    output.collect(seaTunnelRow);
+                }
+            }
+        }
+
+        // Signal the end of data only after every assigned split has been read.
+        if (noMoreSplit && splits.isEmpty() && Boundedness.BOUNDED.equals(context.getBoundedness())) {
+            LOGGER.info("Closed the bounded kudu source");
+            context.signalNoMoreElement();
+        }
+    }
+
+    @Override
+    public List<KuduSourceSplit> snapshotState(long checkpointId) {
+        // Snapshot the splits that have not been read yet.
+        return new ArrayList<>(splits);
+    }
+
+    @Override
+    public void addSplits(List<KuduSourceSplit> splits) {
+        this.splits.addAll(splits);
+    }
+
+    @Override
+    public void handleNoMoreSplits() {
+        noMoreSplit = true;
+    }
+
+    @Override
+    public void notifyCheckpointComplete(long checkpointId) {
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/source/KuduSourceSplit.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/source/KuduSourceSplit.java
new file mode 100644
index 00000000000..2c92fb13fbf
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/source/KuduSourceSplit.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kudu.source;
+
+import org.apache.seatunnel.api.source.SourceSplit;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+
+@Data
+@AllArgsConstructor
+public class KuduSourceSplit implements SourceSplit {
+
+    private static final long serialVersionUID = -1L;
+
+    private final Object[] parameterValues;
+    private final Integer splitId;
+
+    @Override
+    public String splitId() {
+        return splitId.toString();
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/source/KuduSourceSplitEnumerator.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/source/KuduSourceSplitEnumerator.java
new file mode 100644
index 00000000000..66c8f92dbce
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/source/KuduSourceSplitEnumerator.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kudu.source;
+
+import org.apache.seatunnel.api.source.SourceSplitEnumerator;
+import org.apache.seatunnel.connectors.seatunnel.kudu.state.KuduSinkState;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class KuduSourceSplitEnumerator implements SourceSplitEnumerator<KuduSourceSplit, KuduSinkState> {
+
+    private final Context<KuduSourceSplit> enumeratorContext;
+    private final PartitionParameter partitionParameter;
+    private final List<KuduSourceSplit> allSplit = new ArrayList<>();
+    private long maxVal;
+    private long minVal;
+    private long batchSize;
+    private int batchNum;
+
+    public KuduSourceSplitEnumerator(Context<KuduSourceSplit> enumeratorContext, PartitionParameter partitionParameter) {
+        this.enumeratorContext = enumeratorContext;
+        this.partitionParameter = partitionParameter;
+    }
+
+    @Override
+    public void open() {
+    }
+
+    @Override
+    public void run() {
+    }
+
+    @Override
+    public void close() throws IOException {
+    }
+
+    @Override
+    public void addSplitsBack(List<KuduSourceSplit> splits, int subtaskId) {
+    }
+
+    @Override
+    public int currentUnassignedSplitSize() {
+        return 0;
+    }
+
+    @Override
+    public void handleSplitRequest(int subtaskId) {
+    }
+
+    @Override
+    public void registerReader(int subtaskId) {
+        int parallelism = enumeratorContext.currentParallelism();
+        if (allSplit.isEmpty()) {
+            if (null != partitionParameter) {
+                Serializable[][] parameterValues = getParameterValues(
+                    partitionParameter.getMinValue(), partitionParameter.getMaxValue(), parallelism);
+                for (int i = 0; i < parameterValues.length; i++) {
+                    allSplit.add(new KuduSourceSplit(parameterValues[i], i));
+                }
+            } else {
+                allSplit.add(new KuduSourceSplit(null, 0));
+            }
+        }
+        // Filter out the splits that the current subtask should read.
+        List<KuduSourceSplit> splits = allSplit.stream()
+            .filter(p -> p.getSplitId() % parallelism == subtaskId)
+            .collect(Collectors.toList());
+        enumeratorContext.assignSplit(subtaskId, splits);
+        enumeratorContext.signalNoMoreSplits(subtaskId);
+    }
+
+    private Serializable[][] getParameterValues(Long minVal, Long maxVal, int parallelism) {
+        this.maxVal = maxVal;
+        this.minVal = minVal;
+        getBatchSizeAndBatchNum(parallelism);
+        long maxElemCount = (maxVal - minVal) + 1;
+        // The first bigBatchNum ranges hold batchSize keys each; the rest hold one fewer.
+        long bigBatchNum = maxElemCount - (batchSize - 1) * batchNum;
+
+        Serializable[][] parameters = new Serializable[batchNum][2];
+        long start = minVal;
+        for (int i = 0; i < batchNum; i++) {
+            long end = start + batchSize - 1 - (i >= bigBatchNum ? 1 : 0);
1 : 0); + parameters[i] = new Long[] {start, end}; + start = end + 1; + } + return parameters; + + } + + private void getBatchSizeAndBatchNum(int parallelism) { + batchNum=parallelism; + long maxElemCount = (maxVal - minVal) + 1; + if (batchNum > maxElemCount) { + batchNum = (int) maxElemCount; + } + this.batchNum = batchNum; + this.batchSize = new Double(Math.ceil((double) maxElemCount / batchNum)).longValue(); + } + + @Override + public KuduSinkState snapshotState(long checkpointId) throws Exception { + return null; + } + + @Override + public void notifyCheckpointComplete(long checkpointId) throws Exception { + + } +} diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/source/PartitionParameter.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/source/PartitionParameter.java new file mode 100644 index 00000000000..e791164667c --- /dev/null +++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/source/PartitionParameter.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.kudu.source; + +import lombok.AllArgsConstructor; +import lombok.Data; + +import java.io.Serializable; + +@Data +@AllArgsConstructor +public class PartitionParameter implements Serializable { + + String partitionColumnName; + Long minValue; + Long maxValue; +} diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/state/KuduSinkState.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/state/KuduSinkState.java new file mode 100644 index 00000000000..f50416ee29a --- /dev/null +++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/state/KuduSinkState.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
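For reference, the enumerator above divides the closed range [minValue, maxValue] of the partition column into at most `parallelism` contiguous batches: the first `bigBatchNum` splits take `batchSize` values and the remaining splits take one fewer. A minimal standalone sketch of the same arithmetic (the class name and bound values below are illustrative, not part of the connector):

public class RangeSplitSketch {
    public static void main(String[] args) {
        long min = 1;
        long max = 10;
        int parallelism = 3;

        long maxElemCount = (max - min) + 1;                                  // 10 values in [1, 10]
        int batchNum = (int) Math.min(parallelism, maxElemCount);
        long batchSize = (long) Math.ceil((double) maxElemCount / batchNum);  // ceil(10 / 3) = 4
        long bigBatchNum = maxElemCount - (batchSize - 1) * batchNum;         // 10 - 3 * 3 = 1

        long start = min;
        for (int i = 0; i < batchNum; i++) {
            // the first bigBatchNum splits take batchSize values, the rest take batchSize - 1
            long end = start + batchSize - 1 - (i >= bigBatchNum ? 1 : 0);
            System.out.printf("split %d -> [%d, %d]%n", i, start, end);
            start = end + 1;
        }
        // prints: split 0 -> [1, 4], split 1 -> [5, 7], split 2 -> [8, 10]
    }
}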
+ */ + +package org.apache.seatunnel.connectors.seatunnel.kudu.state; + +import java.io.Serializable; + +public class KuduSinkState implements Serializable { +} diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/resources/kudu_to_kudu_flink.conf b/seatunnel-connectors-v2/connector-kudu/src/main/resources/kudu_to_kudu_flink.conf new file mode 100644 index 00000000000..b04aeae6898 --- /dev/null +++ b/seatunnel-connectors-v2/connector-kudu/src/main/resources/kudu_to_kudu_flink.conf @@ -0,0 +1,60 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements.  See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License.  You may obtain a copy of the License at +# +#    http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + # You can set flink configuration here + execution.parallelism = 2 + #job.mode = "BATCH" + #execution.checkpoint.interval = 10000 + #execution.checkpoint.data-uri = "hdfs://localhost:9000/checkpoint" +} + +source { + # This is an example source plugin **only for testing and demonstrating the source plugin feature** + KuduSource { + result_table_name = "studentlyh2" + kudu_master = "192.168.88.110:7051" + kudu_table = "studentlyh2" + columnsList = "id,name,age,sex" + } + + # If you would like to get more information about how to configure seatunnel and see full list of source plugins, + # please go to https://seatunnel.apache.org/docs/flink/configuration/source-plugins/Fake +} + +transform { + sql { + sql = "select id,name,age,sex from studentlyh2" + } + + # If you would like to get more information about how to configure seatunnel and see full list of transform plugins, + # please go to https://seatunnel.apache.org/docs/flink/configuration/transform-plugins/Sql +} + +sink { + kuduSink { + kudu_master = "192.168.88.110:7051" + kudu_table = "studentlyhresultflink" + save_mode = "append" + } + + # If you would like to get more information about how to configure seatunnel and see full list of sink plugins, + # please go to https://seatunnel.apache.org/docs/flink/configuration/sink-plugins/Console +} \ No newline at end of file diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/resources/kudu_to_kudu_spark.conf b/seatunnel-connectors-v2/connector-kudu/src/main/resources/kudu_to_kudu_spark.conf new file mode 100644 index 00000000000..cb4ecbaa8e5 --- /dev/null +++ b/seatunnel-connectors-v2/connector-kudu/src/main/resources/kudu_to_kudu_spark.conf @@ -0,0 +1,64 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements.  See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License.
You may obtain a copy of the License at +# +#    http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + # You can set spark configuration here + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 2 + spark.executor.memory = "1g" + spark.master = local + #job.mode = "BATCH" + #execution.checkpoint.interval = 10000 + #execution.checkpoint.data-uri = "hdfs://localhost:9000/checkpoint" +} + +source { + # This is an example source plugin **only for testing and demonstrating the source plugin feature** + KuduSource { + result_table_name = "studentlyh2" + kudu_master = "192.168.88.110:7051" + kudu_table = "studentlyh2" + columnsList = "id,name,age,sex" + } + + # If you would like to get more information about how to configure seatunnel and see full list of source plugins, + # please go to https://seatunnel.apache.org/docs/flink/configuration/source-plugins/Fake +} + +transform { + sql { + sql = "select id,name,age,sex from studentlyh2" + } + + # If you would like to get more information about how to configure seatunnel and see full list of transform plugins, + # please go to https://seatunnel.apache.org/docs/flink/configuration/transform-plugins/Sql +} + +sink { + kuduSink { + kudu_master = "192.168.88.110:7051" + kudu_table = "studentlyhresult" + save_mode = "append" + } + + # If you would like to get more information about how to configure seatunnel and see full list of sink plugins, + # please go to https://seatunnel.apache.org/docs/flink/configuration/sink-plugins/Console +} \ No newline at end of file diff --git a/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/config/SourceProperties.java b/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/config/SourceProperties.java index e8692322059..92453efee4c 100644 --- a/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/config/SourceProperties.java +++ b/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/config/SourceProperties.java @@ -19,61 +19,48 @@ public class SourceProperties { - // Pulsar client API config prefix. - public static final String CLIENT_CONFIG_PREFIX = "pulsar.client."; - // Pulsar admin API config prefix. - public static final String ADMIN_CONFIG_PREFIX = "pulsar.admin."; - // -------------------------------------------------------------------------------------------- // The configuration for ClientConfigurationData part. - // All the configuration listed below should have the pulsar.client prefix.
// -------------------------------------------------------------------------------------------- - public static final String PULSAR_SERVICE_URL = CLIENT_CONFIG_PREFIX + "serviceUrl"; - public static final String PULSAR_AUTH_PLUGIN_CLASS_NAME = CLIENT_CONFIG_PREFIX + "authPluginClassName"; - public static final String PULSAR_AUTH_PARAMS = CLIENT_CONFIG_PREFIX + "authParams"; + public static final String CLIENT_SERVICE_URL = "client.service-url"; + public static final String AUTH_PLUGIN_CLASS = "auth.plugin-class"; + public static final String AUTH_PARAMS = "auth.params"; // -------------------------------------------------------------------------------------------- // The configuration for ClientConfigurationData part. // All the configuration listed below should have the pulsar.client prefix. // -------------------------------------------------------------------------------------------- - public static final String PULSAR_ADMIN_URL = ADMIN_CONFIG_PREFIX + "adminUrl"; - - // Pulsar source connector config prefix. - public static final String SOURCE_CONFIG_PREFIX = "pulsar.source."; - // Pulsar consumer API config prefix. - public static final String CONSUMER_CONFIG_PREFIX = "pulsar.consumer."; + public static final String ADMIN_SERVICE_URL = "admin.service-url"; // -------------------------------------------------------------------------------------------- // The configuration for ConsumerConfigurationData part. - // All the configuration listed below should have the pulsar.consumer prefix. // -------------------------------------------------------------------------------------------- - public static final String PULSAR_SUBSCRIPTION_NAME = CONSUMER_CONFIG_PREFIX + "subscriptionName"; - public static final String PULSAR_SUBSCRIPTION_TYPE = CONSUMER_CONFIG_PREFIX + "subscriptionType"; - public static final String PULSAR_SUBSCRIPTION_MODE = CONSUMER_CONFIG_PREFIX + "subscriptionMode"; + public static final String SUBSCRIPTION_NAME = "subscription.name"; + public static final String SUBSCRIPTION_TYPE = "subscription.type"; + public static final String SUBSCRIPTION_MODE = "subscription.mode"; // -------------------------------------------------------------------------------------------- // The configuration for pulsar source part. - // All the configuration listed below should have the pulsar.source prefix. 
// -------------------------------------------------------------------------------------------- - public static final String PULSAR_PARTITION_DISCOVERY_INTERVAL_MS = SOURCE_CONFIG_PREFIX + "partitionDiscoveryIntervalMs"; - public static final String PULSAR_TOPIC = SOURCE_CONFIG_PREFIX + "topic"; - public static final String PULSAR_TOPIC_PATTERN = SOURCE_CONFIG_PREFIX + "topic.pattern"; - public static final String PULSAR_POLL_TIMEOUT = SOURCE_CONFIG_PREFIX + "poll.timeout"; - public static final String PULSAR_POLL_INTERVAL = SOURCE_CONFIG_PREFIX + "poll.interval"; - public static final String PULSAR_BATCH_SIZE = SOURCE_CONFIG_PREFIX + "batch.size"; - public static final String PULSAR_CURSOR_START_MODE = SOURCE_CONFIG_PREFIX + "scan.cursor.start.mode"; - public static final String PULSAR_CURSOR_START_RESET_MODE = SOURCE_CONFIG_PREFIX + "scan.cursor.start.reset.mode"; - public static final String PULSAR_CURSOR_START_TIMESTAMP = SOURCE_CONFIG_PREFIX + "scan.cursor.start.timestamp"; - public static final String PULSAR_CURSOR_START_ID = SOURCE_CONFIG_PREFIX + "scan.cursor.start.id"; - public static final String PULSAR_CURSOR_STOP_MODE = SOURCE_CONFIG_PREFIX + "scan.cursor.stop.mode"; - public static final String PULSAR_CURSOR_STOP_TIMESTAMP = SOURCE_CONFIG_PREFIX + "scan.cursor.stop.timestamp"; + public static final String TOPIC_DISCOVERY_INTERVAL = "topic-discovery.interval"; + public static final String TOPIC = "topic"; + public static final String TOPIC_PATTERN = "topic-pattern"; + public static final String POLL_TIMEOUT = "poll.timeout"; + public static final String POLL_INTERVAL = "poll.interval"; + public static final String POLL_BATCH_SIZE = "poll.batch.size"; + public static final String CURSOR_STARTUP_MODE = "cursor.startup.mode"; + public static final String CURSOR_RESET_MODE = "cursor.reset.mode"; + public static final String CURSOR_STARTUP_TIMESTAMP = "cursor.startup.timestamp"; + public static final String CURSOR_STARTUP_ID = "cursor.startup.id"; + public static final String CURSOR_STOP_MODE = "cursor.stop.mode"; + public static final String CURSOR_STOP_TIMESTAMP = "cursor.stop.timestamp"; /** - * Startup mode for the Kafka consumer, see {@link #PULSAR_CURSOR_START_MODE}. + * Startup mode for the pulsar consumer, see {@link #CURSOR_STARTUP_MODE}. */ public enum StartMode { /** @@ -99,7 +86,7 @@ public enum StartMode { } /** - * Startup mode for the Kafka consumer, see {@link #PULSAR_CURSOR_START_MODE}. + * Startup mode for the pulsar consumer, see {@link #CURSOR_STARTUP_MODE}. 
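For clarity, the renaming above drops the old `pulsar.client.` / `pulsar.admin.` / `pulsar.consumer.` / `pulsar.source.` prefixes in favor of short dotted option keys. A quick old-to-new reference derived from this diff (the helper class below is illustrative, not part of the connector code):

import java.util.LinkedHashMap;
import java.util.Map;

public class PulsarOptionRenames {
    public static void main(String[] args) {
        // old fully-prefixed key -> new short key, as defined in SourceProperties
        Map<String, String> renames = new LinkedHashMap<>();
        renames.put("pulsar.client.serviceUrl", "client.service-url");
        renames.put("pulsar.admin.adminUrl", "admin.service-url");
        renames.put("pulsar.consumer.subscriptionName", "subscription.name");
        renames.put("pulsar.source.topic", "topic");
        renames.put("pulsar.source.topic.pattern", "topic-pattern");
        renames.put("pulsar.source.partitionDiscoveryIntervalMs", "topic-discovery.interval");
        renames.put("pulsar.source.batch.size", "poll.batch.size");
        renames.put("pulsar.source.scan.cursor.start.mode", "cursor.startup.mode");
        renames.put("pulsar.source.scan.cursor.stop.mode", "cursor.stop.mode");
        renames.forEach((oldKey, newKey) -> System.out.println(oldKey + " -> " + newKey));
    }
}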
*/ public enum StopMode { /** diff --git a/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/PulsarSource.java b/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/PulsarSource.java index 20028efb071..88c36d81631 100644 --- a/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/PulsarSource.java +++ b/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/PulsarSource.java @@ -19,30 +19,28 @@ import static org.apache.seatunnel.common.PropertiesUtil.getEnum; import static org.apache.seatunnel.common.PropertiesUtil.setOption; -import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.PULSAR_ADMIN_URL; -import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.PULSAR_AUTH_PARAMS; -import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.PULSAR_AUTH_PLUGIN_CLASS_NAME; -import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.PULSAR_BATCH_SIZE; -import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.PULSAR_CURSOR_START_MODE; -import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.PULSAR_CURSOR_START_RESET_MODE; -import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.PULSAR_CURSOR_START_TIMESTAMP; -import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.PULSAR_CURSOR_STOP_MODE; -import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.PULSAR_CURSOR_STOP_TIMESTAMP; -import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.PULSAR_PARTITION_DISCOVERY_INTERVAL_MS; -import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.PULSAR_POLL_INTERVAL; -import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.PULSAR_POLL_TIMEOUT; -import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.PULSAR_SERVICE_URL; -import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.PULSAR_SUBSCRIPTION_NAME; -import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.PULSAR_TOPIC; -import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.PULSAR_TOPIC_PATTERN; +import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.ADMIN_SERVICE_URL; +import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.AUTH_PARAMS; +import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.AUTH_PLUGIN_CLASS; +import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.CLIENT_SERVICE_URL; +import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.CURSOR_RESET_MODE; +import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.CURSOR_STARTUP_MODE; +import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.CURSOR_STARTUP_TIMESTAMP; +import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.CURSOR_STOP_MODE; +import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.CURSOR_STOP_TIMESTAMP; +import static 
org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.POLL_BATCH_SIZE; +import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.POLL_INTERVAL; +import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.POLL_TIMEOUT; +import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.SUBSCRIPTION_NAME; import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.StartMode; import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.StartMode.LATEST; import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.StopMode.NEVER; +import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.TOPIC; +import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.TOPIC_DISCOVERY_INTERVAL; +import static org.apache.seatunnel.connectors.seatunnel.pulsar.config.SourceProperties.TOPIC_PATTERN; import org.apache.seatunnel.api.common.PrepareFailException; -import org.apache.seatunnel.api.serialization.DefaultSerializer; import org.apache.seatunnel.api.serialization.DeserializationSchema; -import org.apache.seatunnel.api.serialization.Serializer; import org.apache.seatunnel.api.source.Boundedness; import org.apache.seatunnel.api.source.SeaTunnelSource; import org.apache.seatunnel.api.source.SourceReader; @@ -100,48 +98,48 @@ public String getPluginName() { @SuppressWarnings("checkstyle:MagicNumber") @Override public void prepare(Config config) throws PrepareFailException { - CheckResult result = CheckConfigUtil.checkAllExists(config, PULSAR_SUBSCRIPTION_NAME, PULSAR_SERVICE_URL, PULSAR_ADMIN_URL); + CheckResult result = CheckConfigUtil.checkAllExists(config, SUBSCRIPTION_NAME, CLIENT_SERVICE_URL, ADMIN_SERVICE_URL); if (!result.isSuccess()) { throw new PrepareFailException(getPluginName(), PluginType.SOURCE, result.getMsg()); } // admin config PulsarAdminConfig.Builder adminConfigBuilder = PulsarAdminConfig.builder() - .adminUrl(config.getString(PULSAR_ADMIN_URL)); - setOption(config, PULSAR_AUTH_PLUGIN_CLASS_NAME, config::getString, adminConfigBuilder::authPluginClassName); - setOption(config, PULSAR_AUTH_PARAMS, config::getString, adminConfigBuilder::authParams); + .adminUrl(config.getString(ADMIN_SERVICE_URL)); + setOption(config, AUTH_PLUGIN_CLASS, config::getString, adminConfigBuilder::authPluginClassName); + setOption(config, AUTH_PARAMS, config::getString, adminConfigBuilder::authParams); this.adminConfig = adminConfigBuilder.build(); // client config PulsarClientConfig.Builder clientConfigBuilder = PulsarClientConfig.builder() - .serviceUrl(config.getString(PULSAR_SERVICE_URL)); - setOption(config, PULSAR_AUTH_PLUGIN_CLASS_NAME, config::getString, clientConfigBuilder::authPluginClassName); - setOption(config, PULSAR_AUTH_PARAMS, config::getString, clientConfigBuilder::authParams); + .serviceUrl(config.getString(CLIENT_SERVICE_URL)); + setOption(config, AUTH_PLUGIN_CLASS, config::getString, clientConfigBuilder::authPluginClassName); + setOption(config, AUTH_PARAMS, config::getString, clientConfigBuilder::authParams); this.clientConfig = clientConfigBuilder.build(); // consumer config PulsarConsumerConfig.Builder consumerConfigBuilder = PulsarConsumerConfig.builder() - .subscriptionName(config.getString(PULSAR_SERVICE_URL)); + .subscriptionName(config.getString(SUBSCRIPTION_NAME)); this.consumerConfig = consumerConfigBuilder.build(); // source properties setOption(config, - 
PULSAR_PARTITION_DISCOVERY_INTERVAL_MS, + TOPIC_DISCOVERY_INTERVAL, 30000L, config::getLong, v -> this.partitionDiscoveryIntervalMs = v); setOption(config, - PULSAR_POLL_TIMEOUT, + POLL_TIMEOUT, 100, config::getInt, v -> this.pollTimeout = v); setOption(config, - PULSAR_POLL_INTERVAL, + POLL_INTERVAL, 50L, config::getLong, v -> this.pollInterval = v); setOption(config, - PULSAR_BATCH_SIZE, + POLL_BATCH_SIZE, 500, config::getInt, v -> this.batchSize = v); @@ -159,7 +157,7 @@ public void prepare(Config config) throws PrepareFailException { } private void setStartCursor(Config config) { - StartMode startMode = getEnum(config, PULSAR_CURSOR_START_MODE, StartMode.class, LATEST); + StartMode startMode = getEnum(config, CURSOR_STARTUP_MODE, StartMode.class, LATEST); switch (startMode) { case EARLIEST: this.startCursor = StartCursor.earliest(); @@ -169,16 +167,16 @@ private void setStartCursor(Config config) { break; case SUBSCRIPTION: SubscriptionStartCursor.CursorResetStrategy resetStrategy = getEnum(config, - PULSAR_CURSOR_START_RESET_MODE, + CURSOR_RESET_MODE, SubscriptionStartCursor.CursorResetStrategy.class, SubscriptionStartCursor.CursorResetStrategy.LATEST); this.startCursor = StartCursor.subscription(resetStrategy); break; case TIMESTAMP: - if (StringUtils.isBlank(config.getString(PULSAR_CURSOR_START_TIMESTAMP))) { - throw new IllegalArgumentException(String.format("The '%s' property is required when the '%s' is 'timestamp'.", PULSAR_CURSOR_START_TIMESTAMP, PULSAR_CURSOR_START_MODE)); + if (StringUtils.isBlank(config.getString(CURSOR_STARTUP_TIMESTAMP))) { + throw new IllegalArgumentException(String.format("The '%s' property is required when the '%s' is 'timestamp'.", CURSOR_STARTUP_TIMESTAMP, CURSOR_STARTUP_MODE)); } - setOption(config, PULSAR_CURSOR_START_TIMESTAMP, config::getLong, timestamp -> this.startCursor = StartCursor.timestamp(timestamp)); + setOption(config, CURSOR_STARTUP_TIMESTAMP, config::getLong, timestamp -> this.startCursor = StartCursor.timestamp(timestamp)); break; default: throw new IllegalArgumentException(String.format("The %s mode is not supported.", startMode)); @@ -186,7 +184,7 @@ private void setStartCursor(Config config) { } private void setStopCursor(Config config) { - SourceProperties.StopMode stopMode = getEnum(config, PULSAR_CURSOR_STOP_MODE, SourceProperties.StopMode.class, NEVER); + SourceProperties.StopMode stopMode = getEnum(config, CURSOR_STOP_MODE, SourceProperties.StopMode.class, NEVER); switch (stopMode) { case LATEST: this.stopCursor = StopCursor.latest(); @@ -195,10 +193,10 @@ private void setStopCursor(Config config) { this.stopCursor = StopCursor.never(); break; case TIMESTAMP: - if (StringUtils.isBlank(config.getString(PULSAR_CURSOR_STOP_TIMESTAMP))) { - throw new IllegalArgumentException(String.format("The '%s' property is required when the '%s' is 'timestamp'.", PULSAR_CURSOR_STOP_TIMESTAMP, PULSAR_CURSOR_STOP_MODE)); + if (StringUtils.isBlank(config.getString(CURSOR_STOP_TIMESTAMP))) { + throw new IllegalArgumentException(String.format("The '%s' property is required when the '%s' is 'timestamp'.", CURSOR_STOP_TIMESTAMP, CURSOR_STOP_MODE)); } - setOption(config, PULSAR_CURSOR_START_TIMESTAMP, config::getLong, timestamp -> this.stopCursor = StopCursor.timestamp(timestamp)); + setOption(config, CURSOR_STOP_TIMESTAMP, config::getLong, timestamp -> this.stopCursor = StopCursor.timestamp(timestamp)); break; default: throw new IllegalArgumentException(String.format("The %s mode is not supported.", stopMode)); @@ -206,19 +204,19 @@ private void
setStopCursor(Config config) } private void setPartitionDiscoverer(Config config) { - String topic = config.getString(PULSAR_TOPIC); + String topic = config.getString(TOPIC); if (StringUtils.isNotBlank(topic)) { this.partitionDiscoverer = new TopicListDiscoverer(Arrays.asList(StringUtils.split(topic, ","))); } - String topicPattern = config.getString(PULSAR_TOPIC_PATTERN); + String topicPattern = config.getString(TOPIC_PATTERN); if (StringUtils.isNotBlank(topicPattern)) { if (this.partitionDiscoverer != null) { - throw new IllegalArgumentException(String.format("The properties '%s' and '%s' is exclusive.", PULSAR_TOPIC, PULSAR_TOPIC_PATTERN)); + throw new IllegalArgumentException(String.format("The properties '%s' and '%s' are mutually exclusive.", TOPIC, TOPIC_PATTERN)); } this.partitionDiscoverer = new TopicPatternDiscoverer(Pattern.compile(topicPattern)); } if (this.partitionDiscoverer == null) { - throw new IllegalArgumentException(String.format("The properties '%s' or '%s' is required.", PULSAR_TOPIC, PULSAR_TOPIC_PATTERN)); + throw new IllegalArgumentException(String.format("One of the properties '%s' or '%s' is required.", TOPIC, TOPIC_PATTERN)); } } @@ -274,8 +272,4 @@ public SourceSplitEnumerator r checkpointState.assignedPartitions()); } - @Override - public Serializer getEnumeratorStateSerializer() { - return new DefaultSerializer<>(); - } } diff --git a/seatunnel-connectors-v2/pom.xml b/seatunnel-connectors-v2/pom.xml index be62464f81b..56483f8acc3 100644 --- a/seatunnel-connectors-v2/pom.xml +++ b/seatunnel-connectors-v2/pom.xml @@ -28,7 +28,6 @@ 4.0.0 pom - seatunnel-connectors-v2 @@ -36,13 +35,16 @@ connector-clickhouse connector-console connector-fake - connector-hive connector-http connector-jdbc connector-kafka connector-pulsar connector-socket + connector-hive + connector-file + connector-hudi connector-assert + connector-kudu @@ -55,5 +57,16 @@ junit-jupiter-params + + + + org.apache.maven.plugins + maven-dependency-plugin + + true + + + + - \ No newline at end of file + diff --git a/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-assert/src/test/java/org/apache/seatunnel/flink/assertion/AssertExecutorTest.java b/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-assert/src/test/java/org/apache/seatunnel/flink/assertion/AssertExecutorTest.java index 34c533cfc8b..1ddd866b90c 100644 --- a/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-assert/src/test/java/org/apache/seatunnel/flink/assertion/AssertExecutorTest.java +++ b/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-assert/src/test/java/org/apache/seatunnel/flink/assertion/AssertExecutorTest.java @@ -26,7 +26,6 @@ import java.util.List; -@SuppressWarnings("magicnumber") public class AssertExecutorTest extends TestCase { Row row = Row.withNames(); AssertExecutor assertExecutor = new AssertExecutor(); diff --git a/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-assert/src/test/java/org/apache/seatunnel/flink/assertion/rule/AssertRuleParserTest.java b/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-assert/src/test/java/org/apache/seatunnel/flink/assertion/rule/AssertRuleParserTest.java index fad5ed322c6..1df875e9d30 100644 --- a/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-assert/src/test/java/org/apache/seatunnel/flink/assertion/rule/AssertRuleParserTest.java +++
b/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-assert/src/test/java/org/apache/seatunnel/flink/assertion/rule/AssertRuleParserTest.java @@ -25,7 +25,6 @@ import java.util.List; -@SuppressWarnings("magicnumber") public class AssertRuleParserTest extends TestCase { AssertRuleParser parser = new AssertRuleParser(); diff --git a/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-clickhouse/src/main/java/org/apache/seatunnel/flink/clickhouse/pojo/Shard.java b/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-clickhouse/src/main/java/org/apache/seatunnel/flink/clickhouse/pojo/Shard.java index 20f34be1e82..e2d1394df22 100644 --- a/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-clickhouse/src/main/java/org/apache/seatunnel/flink/clickhouse/pojo/Shard.java +++ b/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-clickhouse/src/main/java/org/apache/seatunnel/flink/clickhouse/pojo/Shard.java @@ -114,7 +114,6 @@ public boolean equals(Object o) { } @Override - @SuppressWarnings("magicnumber") public int hashCode() { if (hashCode != -1) { return hashCode; diff --git a/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-clickhouse/src/main/java/org/apache/seatunnel/flink/clickhouse/pojo/ShardMetadata.java b/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-clickhouse/src/main/java/org/apache/seatunnel/flink/clickhouse/pojo/ShardMetadata.java index 96e27336462..033c99d9048 100644 --- a/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-clickhouse/src/main/java/org/apache/seatunnel/flink/clickhouse/pojo/ShardMetadata.java +++ b/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-clickhouse/src/main/java/org/apache/seatunnel/flink/clickhouse/pojo/ShardMetadata.java @@ -122,7 +122,6 @@ public boolean equals(Object o) { } @Override - @SuppressWarnings("magicnumber") public int hashCode() { int result = shardKey.hashCode(); result = 31 * result + shardKeyType.hashCode(); diff --git a/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-doris/src/main/java/org/apache/seatunnel/flink/doris/sink/RespContent.java b/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-doris/src/main/java/org/apache/seatunnel/flink/doris/sink/RespContent.java index 2f18861172b..67bf3781e2a 100644 --- a/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-doris/src/main/java/org/apache/seatunnel/flink/doris/sink/RespContent.java +++ b/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-doris/src/main/java/org/apache/seatunnel/flink/doris/sink/RespContent.java @@ -38,6 +38,9 @@ public class RespContent implements Serializable { */ @JsonProperty("Label") private String label; + + @JsonProperty("TwoPhaseCommit") + private String twoPhaseCommit; /** * Import complete status. * "Success": Indicates that the import was successful. 
@@ -140,6 +143,14 @@ public void setLabel(String label) { this.label = label; } + public String getTwoPhaseCommit() { + return twoPhaseCommit; + } + + public void setTwoPhaseCommit(String twoPhaseCommit) { + this.twoPhaseCommit = twoPhaseCommit; + } + public String getStatus() { return status; } @@ -265,6 +276,7 @@ public String toString() { return "RespContent{" + "txnId=" + txnId + ", label='" + label + '\'' + + ", twoPhaseCommit='" + twoPhaseCommit + '\'' + ", status='" + status + '\'' + ", existingJobStatus='" + existingJobStatus + '\'' + ", message='" + message + '\'' + diff --git a/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-fake/src/main/java/org/apache/seatunnel/flink/fake/source/MockSchema.java b/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-fake/src/main/java/org/apache/seatunnel/flink/fake/source/MockSchema.java index b8f0428d1a8..37f1b33726f 100644 --- a/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-fake/src/main/java/org/apache/seatunnel/flink/fake/source/MockSchema.java +++ b/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-fake/src/main/java/org/apache/seatunnel/flink/fake/source/MockSchema.java @@ -53,7 +53,6 @@ import java.io.ByteArrayInputStream; import java.io.Serializable; import java.math.BigDecimal; -import java.math.BigInteger; import java.nio.charset.StandardCharsets; import java.sql.Timestamp; import java.util.ArrayList; @@ -132,6 +131,7 @@ public TypeInformation typeInformation() { dataType = BasicTypeInfo.SHORT_TYPE_INFO; break; case "long": + case "bigint": dataType = BasicTypeInfo.LONG_TYPE_INFO; break; case "float": @@ -150,10 +150,6 @@ public TypeInformation typeInformation() { case "bigdecimal": dataType = BasicTypeInfo.BIG_DEC_TYPE_INFO; break; - case "bigint": - case "biginteger": - dataType = BasicTypeInfo.BIG_INT_TYPE_INFO; - break; case "int[]": dataType = PrimitiveArrayTypeInfo.INT_PRIMITIVE_ARRAY_TYPE_INFO; break; @@ -193,11 +189,11 @@ public TypeInformation typeInformation() { return dataType; } - public Object mockData(){ + public Object mockData() { Object mockData; MockConfig mockConfig = new MockConfig(); resolve(mockConfig); - switch (this.type.trim().toLowerCase()){ + switch (this.type.trim().toLowerCase()) { case "int": case "integer": mockData = JMockData.mock(int.class, mockConfig); @@ -216,6 +212,7 @@ public Object mockData(){ mockData = JMockData.mock(short.class, mockConfig); break; case "long": + case "bigint": mockData = JMockData.mock(long.class, mockConfig); break; case "float": @@ -234,10 +231,6 @@ public Object mockData(){ case "bigdecimal": mockData = JMockData.mock(BigDecimal.class, mockConfig); break; - case "bigint": - case "biginteger": - mockData = JMockData.mock(BigInteger.class, mockConfig); - break; case "int[]": mockData = JMockData.mock(int[].class, mockConfig); break; @@ -426,7 +419,7 @@ public static RowTypeInfo mockRowTypeInfo(List mockDataSchema) { return new RowTypeInfo(types, fieldNames); } - public static Row mockRowData(List mockDataSchema){ + public static Row mockRowData(List mockDataSchema) { Object[] fieldByPosition = new Object[mockDataSchema.size()]; for (int index = 0; index < mockDataSchema.size(); index++) { MockSchema schema = mockDataSchema.get(index); @@ -470,7 +463,7 @@ public static Row mockRowData(List mockDataSchema){ DEFAULT_MOCK_SCHEMAS.add(ageSchema); } - public static List resolveConfig(Config config){ + public static List resolveConfig(Config config) { if 
(config.hasPath(MOCK_DATA_SCHEMA)) { return config.getConfigList(MOCK_DATA_SCHEMA) .stream() diff --git a/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-kafka/src/main/java/org/apache/seatunnel/flink/kafka/source/KafkaTableStream.java b/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-kafka/src/main/java/org/apache/seatunnel/flink/kafka/source/KafkaTableStream.java index 5038241315d..dbb60c7cf06 100644 --- a/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-kafka/src/main/java/org/apache/seatunnel/flink/kafka/source/KafkaTableStream.java +++ b/seatunnel-connectors/seatunnel-connectors-flink/seatunnel-connector-flink-kafka/src/main/java/org/apache/seatunnel/flink/kafka/source/KafkaTableStream.java @@ -111,7 +111,7 @@ public void prepare(FlinkEnvironment env) { } String schemaContent = config.getString(SCHEMA); format = FormatType.from(config.getString(SOURCE_FORMAT).trim().toLowerCase()); - schemaInfo = JsonUtils.parseObject(schemaContent); + schemaInfo = JsonUtils.parseArray(schemaContent); } @Override diff --git a/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-hudi/src/main/scala/org/apache/seatunnel/spark/hudi/source/Hudi.scala b/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-hudi/src/main/scala/org/apache/seatunnel/spark/hudi/source/Hudi.scala index 82e55f09a7f..a1d42862cf2 100644 --- a/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-hudi/src/main/scala/org/apache/seatunnel/spark/hudi/source/Hudi.scala +++ b/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-hudi/src/main/scala/org/apache/seatunnel/spark/hudi/source/Hudi.scala @@ -38,7 +38,7 @@ class Hudi extends SparkBatchSource { reader.option(e.getKey, String.valueOf(e.getValue.unwrapped())) } - reader.load(config.getString(HOODIE_DATASTORE_READ_PATHS)) + reader.load() } override def getPluginName: String = "Hudi" diff --git a/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-jdbc/src/main/scala/org/apache/seatunnel/spark/jdbc/source/Jdbc.scala b/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-jdbc/src/main/scala/org/apache/seatunnel/spark/jdbc/source/Jdbc.scala index 6648e019c99..6a31beefd66 100644 --- a/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-jdbc/src/main/scala/org/apache/seatunnel/spark/jdbc/source/Jdbc.scala +++ b/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-jdbc/src/main/scala/org/apache/seatunnel/spark/jdbc/source/Jdbc.scala @@ -18,11 +18,12 @@ package org.apache.seatunnel.spark.jdbc.source import scala.collection.JavaConversions._ import scala.util.{Failure, Success, Try} - import org.apache.seatunnel.common.config.{CheckResult, TypesafeConfigUtils} import org.apache.seatunnel.common.config.CheckConfigUtil.checkAllExists import org.apache.seatunnel.spark.SparkEnvironment import org.apache.seatunnel.spark.batch.SparkBatchSource +import org.apache.seatunnel.spark.jdbc.source.util.HiveDialect +import org.apache.spark.sql.jdbc.JdbcDialects import org.apache.spark.sql.{DataFrameReader, Dataset, Row, SparkSession} class Jdbc extends SparkBatchSource { @@ -58,6 +59,10 @@ class Jdbc extends SparkBatchSource { case Failure(_) => // do nothing } + if (config.getString("url").startsWith("jdbc:hive2")) { + JdbcDialects.registerDialect(new HiveDialect) + } + reader } diff --git 
a/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-jdbc/src/main/scala/org/apache/seatunnel/spark/jdbc/source/util/HiveDialect.scala b/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-jdbc/src/main/scala/org/apache/seatunnel/spark/jdbc/source/util/HiveDialect.scala new file mode 100644 index 00000000000..81eb48bf60d --- /dev/null +++ b/seatunnel-connectors/seatunnel-connectors-spark/seatunnel-connector-spark-jdbc/src/main/scala/org/apache/seatunnel/spark/jdbc/source/util/HiveDialect.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.seatunnel.spark.jdbc.source.util + +import org.apache.spark.sql.jdbc.JdbcDialect + +class HiveDialect extends JdbcDialect { + override def canHandle(url: String): Boolean = { + url.startsWith("jdbc:hive2") + } + + override def quoteIdentifier(colName: String): String = { + if (colName.contains(".")) { + val colName1 = colName.substring(colName.indexOf(".") + 1) + s"`$colName1`" + } else { + s"`$colName`" + } + } +} diff --git a/seatunnel-core/seatunnel-core-base/src/main/java/org/apache/seatunnel/core/base/command/AbstractCommandArgs.java b/seatunnel-core/seatunnel-core-base/src/main/java/org/apache/seatunnel/core/base/command/AbstractCommandArgs.java index a19fe285f1b..9042b2c140e 100644 --- a/seatunnel-core/seatunnel-core-base/src/main/java/org/apache/seatunnel/core/base/command/AbstractCommandArgs.java +++ b/seatunnel-core/seatunnel-core-base/src/main/java/org/apache/seatunnel/core/base/command/AbstractCommandArgs.java @@ -38,7 +38,7 @@ public abstract class AbstractCommandArgs implements CommandArgs { private List variables = Collections.emptyList(); // todo: use command type enum - @Parameter(names = {"-t", "--check"}, + @Parameter(names = {"-ck", "--check"}, description = "check config") private boolean checkConfig = false; diff --git a/seatunnel-core/seatunnel-core-flink-sql/src/main/bin/start-seatunnel-sql.sh b/seatunnel-core/seatunnel-core-flink-sql/src/main/bin/start-seatunnel-sql.sh index 41890ad91a0..6f55aa5baca 100755 --- a/seatunnel-core/seatunnel-core-flink-sql/src/main/bin/start-seatunnel-sql.sh +++ b/seatunnel-core/seatunnel-core-flink-sql/src/main/bin/start-seatunnel-sql.sh @@ -17,9 +17,27 @@ # set -eu -APP_DIR=$(cd $(dirname ${0})/../;pwd) +# resolve links - $0 may be a softlink +PRG="$0" + +while [ -h "$PRG" ] ; do + # shellcheck disable=SC2006 + ls=`ls -ld "$PRG"` + # shellcheck disable=SC2006 + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + # shellcheck disable=SC2006 + PRG=`dirname "$PRG"`/"$link" + fi +done + +PRG_DIR=`dirname "$PRG"` +APP_DIR=`cd "$PRG_DIR/.." 
>/dev/null; pwd` CONF_DIR=${APP_DIR}/config APP_JAR=${APP_DIR}/lib/seatunnel-core-flink-sql.jar +APP_MAIN="org.apache.seatunnel.core.sql.FlinkSqlStarter" if [ -f "${CONF_DIR}/seatunnel-env.sh" ]; then . "${CONF_DIR}/seatunnel-env.sh" @@ -33,7 +51,7 @@ else fi -CMD=$(java -cp ${APP_JAR} org.apache.seatunnel.core.sql.FlinkSqlStarter ${args}) && EXIT_CODE=$? || EXIT_CODE=$? +CMD=$(java -cp ${APP_JAR} ${APP_MAIN} ${args}) && EXIT_CODE=$? || EXIT_CODE=$? if [ ${EXIT_CODE} -eq 234 ]; then # print usage echo "${CMD}" diff --git a/seatunnel-core/seatunnel-core-flink-sql/src/main/java/org/apache/seatunnel/core/sql/FlinkSqlStarter.java b/seatunnel-core/seatunnel-core-flink-sql/src/main/java/org/apache/seatunnel/core/sql/FlinkSqlStarter.java index e8795ecc760..141a2fd92bd 100644 --- a/seatunnel-core/seatunnel-core-flink-sql/src/main/java/org/apache/seatunnel/core/sql/FlinkSqlStarter.java +++ b/seatunnel-core/seatunnel-core-flink-sql/src/main/java/org/apache/seatunnel/core/sql/FlinkSqlStarter.java @@ -39,6 +39,7 @@ public class FlinkSqlStarter implements Starter { FlinkSqlStarter(String[] args) { this.flinkCommandArgs = CommandLineUtils.parseCommandArgs(args, FlinkJobType.SQL); // set the deployment mode, used to get the job jar path. + Common.setStarter(true); Common.setDeployMode(flinkCommandArgs.getDeployMode()); this.appJar = Common.appLibDir().resolve(APP_JAR_NAME).toString(); } diff --git a/seatunnel-core/seatunnel-core-flink/src/main/bin/start-seatunnel-flink.sh b/seatunnel-core/seatunnel-core-flink/src/main/bin/start-seatunnel-flink.sh index d135ae7deb4..4e5906841d5 100755 --- a/seatunnel-core/seatunnel-core-flink/src/main/bin/start-seatunnel-flink.sh +++ b/seatunnel-core/seatunnel-core-flink/src/main/bin/start-seatunnel-flink.sh @@ -17,9 +17,28 @@ # set -eu -APP_DIR=$(cd $(dirname ${0})/../;pwd) +# resolve links - $0 may be a softlink +PRG="$0" + +while [ -h "$PRG" ] ; do + # shellcheck disable=SC2006 + ls=`ls -ld "$PRG"` + # shellcheck disable=SC2006 + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + # shellcheck disable=SC2006 + PRG=`dirname "$PRG"`/"$link" + fi +done + +PRG_DIR=`dirname "$PRG"` +APP_DIR=`cd "$PRG_DIR/.." >/dev/null; pwd` CONF_DIR=${APP_DIR}/config APP_JAR=${APP_DIR}/lib/seatunnel-core-flink.jar +ENV_PARAMETERS_MAIN="org.apache.seatunnel.core.flink.FlinkEnvParameterParser" +APP_MAIN="org.apache.seatunnel.core.flink.FlinkStarter" if [ -f "${CONF_DIR}/seatunnel-env.sh" ]; then . "${CONF_DIR}/seatunnel-env.sh" @@ -32,7 +51,7 @@ else args=$@ fi -ENV_PARAMETERS_OR_ERROR=$(java -cp ${APP_JAR} org.apache.seatunnel.core.flink.FlinkEnvParameterParser ${args}) && EXIT_CODE=$? || EXIT_CODE=$? +ENV_PARAMETERS_OR_ERROR=$(java -cp ${APP_JAR} ${ENV_PARAMETERS_MAIN} ${args}) && EXIT_CODE=$? || EXIT_CODE=$? if [ ${EXIT_CODE} -eq 0 ]; then echo "Export JVM_ARGS: ${ENV_PARAMETERS_OR_ERROR}" export JVM_ARGS="${ENV_PARAMETERS_OR_ERROR}" @@ -41,7 +60,7 @@ else exit ${EXIT_CODE} fi -CMD=$(java -cp ${APP_JAR} org.apache.seatunnel.core.flink.FlinkStarter ${args}) && EXIT_CODE=$? || EXIT_CODE=$? +CMD=$(java -cp ${APP_JAR} ${APP_MAIN} ${args}) && EXIT_CODE=$? || EXIT_CODE=$? 
if [ ${EXIT_CODE} -eq 234 ]; then # print usage echo "${CMD}" diff --git a/seatunnel-core/seatunnel-core-flink/src/main/java/org/apache/seatunnel/core/flink/FlinkStarter.java b/seatunnel-core/seatunnel-core-flink/src/main/java/org/apache/seatunnel/core/flink/FlinkStarter.java index 20ea886632b..cd4a2be7a7a 100644 --- a/seatunnel-core/seatunnel-core-flink/src/main/java/org/apache/seatunnel/core/flink/FlinkStarter.java +++ b/seatunnel-core/seatunnel-core-flink/src/main/java/org/apache/seatunnel/core/flink/FlinkStarter.java @@ -47,6 +47,7 @@ public class FlinkStarter implements Starter { this.flinkCommandArgs = CommandLineUtils.parseCommandArgs(args, FlinkJobType.JAR); // set the deployment mode, used to get the job jar path. Common.setDeployMode(flinkCommandArgs.getDeployMode()); + Common.setStarter(true); this.appJar = Common.appLibDir().resolve(APP_JAR_NAME).toString(); } diff --git a/seatunnel-core/seatunnel-core-flink/src/main/java/org/apache/seatunnel/core/flink/config/FlinkExecutionContext.java b/seatunnel-core/seatunnel-core-flink/src/main/java/org/apache/seatunnel/core/flink/config/FlinkExecutionContext.java index 773feb870b1..2acb8369934 100644 --- a/seatunnel-core/seatunnel-core-flink/src/main/java/org/apache/seatunnel/core/flink/config/FlinkExecutionContext.java +++ b/seatunnel-core/seatunnel-core-flink/src/main/java/org/apache/seatunnel/core/flink/config/FlinkExecutionContext.java @@ -33,7 +33,9 @@ import java.net.URL; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; public class FlinkExecutionContext extends AbstractExecutionContext { @@ -47,12 +49,12 @@ public FlinkExecutionContext(Config config, EngineType engine) { this.flinkSourcePluginDiscovery = new FlinkSourcePluginDiscovery(); this.flinkTransformPluginDiscovery = new FlinkTransformPluginDiscovery(); this.flinkSinkPluginDiscovery = new FlinkSinkPluginDiscovery(); - List pluginJars = new ArrayList<>(); + Set pluginJars = new HashSet<>(); // since we didn't split the transform plugin jars, we just need to register the source/sink plugin jars pluginJars.addAll(flinkSourcePluginDiscovery.getPluginJarPaths(getPluginIdentifiers(PluginType.SOURCE))); pluginJars.addAll(flinkSinkPluginDiscovery.getPluginJarPaths(getPluginIdentifiers(PluginType.SINK))); - this.pluginJars = pluginJars; - this.getEnvironment().registerPlugin(pluginJars); + this.pluginJars = new ArrayList<>(pluginJars); + this.getEnvironment().registerPlugin(this.pluginJars); } @Override @@ -63,7 +65,7 @@ public List> getSources() { return configList.stream() .map(pluginConfig -> { PluginIdentifier pluginIdentifier = PluginIdentifier.of(engineType, pluginType, pluginConfig.getString("plugin_name")); - BaseSource pluginInstance = flinkSourcePluginDiscovery.getPluginInstance(pluginIdentifier); + BaseSource pluginInstance = flinkSourcePluginDiscovery.createPluginInstance(pluginIdentifier); pluginInstance.setConfig(pluginConfig); return pluginInstance; }).collect(Collectors.toList()); @@ -77,7 +79,7 @@ public List> getTransforms() { return configList.stream() .map(pluginConfig -> { PluginIdentifier pluginIdentifier = PluginIdentifier.of(engineType, pluginType, pluginConfig.getString("plugin_name")); - BaseTransform pluginInstance = flinkTransformPluginDiscovery.getPluginInstance(pluginIdentifier); + BaseTransform pluginInstance = flinkTransformPluginDiscovery.createPluginInstance(pluginIdentifier); pluginInstance.setConfig(pluginConfig); return pluginInstance; 
}).collect(Collectors.toList()); @@ -91,7 +93,7 @@ public List> getSinks() { return configList.stream() .map(pluginConfig -> { PluginIdentifier pluginIdentifier = PluginIdentifier.of(engineType, pluginType, pluginConfig.getString("plugin_name")); - BaseSink pluginInstance = flinkSinkPluginDiscovery.getPluginInstance(pluginIdentifier); + BaseSink pluginInstance = flinkSinkPluginDiscovery.createPluginInstance(pluginIdentifier); pluginInstance.setConfig(pluginConfig); return pluginInstance; }).collect(Collectors.toList()); diff --git a/seatunnel-core/seatunnel-core-flink/src/test/java/org/apache/seatunnel/core/flink/args/FlinkCommandArgsTest.java b/seatunnel-core/seatunnel-core-flink/src/test/java/org/apache/seatunnel/core/flink/args/FlinkCommandArgsTest.java index 56f4f5e6374..a4dba1b27cc 100644 --- a/seatunnel-core/seatunnel-core-flink/src/test/java/org/apache/seatunnel/core/flink/args/FlinkCommandArgsTest.java +++ b/seatunnel-core/seatunnel-core-flink/src/test/java/org/apache/seatunnel/core/flink/args/FlinkCommandArgsTest.java @@ -27,7 +27,7 @@ public class FlinkCommandArgsTest { @Test public void testParseFlinkArgs() { - String[] args = {"-c", "app.conf", "-t", "-i", "city=shenyang", "-i", "date=20200202"}; + String[] args = {"-c", "app.conf", "-ck", "-i", "city=shenyang", "-i", "date=20200202"}; FlinkCommandArgs flinkArgs = new FlinkCommandArgs(); JCommander.newBuilder() .addObject(flinkArgs) diff --git a/seatunnel-core/seatunnel-core-flink/src/test/java/org/apache/seatunnel/core/flink/utils/CommandLineUtilsTest.java b/seatunnel-core/seatunnel-core-flink/src/test/java/org/apache/seatunnel/core/flink/utils/CommandLineUtilsTest.java index fe206c0f595..2befcdbe877 100644 --- a/seatunnel-core/seatunnel-core-flink/src/test/java/org/apache/seatunnel/core/flink/utils/CommandLineUtilsTest.java +++ b/seatunnel-core/seatunnel-core-flink/src/test/java/org/apache/seatunnel/core/flink/utils/CommandLineUtilsTest.java @@ -34,14 +34,14 @@ public class CommandLineUtilsTest { @Test public void testParseCommandArgs() { - String[] args = {"--detached", "-c", "app.conf", "-t", "-i", "city=shenyang", "-i", "date=20200202", + String[] args = {"--detached", "-c", "app.conf", "-ck", "-i", "city=shenyang", "-i", "date=20200202", "-r", "run-application", "--unkown", "unkown-command"}; FlinkCommandArgs flinkCommandArgs = CommandLineUtils.parseCommandArgs(args, FlinkJobType.JAR); Assert.assertEquals(flinkCommandArgs.getFlinkParams(), Arrays.asList("--detached", "--unkown", "unkown-command")); Assert.assertEquals(flinkCommandArgs.getRunMode(), FlinkRunMode.APPLICATION_RUN); Assert.assertEquals(flinkCommandArgs.getVariables(), Arrays.asList("city=shenyang", "date=20200202")); - String[] args1 = {"--detached", "-c", "app.conf", "-t", "-i", "city=shenyang", "-i", "date=20200202", + String[] args1 = {"--detached", "-c", "app.conf", "-ck", "-i", "city=shenyang", "-i", "date=20200202", "-r", "run-application", "--unkown", "unkown-command"}; flinkCommandArgs = CommandLineUtils.parseCommandArgs(args1, FlinkJobType.SQL); Assert.assertEquals(flinkCommandArgs.getFlinkParams(), Arrays.asList("--detached", "--unkown", "unkown-command")); @@ -51,7 +51,7 @@ public void testParseCommandArgs() { @Test public void testBuildFlinkJarCommand() throws FileNotFoundException { - String[] args = {"--detached", "-c", APP_CONF_PATH, "-t", "-i", "city=shenyang", "-i", "date=20200202", + String[] args = {"--detached", "-c", APP_CONF_PATH, "-ck", "-i", "city=shenyang", "-i", "date=20200202", "-r", "run-application", "--unkown", "unkown-command"}; 
FlinkCommandArgs flinkCommandArgs = CommandLineUtils.parseCommandArgs(args, FlinkJobType.JAR); List commands = CommandLineUtils.buildFlinkCommand(flinkCommandArgs, "CLASS_NAME", "/path/to/jar"); @@ -65,14 +65,14 @@ public void testBuildFlinkJarCommand() throws FileNotFoundException { Arrays.asList("${FLINK_HOME}/bin/flink", "run-application", "--detached", "--unkown", "unkown-command", "-c", "CLASS_NAME", "/path/to/jar", "--config", APP_CONF_PATH, "--check", "-Dcity=shenyang", "-Ddate=20200202")); - String[] args1 = {"--detached", "-c", "app.conf", "-t", "-i", "city=shenyang", "-i", "date=20200202", + String[] args1 = {"--detached", "-c", "app.conf", "-ck", "-i", "city=shenyang", "-i", "date=20200202", "-r", "run-application", "--unkown", "unkown-command"}; } @Test public void testBuildFlinkSQLCommand() throws FileNotFoundException{ - String[] args = {"--detached", "-c", SQL_CONF_PATH, "-t", "-i", "city=shenyang", "-i", "date=20200202", + String[] args = {"--detached", "-c", SQL_CONF_PATH, "-ck", "-i", "city=shenyang", "-i", "date=20200202", "-r", "run-application", "--unkown", "unkown-command"}; FlinkCommandArgs flinkCommandArgs = CommandLineUtils.parseCommandArgs(args, FlinkJobType.SQL); List commands = CommandLineUtils.buildFlinkCommand(flinkCommandArgs, "CLASS_NAME", "/path/to/jar"); diff --git a/seatunnel-core/seatunnel-core-spark/pom.xml b/seatunnel-core/seatunnel-core-spark/pom.xml index 968b99f030d..5c1264b1850 100644 --- a/seatunnel-core/seatunnel-core-spark/pom.xml +++ b/seatunnel-core/seatunnel-core-spark/pom.xml @@ -107,6 +107,12 @@ ${project.version} + + org.apache.seatunnel + seatunnel-transform-spark-nulltf + ${project.version} + + org.apache.seatunnel seatunnel-transform-spark-null-rate diff --git a/seatunnel-core/seatunnel-core-spark/src/main/bin/start-seatunnel-spark.sh b/seatunnel-core/seatunnel-core-spark/src/main/bin/start-seatunnel-spark.sh index 35ea18a956c..4fa08ef2dcd 100755 --- a/seatunnel-core/seatunnel-core-spark/src/main/bin/start-seatunnel-spark.sh +++ b/seatunnel-core/seatunnel-core-spark/src/main/bin/start-seatunnel-spark.sh @@ -16,9 +16,27 @@ # limitations under the License. # set -eu -APP_DIR=$(cd $(dirname ${0})/../;pwd) +# resolve links - $0 may be a softlink +PRG="$0" + +while [ -h "$PRG" ] ; do + # shellcheck disable=SC2006 + ls=`ls -ld "$PRG"` + # shellcheck disable=SC2006 + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + # shellcheck disable=SC2006 + PRG=`dirname "$PRG"`/"$link" + fi +done + +PRG_DIR=`dirname "$PRG"` +APP_DIR=`cd "$PRG_DIR/.." >/dev/null; pwd` CONF_DIR=${APP_DIR}/config APP_JAR=${APP_DIR}/lib/seatunnel-core-spark.jar +APP_MAIN="org.apache.seatunnel.core.spark.SparkStarter" if [ -f "${CONF_DIR}/seatunnel-env.sh" ]; then . "${CONF_DIR}/seatunnel-env.sh" @@ -31,7 +49,7 @@ else args=$@ fi -CMD=$(java -cp ${APP_JAR} org.apache.seatunnel.core.spark.SparkStarter ${args}) && EXIT_CODE=$? || EXIT_CODE=$? +CMD=$(java -cp ${APP_JAR} ${APP_MAIN} ${args}) && EXIT_CODE=$? || EXIT_CODE=$? 
if [ ${EXIT_CODE} -eq 234 ]; then # print usage echo "${CMD}" diff --git a/seatunnel-core/seatunnel-core-spark/src/main/java/org/apache/seatunnel/core/spark/SparkStarter.java b/seatunnel-core/seatunnel-core-spark/src/main/java/org/apache/seatunnel/core/spark/SparkStarter.java index 0fd70b629df..7eca89b4b36 100644 --- a/seatunnel-core/seatunnel-core-spark/src/main/java/org/apache/seatunnel/core/spark/SparkStarter.java +++ b/seatunnel-core/seatunnel-core-spark/src/main/java/org/apache/seatunnel/core/spark/SparkStarter.java @@ -50,9 +50,11 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -148,8 +150,8 @@ private static SparkCommandArgs parseCommandArgs(String[] args) { public List buildCommands() throws IOException { setSparkConf(); Common.setDeployMode(commandArgs.getDeployMode()); + Common.setStarter(true); this.jars.addAll(getPluginsJarDependencies()); - this.jars.addAll(listJars(Common.appLibDir())); this.jars.addAll(getConnectorJarDependencies()); this.appName = this.sparkConf.getOrDefault("spark.app.name", Constants.LOGO); return buildFinal(); @@ -213,7 +215,7 @@ private List getConnectorJarDependencies() { return Collections.emptyList(); } Config config = new ConfigBuilder(Paths.get(commandArgs.getConfigFile())).getConfig(); - List pluginJars = new ArrayList<>(); + Set pluginJars = new HashSet<>(); SparkSourcePluginDiscovery sparkSourcePluginDiscovery = new SparkSourcePluginDiscovery(); SparkSinkPluginDiscovery sparkSinkPluginDiscovery = new SparkSinkPluginDiscovery(); pluginJars.addAll(sparkSourcePluginDiscovery.getPluginJarPaths(getPluginIdentifiers(config, PluginType.SOURCE))); @@ -221,18 +223,6 @@ private List getConnectorJarDependencies() { return pluginJars.stream().map(url -> new File(url.getPath()).toPath()).collect(Collectors.toList()); } - /** - * list jars in given directory - */ - private List listJars(Path dir) throws IOException { - try (Stream stream = Files.list(dir)) { - return stream - .filter(it -> !Files.isDirectory(it)) - .filter(it -> it.getFileName().endsWith("jar")) - .collect(Collectors.toList()); - } - } - /** * build final spark-submit commands */ @@ -405,10 +395,9 @@ private ClusterModeSparkStarter(String[] args, SparkCommandArgs commandArgs) { @Override public List buildCommands() throws IOException { Common.setDeployMode(commandArgs.getDeployMode()); + Common.setStarter(true); Path pluginTarball = Common.pluginTarball(); - if (Files.notExists(pluginTarball)) { - CompressionUtils.tarGzip(Common.pluginRootDir(), pluginTarball); - } + CompressionUtils.tarGzip(Common.pluginRootDir(), pluginTarball); this.files.add(pluginTarball); this.files.add(Paths.get(commandArgs.getConfigFile())); return super.buildCommands(); diff --git a/seatunnel-core/seatunnel-core-spark/src/main/java/org/apache/seatunnel/core/spark/config/SparkExecutionContext.java b/seatunnel-core/seatunnel-core-spark/src/main/java/org/apache/seatunnel/core/spark/config/SparkExecutionContext.java index 7effd19b349..07854b83cf5 100644 --- a/seatunnel-core/seatunnel-core-spark/src/main/java/org/apache/seatunnel/core/spark/config/SparkExecutionContext.java +++ b/seatunnel-core/seatunnel-core-spark/src/main/java/org/apache/seatunnel/core/spark/config/SparkExecutionContext.java @@ -33,7 +33,9 @@ import java.net.URL; import java.util.ArrayList; 
+import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; public class SparkExecutionContext extends AbstractExecutionContext<SparkEnvironment> { @@ -47,11 +49,11 @@ public SparkExecutionContext(Config config, EngineType engine) { this.sparkSourcePluginDiscovery = new SparkSourcePluginDiscovery(); this.sparkTransformPluginDiscovery = new SparkTransformPluginDiscovery(); this.sparkSinkPluginDiscovery = new SparkSinkPluginDiscovery(); - List<URL> pluginJars = new ArrayList<>(); + Set<URL> pluginJars = new HashSet<>(); pluginJars.addAll(sparkSourcePluginDiscovery.getPluginJarPaths(getPluginIdentifiers(PluginType.SOURCE))); pluginJars.addAll(sparkSinkPluginDiscovery.getPluginJarPaths(getPluginIdentifiers(PluginType.SINK))); - this.pluginJars = pluginJars; - this.getEnvironment().registerPlugin(pluginJars); + this.pluginJars = new ArrayList<>(pluginJars); + this.getEnvironment().registerPlugin(this.pluginJars); } @Override @@ -62,7 +64,7 @@ public List<BaseSource<SparkEnvironment>> getSources() { return configList.stream() .map(pluginConfig -> { PluginIdentifier pluginIdentifier = PluginIdentifier.of(engineType, pluginType, pluginConfig.getString("plugin_name")); - BaseSource<SparkEnvironment> pluginInstance = sparkSourcePluginDiscovery.getPluginInstance(pluginIdentifier); + BaseSource<SparkEnvironment> pluginInstance = sparkSourcePluginDiscovery.createPluginInstance(pluginIdentifier); pluginInstance.setConfig(pluginConfig); return pluginInstance; }).collect(Collectors.toList()); @@ -76,7 +78,7 @@ public List<BaseTransform<SparkEnvironment>> getTransforms() { return configList.stream() .map(pluginConfig -> { PluginIdentifier pluginIdentifier = PluginIdentifier.of(engineType, pluginType, pluginConfig.getString("plugin_name")); - BaseTransform<SparkEnvironment> pluginInstance = sparkTransformPluginDiscovery.getPluginInstance(pluginIdentifier); + BaseTransform<SparkEnvironment> pluginInstance = sparkTransformPluginDiscovery.createPluginInstance(pluginIdentifier); pluginInstance.setConfig(pluginConfig); return pluginInstance; }).collect(Collectors.toList()); @@ -90,7 +92,7 @@ public List<BaseSink<SparkEnvironment>> getSinks() { return configList.stream() .map(pluginConfig -> { PluginIdentifier pluginIdentifier = PluginIdentifier.of(engineType, pluginType, pluginConfig.getString("plugin_name")); - BaseSink<SparkEnvironment> pluginInstance = sparkSinkPluginDiscovery.getPluginInstance(pluginIdentifier); + BaseSink<SparkEnvironment> pluginInstance = sparkSinkPluginDiscovery.createPluginInstance(pluginIdentifier); pluginInstance.setConfig(pluginConfig); return pluginInstance; }).collect(Collectors.toList()); diff --git a/seatunnel-core/seatunnel-flink-starter/src/main/bin/start-seatunnel-flink-new-connector.sh b/seatunnel-core/seatunnel-flink-starter/src/main/bin/start-seatunnel-flink-new-connector.sh index e5d52639801..0c276319b28 100755 --- a/seatunnel-core/seatunnel-flink-starter/src/main/bin/start-seatunnel-flink-new-connector.sh +++ b/seatunnel-core/seatunnel-flink-starter/src/main/bin/start-seatunnel-flink-new-connector.sh @@ -17,9 +17,27 @@ # set -eu -APP_DIR=$(cd $(dirname ${0})/../;pwd) +# resolve links - $0 may be a softlink +PRG="$0" + +while [ -h "$PRG" ] ; do + # shellcheck disable=SC2006 + ls=`ls -ld "$PRG"` + # shellcheck disable=SC2006 + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + # shellcheck disable=SC2006 + PRG=`dirname "$PRG"`/"$link" + fi +done + +PRG_DIR=`dirname "$PRG"` +APP_DIR=`cd "$PRG_DIR/.."
>/dev/null; pwd` CONF_DIR=${APP_DIR}/config APP_JAR=${APP_DIR}/lib/seatunnel-flink-starter.jar +APP_MAIN="org.apache.seatunnel.core.starter.flink.FlinkStarter" if [ -f "${CONF_DIR}/seatunnel-env.sh" ]; then . "${CONF_DIR}/seatunnel-env.sh" @@ -32,15 +50,15 @@ else args=$@ fi -CMD=$(java -cp ${APP_JAR} org.apache.seatunnel.core.starter.flink.FlinkStarter ${args}) && EXIT_CODE=$? || EXIT_CODE=$? +CMD=$(java -cp ${APP_JAR} ${APP_MAIN} ${args}) && EXIT_CODE=$? || EXIT_CODE=$? if [ ${EXIT_CODE} -eq 234 ]; then # print usage - echo ${CMD} + echo "${CMD}" exit 0 elif [ ${EXIT_CODE} -eq 0 ]; then echo "Execute SeaTunnel Flink Job: ${CMD}" eval ${CMD} else - echo ${CMD} + echo "${CMD}" exit ${EXIT_CODE} fi diff --git a/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java b/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java index 03a5bbfb267..52b207a7943 100644 --- a/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java +++ b/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java @@ -47,6 +47,7 @@ public class FlinkStarter implements Starter { this.flinkCommandArgs = CommandLineUtils.parseCommandArgs(args, FlinkJobType.JAR); // set the deployment mode, used to get the job jar path. Common.setDeployMode(flinkCommandArgs.getDeployMode()); + Common.setStarter(true); this.appJar = Common.appLibDir().resolve(APP_JAR_NAME).toString(); } diff --git a/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/env/FlinkEnvironment.java b/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/env/FlinkEnvironment.java deleted file mode 100644 index f9de514b073..00000000000 --- a/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/env/FlinkEnvironment.java +++ /dev/null @@ -1,286 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.seatunnel.core.starter.flink.env; - -import org.apache.seatunnel.apis.base.env.RuntimeEnv; -import org.apache.seatunnel.common.config.CheckResult; -import org.apache.seatunnel.common.constants.JobMode; -import org.apache.seatunnel.common.utils.ReflectionUtils; -import org.apache.seatunnel.flink.util.ConfigKeyName; -import org.apache.seatunnel.flink.util.EnvironmentUtil; - -import org.apache.seatunnel.shade.com.typesafe.config.Config; - -import org.apache.flink.api.common.RuntimeExecutionMode; -import org.apache.flink.api.common.time.Time; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.configuration.PipelineOptions; -import org.apache.flink.contrib.streaming.state.RocksDBStateBackend; -import org.apache.flink.runtime.state.StateBackend; -import org.apache.flink.runtime.state.filesystem.FsStateBackend; -import org.apache.flink.streaming.api.CheckpointingMode; -import org.apache.flink.streaming.api.TimeCharacteristic; -import org.apache.flink.streaming.api.environment.CheckpointConfig; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.table.api.EnvironmentSettings; -import org.apache.flink.table.api.TableConfig; -import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; -import org.apache.flink.util.TernaryBoolean; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.net.URL; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; -import java.util.stream.Collectors; - -public class FlinkEnvironment implements RuntimeEnv { - - private static final Logger LOGGER = LoggerFactory.getLogger(FlinkEnvironment.class); - - private Config config; - - private StreamExecutionEnvironment environment; - - private StreamTableEnvironment tableEnvironment; - - private JobMode jobMode; - - private String jobName = "seatunnel"; - - @Override - public FlinkEnvironment setConfig(Config config) { - this.config = config; - return this; - } - - @Override - public Config getConfig() { - return config; - } - - @Override - public CheckResult checkConfig() { - return EnvironmentUtil.checkRestartStrategy(config); - } - - @Override - public FlinkEnvironment prepare() { - // Batch/Streaming both use data stream api in SeaTunnel New API - createStreamEnvironment(); - createStreamTableEnvironment(); - if (config.hasPath("job.name")) { - jobName = config.getString("job.name"); - } - return this; - } - - public String getJobName() { - return jobName; - } - - @Override - public FlinkEnvironment setJobMode(JobMode jobMode) { - this.jobMode = jobMode; - return this; - } - - @Override - public JobMode getJobMode() { - return jobMode; - } - - @Override - public void registerPlugin(List pluginPaths) { - pluginPaths.forEach(url -> LOGGER.info("register plugins : {}", url)); - Configuration configuration; - try { - configuration = - (Configuration) Objects.requireNonNull(ReflectionUtils.getDeclaredMethod(StreamExecutionEnvironment.class, - "getConfiguration")).orElseThrow(() -> new RuntimeException("can't find " + - "method: getConfiguration")).invoke(this.environment); - } catch (Exception e) { - throw new RuntimeException("Get flink configuration from environment failed", e); - } - List jars = configuration.get(PipelineOptions.JARS); - if (jars == null) { - jars = new ArrayList<>(); - } - jars.addAll(pluginPaths.stream().map(URL::toString).collect(Collectors.toList())); - configuration.set(PipelineOptions.JARS, jars); - List classpath = 
configuration.get(PipelineOptions.CLASSPATHS); - if (classpath == null) { - classpath = new ArrayList<>(); - } - classpath.addAll(pluginPaths.stream().map(URL::toString).collect(Collectors.toList())); - configuration.set(PipelineOptions.CLASSPATHS, classpath); - } - - public StreamExecutionEnvironment getStreamExecutionEnvironment() { - return environment; - } - - public StreamTableEnvironment getStreamTableEnvironment() { - return tableEnvironment; - } - - private void createStreamTableEnvironment() { - // use blink and streammode - EnvironmentSettings.Builder envBuilder = EnvironmentSettings.newInstance() - .inStreamingMode(); - if (this.config.hasPath(ConfigKeyName.PLANNER) && "blink" - .equals(this.config.getString(ConfigKeyName.PLANNER))) { - envBuilder.useBlinkPlanner(); - } else { - envBuilder.useOldPlanner(); - } - EnvironmentSettings environmentSettings = envBuilder.build(); - - tableEnvironment = StreamTableEnvironment.create(getStreamExecutionEnvironment(), environmentSettings); - TableConfig config = tableEnvironment.getConfig(); - if (this.config.hasPath(ConfigKeyName.MAX_STATE_RETENTION_TIME) && this.config - .hasPath(ConfigKeyName.MIN_STATE_RETENTION_TIME)) { - long max = this.config.getLong(ConfigKeyName.MAX_STATE_RETENTION_TIME); - long min = this.config.getLong(ConfigKeyName.MIN_STATE_RETENTION_TIME); - config.setIdleStateRetentionTime(Time.seconds(min), Time.seconds(max)); - } - } - - private void createStreamEnvironment() { - environment = StreamExecutionEnvironment.getExecutionEnvironment(); - setTimeCharacteristic(); - - setCheckpoint(); - - EnvironmentUtil.setRestartStrategy(config, environment.getConfig()); - - if (config.hasPath(ConfigKeyName.BUFFER_TIMEOUT_MILLIS)) { - long timeout = config.getLong(ConfigKeyName.BUFFER_TIMEOUT_MILLIS); - environment.setBufferTimeout(timeout); - } - - if (config.hasPath(ConfigKeyName.PARALLELISM)) { - int parallelism = config.getInt(ConfigKeyName.PARALLELISM); - environment.setParallelism(parallelism); - } - - if (config.hasPath(ConfigKeyName.MAX_PARALLELISM)) { - int max = config.getInt(ConfigKeyName.MAX_PARALLELISM); - environment.setMaxParallelism(max); - } - - if (this.jobMode.equals(JobMode.BATCH)) { - environment.setRuntimeMode(RuntimeExecutionMode.BATCH); - } - } - - private void setTimeCharacteristic() { - if (config.hasPath(ConfigKeyName.TIME_CHARACTERISTIC)) { - String timeType = config.getString(ConfigKeyName.TIME_CHARACTERISTIC); - switch (timeType.toLowerCase()) { - case "event-time": - environment.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); - break; - case "ingestion-time": - environment.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); - break; - case "processing-time": - environment.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); - break; - default: - LOGGER.warn( - "set time-characteristic failed, unknown time-characteristic [{}],only support event-time,ingestion-time,processing-time", - timeType); - break; - } - } - } - - private void setCheckpoint() { - if (config.hasPath(ConfigKeyName.CHECKPOINT_INTERVAL)) { - CheckpointConfig checkpointConfig = environment.getCheckpointConfig(); - long interval = config.getLong(ConfigKeyName.CHECKPOINT_INTERVAL); - environment.enableCheckpointing(interval); - - if (config.hasPath(ConfigKeyName.CHECKPOINT_MODE)) { - String mode = config.getString(ConfigKeyName.CHECKPOINT_MODE); - switch (mode.toLowerCase()) { - case "exactly-once": - checkpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); - break; - case "at-least-once": 
- checkpointConfig.setCheckpointingMode(CheckpointingMode.AT_LEAST_ONCE); - break; - default: - LOGGER.warn( - "set checkpoint.mode failed, unknown checkpoint.mode [{}],only support exactly-once,at-least-once", - mode); - break; - } - } - - if (config.hasPath(ConfigKeyName.CHECKPOINT_TIMEOUT)) { - long timeout = config.getLong(ConfigKeyName.CHECKPOINT_TIMEOUT); - checkpointConfig.setCheckpointTimeout(timeout); - } - - if (config.hasPath(ConfigKeyName.CHECKPOINT_DATA_URI)) { - String uri = config.getString(ConfigKeyName.CHECKPOINT_DATA_URI); - StateBackend fsStateBackend = new FsStateBackend(uri); - if (config.hasPath(ConfigKeyName.STATE_BACKEND)) { - String stateBackend = config.getString(ConfigKeyName.STATE_BACKEND); - if ("rocksdb".equalsIgnoreCase(stateBackend)) { - StateBackend rocksDBStateBackend = new RocksDBStateBackend(fsStateBackend, TernaryBoolean.TRUE); - environment.setStateBackend(rocksDBStateBackend); - } - } else { - environment.setStateBackend(fsStateBackend); - } - } - - if (config.hasPath(ConfigKeyName.MAX_CONCURRENT_CHECKPOINTS)) { - int max = config.getInt(ConfigKeyName.MAX_CONCURRENT_CHECKPOINTS); - checkpointConfig.setMaxConcurrentCheckpoints(max); - } - - if (config.hasPath(ConfigKeyName.CHECKPOINT_CLEANUP_MODE)) { - boolean cleanup = config.getBoolean(ConfigKeyName.CHECKPOINT_CLEANUP_MODE); - if (cleanup) { - checkpointConfig.enableExternalizedCheckpoints( - CheckpointConfig.ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION); - } else { - checkpointConfig.enableExternalizedCheckpoints( - CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); - - } - } - - if (config.hasPath(ConfigKeyName.MIN_PAUSE_BETWEEN_CHECKPOINTS)) { - long minPause = config.getLong(ConfigKeyName.MIN_PAUSE_BETWEEN_CHECKPOINTS); - checkpointConfig.setMinPauseBetweenCheckpoints(minPause); - } - - if (config.hasPath(ConfigKeyName.FAIL_ON_CHECKPOINTING_ERRORS)) { - int failNum = config.getInt(ConfigKeyName.FAIL_ON_CHECKPOINTING_ERRORS); - checkpointConfig.setTolerableCheckpointFailureNumber(failNum); - } - } - } - -} diff --git a/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/AbstractPluginExecuteProcessor.java b/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/AbstractPluginExecuteProcessor.java index 6c3ebceb1c7..5e3178bfb81 100644 --- a/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/AbstractPluginExecuteProcessor.java +++ b/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/AbstractPluginExecuteProcessor.java @@ -20,6 +20,7 @@ import static org.apache.seatunnel.apis.base.plugin.Plugin.RESULT_TABLE_NAME; import static org.apache.seatunnel.apis.base.plugin.Plugin.SOURCE_TABLE_NAME; +import org.apache.seatunnel.common.utils.ReflectionUtils; import org.apache.seatunnel.flink.FlinkEnvironment; import org.apache.seatunnel.flink.util.TableUtil; @@ -30,8 +31,11 @@ import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; import org.apache.flink.types.Row; +import java.net.URL; +import java.net.URLClassLoader; import java.util.List; import java.util.Optional; +import java.util.function.BiConsumer; public abstract class AbstractPluginExecuteProcessor implements PluginExecuteProcessor { @@ -41,6 +45,17 @@ public abstract class AbstractPluginExecuteProcessor implements PluginExecute protected static final String ENGINE_TYPE = "seatunnel"; protected static final 
String PLUGIN_NAME = "plugin_name"; + protected final BiConsumer addUrlToClassloader = (classLoader, url) -> { + if (classLoader.getClass().getName().endsWith("SafetyNetWrapperClassLoader")) { + URLClassLoader c = (URLClassLoader) ReflectionUtils.getField(classLoader, "inner").get(); + ReflectionUtils.invoke(c, "addURL", url); + } else if (classLoader instanceof URLClassLoader) { + ReflectionUtils.invoke(classLoader, "addURL", url); + } else { + throw new RuntimeException("Unsupported classloader: " + classLoader.getClass().getName()); + } + }; + protected AbstractPluginExecuteProcessor(FlinkEnvironment flinkEnvironment, List pluginConfigs) { this.flinkEnvironment = flinkEnvironment; diff --git a/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java b/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java index 079e3f95e72..8de5d422dcd 100644 --- a/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java +++ b/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java @@ -36,7 +36,6 @@ import java.net.URL; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.stream.Collectors; @@ -53,17 +52,17 @@ protected SinkExecuteProcessor(FlinkEnvironment flinkEnvironment, @Override protected List> initializePlugins(List pluginConfigs) { - SeaTunnelSinkPluginDiscovery sinkPluginDiscovery = new SeaTunnelSinkPluginDiscovery(); + SeaTunnelSinkPluginDiscovery sinkPluginDiscovery = new SeaTunnelSinkPluginDiscovery(addUrlToClassloader); List pluginJars = new ArrayList<>(); List> sinks = pluginConfigs.stream().map(sinkConfig -> { PluginIdentifier pluginIdentifier = PluginIdentifier.of(ENGINE_TYPE, PLUGIN_TYPE, sinkConfig.getString(PLUGIN_NAME)); pluginJars.addAll(sinkPluginDiscovery.getPluginJarPaths(Lists.newArrayList(pluginIdentifier))); SeaTunnelSink seaTunnelSink = - sinkPluginDiscovery.getPluginInstance(pluginIdentifier); + sinkPluginDiscovery.createPluginInstance(pluginIdentifier); seaTunnelSink.prepare(sinkConfig); seaTunnelSink.setSeaTunnelContext(SeaTunnelContext.getContext()); return seaTunnelSink; - }).collect(Collectors.toList()); + }).distinct().collect(Collectors.toList()); flinkEnvironment.registerPlugin(pluginJars); return sinks; } @@ -76,7 +75,7 @@ public List> execute(List> upstreamDataStreams) SeaTunnelSink seaTunnelSink = plugins.get(i); DataStream stream = fromSourceTable(sinkConfig).orElse(input); seaTunnelSink.setTypeInfo((SeaTunnelRowType) TypeConverterUtils.convert(stream.getType())); - stream.sinkTo(new FlinkSink<>(seaTunnelSink, Collections.emptyMap())); + stream.sinkTo(new FlinkSink<>(seaTunnelSink)); } // the sink is the last stream return null; diff --git a/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SourceExecuteProcessor.java b/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SourceExecuteProcessor.java index b097c5a8c8c..a1b31836fd0 100644 --- a/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SourceExecuteProcessor.java +++ b/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SourceExecuteProcessor.java @@ -44,7 +44,9 @@ import 
java.net.URL; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; public class SourceExecuteProcessor extends AbstractPluginExecuteProcessor { @@ -99,14 +101,14 @@ private DataStreamSource addSource( @Override protected List initializePlugins(List pluginConfigs) { - SeaTunnelSourcePluginDiscovery sourcePluginDiscovery = new SeaTunnelSourcePluginDiscovery(); + SeaTunnelSourcePluginDiscovery sourcePluginDiscovery = new SeaTunnelSourcePluginDiscovery(addUrlToClassloader); List sources = new ArrayList<>(); - List jars = new ArrayList<>(); + Set jars = new HashSet<>(); for (Config sourceConfig : pluginConfigs) { PluginIdentifier pluginIdentifier = PluginIdentifier.of( ENGINE_TYPE, PLUGIN_TYPE, sourceConfig.getString(PLUGIN_NAME)); jars.addAll(sourcePluginDiscovery.getPluginJarPaths(Lists.newArrayList(pluginIdentifier))); - SeaTunnelSource seaTunnelSource = sourcePluginDiscovery.getPluginInstance(pluginIdentifier); + SeaTunnelSource seaTunnelSource = sourcePluginDiscovery.createPluginInstance(pluginIdentifier); seaTunnelSource.prepare(sourceConfig); seaTunnelSource.setSeaTunnelContext(SeaTunnelContext.getContext()); if (SeaTunnelContext.getContext().getJobMode() == JobMode.BATCH @@ -115,7 +117,7 @@ protected List initializePlugins(List pluginC } sources.add(seaTunnelSource); } - flinkEnvironment.registerPlugin(jars); + flinkEnvironment.registerPlugin(new ArrayList<>(jars)); return sources; } } diff --git a/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/TransformExecuteProcessor.java b/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/TransformExecuteProcessor.java index ab0200e371a..d077f6d3a44 100644 --- a/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/TransformExecuteProcessor.java +++ b/seatunnel-core/seatunnel-flink-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/TransformExecuteProcessor.java @@ -51,11 +51,11 @@ protected List initializePlugins(List pl .map(transformConfig -> { PluginIdentifier pluginIdentifier = PluginIdentifier.of(ENGINE_TYPE, PLUGIN_TYPE, transformConfig.getString(PLUGIN_NAME)); pluginJars.addAll(transformPluginDiscovery.getPluginJarPaths(Lists.newArrayList(pluginIdentifier))); - FlinkStreamTransform pluginInstance = (FlinkStreamTransform) transformPluginDiscovery.getPluginInstance(pluginIdentifier); + FlinkStreamTransform pluginInstance = (FlinkStreamTransform) transformPluginDiscovery.createPluginInstance(pluginIdentifier); pluginInstance.setConfig(transformConfig); pluginInstance.prepare(flinkEnvironment); return pluginInstance; - }).collect(Collectors.toList()); + }).distinct().collect(Collectors.toList()); flinkEnvironment.registerPlugin(pluginJars); return transforms; } diff --git a/seatunnel-core/seatunnel-spark-starter/src/main/bin/start-seatunnel-spark-new-connector.sh b/seatunnel-core/seatunnel-spark-starter/src/main/bin/start-seatunnel-spark-new-connector.sh index 85f33000acd..a54dc55ee33 100755 --- a/seatunnel-core/seatunnel-spark-starter/src/main/bin/start-seatunnel-spark-new-connector.sh +++ b/seatunnel-core/seatunnel-spark-starter/src/main/bin/start-seatunnel-spark-new-connector.sh @@ -16,9 +16,28 @@ # limitations under the License. 
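The List-to-Set switch in SourceExecuteProcessor above (mirrored in SparkStarter and the Spark-side processors) is a deduplication fix: several plugins in one job config can resolve to the same connector jar, and collecting the discovered URLs in a Set keeps each jar from being registered with the pipeline more than once. A small self-contained sketch of the pattern, with made-up jar paths:

    import java.net.MalformedURLException;
    import java.net.URL;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.LinkedHashSet;
    import java.util.List;

    // Collect plugin jar URLs through a set so duplicates collapse before
    // registration. LinkedHashSet also keeps discovery order; the patch uses
    // HashSet, which is enough since registration order does not matter there.
    public final class PluginJarDedupSketch {

        static List<URL> dedup(List<URL> discovered) {
            return new ArrayList<>(new LinkedHashSet<>(discovered));
        }

        public static void main(String[] args) throws MalformedURLException {
            URL kafka = new URL("file:///opt/seatunnel/connectors/seatunnel/connector-kafka.jar");
            URL fake = new URL("file:///opt/seatunnel/connectors/seatunnel/connector-fake.jar");
            // Two plugins resolving to the same jar end up as one entry.
            System.out.println(dedup(Arrays.asList(kafka, fake, kafka)).size()); // prints 2
        }
    }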
# set -eu -APP_DIR=$(cd $(dirname ${0})/../;pwd) + +# resolve links - $0 may be a softlink +PRG="$0" + +while [ -h "$PRG" ] ; do + # shellcheck disable=SC2006 + ls=`ls -ld "$PRG"` + # shellcheck disable=SC2006 + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + # shellcheck disable=SC2006 + PRG=`dirname "$PRG"`/"$link" + fi +done + +PRG_DIR=`dirname "$PRG"` +APP_DIR=`cd "$PRG_DIR/.." >/dev/null; pwd` CONF_DIR=${APP_DIR}/config APP_JAR=${APP_DIR}/lib/seatunnel-spark-starter.jar +APP_MAIN="org.apache.seatunnel.core.starter.spark.SparkStarter" if [ -f "${CONF_DIR}/seatunnel-env.sh" ]; then . "${CONF_DIR}/seatunnel-env.sh" @@ -31,15 +50,15 @@ else args=$@ fi -CMD=$(java -cp ${APP_JAR} org.apache.seatunnel.core.starter.spark.SparkStarter ${args} | tail -n 1) && EXIT_CODE=$? || EXIT_CODE=$? +CMD=$(java -cp ${APP_JAR} ${APP_MAIN} ${args} | tail -n 1) && EXIT_CODE=$? || EXIT_CODE=$? if [ ${EXIT_CODE} -eq 234 ]; then # print usage - echo ${CMD} + echo "${CMD}" exit 0 elif [ ${EXIT_CODE} -eq 0 ]; then - echo "Execute SeaTunnel Spark Job: ${CMD}" - eval ${CMD} + echo "Execute SeaTunnel Spark Job: $(echo ${CMD} | tail -n 1)" + eval $(echo ${CMD} | tail -n 1) else - echo ${CMD} + echo "${CMD}" exit ${EXIT_CODE} fi diff --git a/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java b/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java index 6cc8ef1a8e7..00261a80c03 100644 --- a/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java +++ b/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java @@ -24,13 +24,12 @@ import org.apache.seatunnel.common.config.DeployMode; import org.apache.seatunnel.core.starter.Starter; import org.apache.seatunnel.core.starter.config.ConfigBuilder; -import org.apache.seatunnel.core.starter.config.EngineType; import org.apache.seatunnel.core.starter.config.PluginType; import org.apache.seatunnel.core.starter.spark.args.SparkCommandArgs; import org.apache.seatunnel.core.starter.utils.CompressionUtils; import org.apache.seatunnel.plugin.discovery.PluginIdentifier; -import org.apache.seatunnel.plugin.discovery.spark.SparkSinkPluginDiscovery; -import org.apache.seatunnel.plugin.discovery.spark.SparkSourcePluginDiscovery; +import org.apache.seatunnel.plugin.discovery.seatunnel.SeaTunnelSinkPluginDiscovery; +import org.apache.seatunnel.plugin.discovery.seatunnel.SeaTunnelSourcePluginDiscovery; import org.apache.seatunnel.shade.com.typesafe.config.Config; import org.apache.seatunnel.shade.com.typesafe.config.ConfigFactory; @@ -51,9 +50,11 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -149,8 +150,8 @@ private static SparkCommandArgs parseCommandArgs(String[] args) { public List buildCommands() throws IOException { setSparkConf(); Common.setDeployMode(commandArgs.getDeployMode()); + Common.setStarter(true); this.jars.addAll(getPluginsJarDependencies()); - this.jars.addAll(listJars(Common.appLibDir())); this.jars.addAll(getConnectorJarDependencies()); this.appName = this.sparkConf.getOrDefault("spark.app.name", Constants.LOGO); return buildFinal(); @@ 
-220,31 +221,19 @@ private List getPluginsJarDependencies() throws IOException { * return connector's jars, which located in 'connectors/spark/*'. */ private List getConnectorJarDependencies() { - Path pluginRootDir = Common.connectorJarDir("spark"); + Path pluginRootDir = Common.connectorJarDir("seatunnel"); if (!Files.exists(pluginRootDir) || !Files.isDirectory(pluginRootDir)) { return Collections.emptyList(); } Config config = new ConfigBuilder(Paths.get(commandArgs.getConfigFile())).getConfig(); - List pluginJars = new ArrayList<>(); - SparkSourcePluginDiscovery sparkSourcePluginDiscovery = new SparkSourcePluginDiscovery(); - SparkSinkPluginDiscovery sparkSinkPluginDiscovery = new SparkSinkPluginDiscovery(); - pluginJars.addAll(sparkSourcePluginDiscovery.getPluginJarPaths(getPluginIdentifiers(config, PluginType.SOURCE))); - pluginJars.addAll(sparkSinkPluginDiscovery.getPluginJarPaths(getPluginIdentifiers(config, PluginType.SINK))); + Set pluginJars = new HashSet<>(); + SeaTunnelSourcePluginDiscovery seaTunnelSourcePluginDiscovery = new SeaTunnelSourcePluginDiscovery(); + SeaTunnelSinkPluginDiscovery seaTunnelSinkPluginDiscovery = new SeaTunnelSinkPluginDiscovery(); + pluginJars.addAll(seaTunnelSourcePluginDiscovery.getPluginJarPaths(getPluginIdentifiers(config, PluginType.SOURCE))); + pluginJars.addAll(seaTunnelSinkPluginDiscovery.getPluginJarPaths(getPluginIdentifiers(config, PluginType.SINK))); return pluginJars.stream().map(url -> new File(url.getPath()).toPath()).collect(Collectors.toList()); } - /** - * list jars in given directory - */ - private List listJars(Path dir) throws IOException { - try (Stream stream = Files.list(dir)) { - return stream - .filter(it -> !Files.isDirectory(it)) - .filter(it -> it.getFileName().endsWith("jar")) - .collect(Collectors.toList()); - } - } - /** * build final spark-submit commands */ @@ -327,10 +316,8 @@ private List getPluginIdentifiers(Config config, PluginType... 
return Arrays.stream(pluginTypes).flatMap((Function>) pluginType -> { List configList = config.getConfigList(pluginType.getType()); return configList.stream() - .map(pluginConfig -> PluginIdentifier - .of(EngineType.SPARK.getEngine(), - pluginType.getType(), - pluginConfig.getString("plugin_name"))); + .map(pluginConfig -> PluginIdentifier.of("seatunnel", pluginType.getType(), + pluginConfig.getString("plugin_name"))); }).collect(Collectors.toList()); } @@ -417,10 +404,9 @@ private ClusterModeSparkStarter(String[] args, SparkCommandArgs commandArgs) { @Override public List buildCommands() throws IOException { Common.setDeployMode(commandArgs.getDeployMode()); + Common.setStarter(true); Path pluginTarball = Common.pluginTarball(); - if (Files.notExists(pluginTarball)) { - CompressionUtils.tarGzip(Common.pluginRootDir(), pluginTarball); - } + CompressionUtils.tarGzip(Common.pluginRootDir(), pluginTarball); this.files.add(pluginTarball); this.files.add(Paths.get(commandArgs.getConfigFile())); return super.buildCommands(); diff --git a/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SinkExecuteProcessor.java b/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SinkExecuteProcessor.java index c19770a6347..974d0fcf0f5 100644 --- a/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SinkExecuteProcessor.java +++ b/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SinkExecuteProcessor.java @@ -20,7 +20,6 @@ import org.apache.seatunnel.api.common.SeaTunnelContext; import org.apache.seatunnel.api.sink.SeaTunnelSink; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; -import org.apache.seatunnel.common.config.Common; import org.apache.seatunnel.core.starter.exception.TaskExecuteException; import org.apache.seatunnel.plugin.discovery.PluginIdentifier; import org.apache.seatunnel.plugin.discovery.seatunnel.SeaTunnelSinkPluginDiscovery; @@ -36,7 +35,6 @@ import java.net.URL; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.stream.Collectors; @@ -56,11 +54,11 @@ protected SinkExecuteProcessor(SparkEnvironment sparkEnvironment, List> sinks = pluginConfigs.stream().map(sinkConfig -> { PluginIdentifier pluginIdentifier = PluginIdentifier.of(ENGINE_TYPE, PLUGIN_TYPE, sinkConfig.getString(PLUGIN_NAME)); pluginJars.addAll(sinkPluginDiscovery.getPluginJarPaths(Lists.newArrayList(pluginIdentifier))); - SeaTunnelSink seaTunnelSink = sinkPluginDiscovery.getPluginInstance(pluginIdentifier); + SeaTunnelSink seaTunnelSink = sinkPluginDiscovery.createPluginInstance(pluginIdentifier); seaTunnelSink.prepare(sinkConfig); seaTunnelSink.setSeaTunnelContext(SeaTunnelContext.getContext()); return seaTunnelSink; - }).collect(Collectors.toList()); + }).distinct().collect(Collectors.toList()); sparkEnvironment.registerPlugin(pluginJars); return sinks; } @@ -74,8 +72,7 @@ public List> execute(List> upstreamDataStreams) throws Dataset dataset = fromSourceTable(sinkConfig, sparkEnvironment).orElse(input); // TODO modify checkpoint location seaTunnelSink.setTypeInfo((SeaTunnelRowType) TypeConverterUtils.convert(dataset.schema())); - SparkSinkInjector.inject(dataset.write(), seaTunnelSink, new HashMap<>(Common.COLLECTION_SIZE)).option( - "checkpointLocation", "/tmp").save(); + SparkSinkInjector.inject(dataset.write(), seaTunnelSink).option("checkpointLocation", 
"/tmp").save(); } // the sink is the last stream return null; diff --git a/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SourceExecuteProcessor.java b/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SourceExecuteProcessor.java index ba953ebda2f..1435bd1b450 100644 --- a/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SourceExecuteProcessor.java +++ b/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SourceExecuteProcessor.java @@ -19,6 +19,7 @@ import org.apache.seatunnel.api.common.SeaTunnelContext; import org.apache.seatunnel.api.source.SeaTunnelSource; +import org.apache.seatunnel.common.Constants; import org.apache.seatunnel.common.utils.SerializationUtils; import org.apache.seatunnel.plugin.discovery.PluginIdentifier; import org.apache.seatunnel.plugin.discovery.seatunnel.SeaTunnelSourcePluginDiscovery; @@ -34,7 +35,9 @@ import java.net.URL; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; public class SourceExecuteProcessor extends AbstractPluginExecuteProcessor> { @@ -53,7 +56,7 @@ public List> execute(List> upstreamDataStreams) { Dataset dataset = sparkEnvironment.getSparkSession() .read() .format(SeaTunnelSource.class.getSimpleName()) - .option("source.serialization", SerializationUtils.objectToString(source)) + .option(Constants.SOURCE_SERIALIZATION, SerializationUtils.objectToString(source)) .schema((StructType) TypeConverterUtils.convert(source.getProducedType())).load(); sources.add(dataset); registerInputTempView(pluginConfigs.get(i), dataset); @@ -65,17 +68,17 @@ public List> execute(List> upstreamDataStreams) { protected List> initializePlugins(List pluginConfigs) { SeaTunnelSourcePluginDiscovery sourcePluginDiscovery = new SeaTunnelSourcePluginDiscovery(); List> sources = new ArrayList<>(); - List jars = new ArrayList<>(); + Set jars = new HashSet<>(); for (Config sourceConfig : pluginConfigs) { PluginIdentifier pluginIdentifier = PluginIdentifier.of( ENGINE_TYPE, PLUGIN_TYPE, sourceConfig.getString(PLUGIN_NAME)); jars.addAll(sourcePluginDiscovery.getPluginJarPaths(Lists.newArrayList(pluginIdentifier))); - SeaTunnelSource seaTunnelSource = sourcePluginDiscovery.getPluginInstance(pluginIdentifier); + SeaTunnelSource seaTunnelSource = sourcePluginDiscovery.createPluginInstance(pluginIdentifier); seaTunnelSource.prepare(sourceConfig); seaTunnelSource.setSeaTunnelContext(SeaTunnelContext.getContext()); sources.add(seaTunnelSource); } - sparkEnvironment.registerPlugin(jars); + sparkEnvironment.registerPlugin(new ArrayList<>(jars)); return sources; } } diff --git a/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SparkExecution.java b/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SparkExecution.java index 7b90bd0dd13..31193e0cffb 100644 --- a/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SparkExecution.java +++ b/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SparkExecution.java @@ -18,6 +18,7 @@ package org.apache.seatunnel.core.starter.spark.execution; import org.apache.seatunnel.api.common.SeaTunnelContext; +import org.apache.seatunnel.common.Constants; import 
org.apache.seatunnel.core.starter.config.EngineType; import org.apache.seatunnel.core.starter.config.EnvironmentFactory; import org.apache.seatunnel.core.starter.exception.TaskExecuteException; @@ -47,9 +48,9 @@ public SparkExecution(Config config) { this.config = config; this.sparkEnvironment = (SparkEnvironment) new EnvironmentFactory<>(config, EngineType.SPARK).getEnvironment(); SeaTunnelContext.getContext().setJobMode(sparkEnvironment.getJobMode()); - this.sourcePluginExecuteProcessor = new SourceExecuteProcessor(sparkEnvironment, config.getConfigList("source")); - this.transformPluginExecuteProcessor = new TransformExecuteProcessor(sparkEnvironment, config.getConfigList("transform")); - this.sinkPluginExecuteProcessor = new SinkExecuteProcessor(sparkEnvironment, config.getConfigList("sink")); + this.sourcePluginExecuteProcessor = new SourceExecuteProcessor(sparkEnvironment, config.getConfigList(Constants.SOURCE)); + this.transformPluginExecuteProcessor = new TransformExecuteProcessor(sparkEnvironment, config.getConfigList(Constants.TRANSFORM)); + this.sinkPluginExecuteProcessor = new SinkExecuteProcessor(sparkEnvironment, config.getConfigList(Constants.SINK)); } public void execute() throws TaskExecuteException { diff --git a/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/TransformExecuteProcessor.java b/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/TransformExecuteProcessor.java index faa2baceaf6..5668457b768 100644 --- a/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/TransformExecuteProcessor.java +++ b/seatunnel-core/seatunnel-spark-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/TransformExecuteProcessor.java @@ -51,11 +51,11 @@ protected List initializePlugins(List plug .map(transformConfig -> { PluginIdentifier pluginIdentifier = PluginIdentifier.of(ENGINE_TYPE, PLUGIN_TYPE, transformConfig.getString(PLUGIN_NAME)); pluginJars.addAll(transformPluginDiscovery.getPluginJarPaths(Lists.newArrayList(pluginIdentifier))); - BaseSparkTransform pluginInstance = transformPluginDiscovery.getPluginInstance(pluginIdentifier); + BaseSparkTransform pluginInstance = transformPluginDiscovery.createPluginInstance(pluginIdentifier); pluginInstance.setConfig(transformConfig); pluginInstance.prepare(sparkEnvironment); return pluginInstance; - }).collect(Collectors.toList()); + }).distinct().collect(Collectors.toList()); sparkEnvironment.registerPlugin(pluginJars); return transforms; } diff --git a/seatunnel-dist/pom.xml b/seatunnel-dist/pom.xml index be7e709a4e0..3b8aa73cbdc 100644 --- a/seatunnel-dist/pom.xml +++ b/seatunnel-dist/pom.xml @@ -37,48 +37,120 @@ - release + all + + true + + + + org.apache.seatunnel + seatunnel-connectors-v2-dist + ${project.version} + + + org.apache.seatunnel + seatunnel-connectors-spark-dist + ${project.version} + + + org.apache.seatunnel + seatunnel-connectors-flink-dist + ${project.version} + + + + + + maven-assembly-plugin + + + bin + package + + single + + + + + src/main/assembly/assembly-bin-ci.xml + + true + + + + + src + package + + single + + + + src/main/assembly/assembly-src.xml + + true + + + + + + + apache-seatunnel-incubating-${project.version} + - - - - - - maven-assembly-plugin - - - bin - package - - single - + + release + + + org.apache.seatunnel + seatunnel-connectors-spark-dist + ${project.version} + + + org.apache.seatunnel + seatunnel-connectors-flink-dist + 
${project.version} + + + + + + maven-assembly-plugin + + + bin + package + + single + - - - src/main/assembly/assembly-bin.xml - - true - - + + + src/main/assembly/assembly-bin.xml + + true + + - - src - package - - single - - - - src/main/assembly/assembly-src.xml - - true - - + + src + package + + single + + + + src/main/assembly/assembly-src.xml + + true + + - - - - apache-seatunnel-incubating-${project.version} - - + + + + apache-seatunnel-incubating-${project.version} + + + diff --git a/seatunnel-dist/release-docs/LICENSE b/seatunnel-dist/release-docs/LICENSE index 4e4d2f912e7..8191c55a083 100644 --- a/seatunnel-dist/release-docs/LICENSE +++ b/seatunnel-dist/release-docs/LICENSE @@ -268,13 +268,14 @@ The text of each license is the standard Apache 2.0 license. (Apache 2) chill-java (com.twitter:chill-java:0.8.4 - https://github.com/twitter/chill) (Apache 2) chill-java (com.twitter:chill-java:0.9.3 - https://github.com/twitter/chill) (Apache 2) opencsv (com.opencsv:opencsv:4.6 - http://opencsv.sf.net) - (Apache 2) opencsv (net.sf.opencsv:opencsv:2.3 - http://opencsv.sf.net) (Apache 2) org.roaringbitmap:RoaringBitmap (org.roaringbitmap:RoaringBitmap:0.9.0 - https://github.com/RoaringBitmap/RoaringBitmap) (Apache 2) org.roaringbitmap:RoaringBitmap (org.roaringbitmap:RoaringBitmap:0.9.22 - https://github.com/RoaringBitmap/RoaringBitmap) (Apache 2) org.roaringbitmap:shims (org.roaringbitmap:shims:0.9.0 - https://github.com/RoaringBitmap/RoaringBitmap) (Apache 2) org.roaringbitmap:shims (org.roaringbitmap:shims:0.9.22 - https://github.com/RoaringBitmap/RoaringBitmap) (Apache 2) scalaj-http (org.scalaj:scalaj-http_2.11:2.3.0 - http://github.com/scalaj/scalaj-http) (Apache 2) univocity-parsers (com.univocity:univocity-parsers:2.7.3 - http://github.com/univocity/univocity-parsers) + (Apache 2) druid (com.alibaba:druid:1.2.9 - https://github.com/alibaba/druid) + (Apache 2) druid-spring-boot-starter (com.alibaba:druid-spring-boot-starter:1.2.9 - https://github.com/alibaba/druid) (Apache 2.0 License) Apache Mina SSHD :: Common support utilities (org.apache.sshd:sshd-common:2.7.0 - https://www.apache.org/sshd/sshd-common/) (Apache 2.0 License) Apache Mina SSHD :: Core (org.apache.sshd:sshd-core:2.7.0 - https://www.apache.org/sshd/sshd-core/) (Apache 2.0 License) Apache Mina SSHD :: SCP (org.apache.sshd:sshd-scp:2.7.0 - https://www.apache.org/sshd/sshd-scp/) @@ -308,10 +309,7 @@ The text of each license is the standard Apache 2.0 license. 
(Apache License 2.0) Asynchronous Http Client Netty Utils (org.asynchttpclient:async-http-client-netty-utils:2.5.3 - http://github.com/AsyncHttpClient/async-http-client/async-http-client-netty-utils) (Apache License 2.0) Compress-LZF (com.ning:compress-lzf:1.0.3 - http://github.com/ning/compress) (Apache License 2.0) Compress-LZF (com.ning:compress-lzf:1.0.4 - http://github.com/ning/compress) - (Apache License 2.0) FRocksDB JNI (com.ververica:frocksdbjni:5.17.2-ververica-2.1 - https://github.com/ververica/frocksdb) (Apache License 2.0) Graphite Integration for Metrics (io.dropwizard.metrics:metrics-graphite:3.1.5 - http://metrics.codahale.com/metrics-graphite/) - (Apache License 2.0) JVM Integration for Metrics (io.dropwizard.metrics:metrics-jvm:3.1.5 - http://metrics.codahale.com/metrics-jvm/) - (Apache License 2.0) Jackson Integration for Metrics (io.dropwizard.metrics:metrics-json:3.1.5 - http://metrics.codahale.com/metrics-json/) (Apache License 2.0) Metrics Core (io.dropwizard.metrics:metrics-core:3.1.5 - http://metrics.codahale.com/metrics-core/) (Apache License 2.0) Metrics Core (io.dropwizard.metrics:metrics-core:3.2.1 - http://metrics.codahale.com/metrics-core/) (Apache License 2.0) Metrics Core (io.dropwizard.metrics:metrics-core:4.0.0 - http://metrics.dropwizard.io/metrics-core) @@ -323,7 +321,11 @@ The text of each license is the standard Apache 2.0 license. (Apache License 2.0) jDBI (org.jdbi:jdbi:2.63.1 - http://jdbi.org/) (Apache License 2.0) snappy (org.iq80.snappy:snappy:0.2 - http://github.com/dain/snappy) (Apache License 2.0) snappy (org.iq80.snappy:snappy:0.3 - http://github.com/dain/snappy) - (Apache License Version 2) Jetty SSLEngine (org.mortbay.jetty:jetty-sslengine:6.1.26 - http://jetty.mortbay.org) + (Apache License 2.0) Hibernate Validator Engine (org.hibernate.validator:hibernate-validator:6.2.2.Final - http://hibernate.org/validator/hibernate-validator) + (Apache License 2.0) Jakarta Bean Validation API (jakarta.validation:jakarta.validation-api:2.0.2 - https://beanvalidation.org) + (Apache License 2.0) swagger-annotations (io.swagger:swagger-annotations:1.5.10 - https://github.com/swagger-api/swagger-core/modules/swagger-annotations) + (Apache License 2.0) swagger-models (io.swagger:swagger-models:1.5.10 - https://github.com/swagger-api/swagger-core/modules/swagger-models) + (Apache License Version 2) HikariCP (com.zaxxer:HikariCP:4.0.3 - https://github.com/brettwooldridge/HikariCP) (Apache License) HttpClient (commons-httpclient:commons-httpclient:3.1 - http://jakarta.apache.org/httpcomponents/httpclient-3.x/) (Apache License) HttpClient (org.apache.httpcomponents:httpclient:4.0.1 - http://hc.apache.org/httpcomponents-client) (Apache License) HttpClient (org.apache.httpcomponents:httpclient:4.2.5 - http://hc.apache.org/httpcomponents-client) @@ -338,12 +340,9 @@ The text of each license is the standard Apache 2.0 license.
(Apache License, Version 2.0) An open source Java toolkit for Amazon S3 (net.java.dev.jets3t:jets3t:0.9.0 - http://www.jets3t.org) (Apache License, Version 2.0) Apache Avro (org.apache.avro:avro:1.10.0 - https://avro.apache.org) (Apache License, Version 2.0) Apache Avro (org.apache.avro:avro:1.10.1 - https://avro.apache.org) - (Apache License, Version 2.0) Apache Commons BeanUtils (commons-beanutils:commons-beanutils:1.7.0 - https://commons.apache.org/proper/commons-beanutils/) - (Apache License, Version 2.0) Apache Commons BeanUtils (commons-beanutils:commons-beanutils:1.9.3 - https://commons.apache.org/proper/commons-beanutils/) - (Apache License, Version 2.0) Apache Commons CLI (commons-cli:commons-cli:1.3.1 - http://commons.apache.org/proper/commons-cli/) + (Apache License, Version 2.0) Apache Commons BeanUtils (commons-beanutils:commons-beanutils:1.9.4 - https://commons.apache.org/proper/commons-beanutils/) (Apache License, Version 2.0) Apache Commons CLI (commons-cli:commons-cli:1.4 - http://commons.apache.org/proper/commons-cli/) (Apache License, Version 2.0) Apache Commons Codec (commons-codec:commons-codec:1.13 - https://commons.apache.org/proper/commons-codec/) - (Apache License, Version 2.0) Apache Commons Collections (commons-collections:commons-collections:3.2.2 - http://commons.apache.org/collections/) (Apache License, Version 2.0) Apache Commons Collections (org.apache.commons:commons-collections4:4.4 - https://commons.apache.org/proper/commons-collections/) (Apache License, Version 2.0) Apache Commons Compress (org.apache.commons:commons-compress:1.18 - https://commons.apache.org/proper/commons-compress/) (Apache License, Version 2.0) Apache Commons Compress (org.apache.commons:commons-compress:1.20 - https://commons.apache.org/proper/commons-compress/) @@ -357,7 +357,6 @@ The text of each license is the standard Apache 2.0 license. (Apache License, Version 2.0) Apache Commons Lang (org.apache.commons:commons-lang3:3.5 - http://commons.apache.org/proper/commons-lang/) (Apache License, Version 2.0) Apache Commons Lang (org.apache.commons:commons-lang3:3.6 - http://commons.apache.org/proper/commons-lang/) (Apache License, Version 2.0) Apache Commons Lang (org.apache.commons:commons-lang3:3.8.1 - http://commons.apache.org/proper/commons-lang/) - (Apache License, Version 2.0) Apache Commons Math (org.apache.commons:commons-math3:3.6.1 - http://commons.apache.org/proper/commons-math/) (Apache License, Version 2.0) Apache Commons Net (commons-net:commons-net:3.6 - http://commons.apache.org/proper/commons-net/) (Apache License, Version 2.0) Apache Commons Text (org.apache.commons:commons-text:1.3 - http://commons.apache.org/proper/commons-text/) (Apache License, Version 2.0) Apache HBase - Annotations (org.apache.hbase:hbase-annotations:2.0.0 - http://hbase.apache.org/hbase-annotations) @@ -392,22 +391,35 @@ The text of each license is the standard Apache 2.0 license. 
(Apache License, Version 2.0) Apache HBase - Zookeeper (org.apache.hbase:hbase-zookeeper:2.1.0 - http://hbase.apache.org/hbase-build-configuration/hbase-zookeeper) (Apache License, Version 2.0) Apache Hadoop Annotations (org.apache.hadoop:hadoop-annotations:2.6.5 - no url defined) (Apache License, Version 2.0) Apache Hadoop Annotations (org.apache.hadoop:hadoop-annotations:3.0.0 - no url defined) - (Apache License, Version 2.0) Apache Hadoop Auth (org.apache.hadoop:hadoop-auth:2.7.2 - no url defined) + (Apache License, Version 2.0) Apache Hadoop Auth (org.apache.hadoop:hadoop-auth:2.6.5 - no url defined) (Apache License, Version 2.0) Apache Hadoop Auth (org.apache.hadoop:hadoop-auth:2.7.4 - no url defined) (Apache License, Version 2.0) Apache Hadoop Auth (org.apache.hadoop:hadoop-auth:3.0.0 - no url defined) - (Apache License, Version 2.0) Apache Hadoop Client Aggregator (org.apache.hadoop:hadoop-client:3.0.0 - no url defined) + (Apache License, Version 2.0) Apache Hadoop Client (org.apache.hadoop:hadoop-client:2.6.5 - no url defined) + (Apache License, Version 2.0) Apache Hadoop Client (org.apache.hadoop:hadoop-client:3.0.0 - no url defined) + (Apache License, Version 2.0) Apache Hadoop Common (org.apache.hadoop:hadoop-common:2.6.5 - no url defined) (Apache License, Version 2.0) Apache Hadoop Common (org.apache.hadoop:hadoop-common:2.7.7 - no url defined) (Apache License, Version 2.0) Apache Hadoop Common (org.apache.hadoop:hadoop-common:3.0.0 - no url defined) (Apache License, Version 2.0) Apache Hadoop Distributed Copy (org.apache.hadoop:hadoop-distcp:2.7.4 - no url defined) - (Apache License, Version 2.0) Apache Hadoop HDFS (org.apache.hadoop:hadoop-hdfs:2.7.2 - no url defined) + (Apache License, Version 2.0) Apache Hadoop HDFS (org.apache.hadoop:hadoop-hdfs:2.6.5 - no url defined) (Apache License, Version 2.0) Apache Hadoop HDFS (org.apache.hadoop:hadoop-hdfs:2.7.4 - no url defined) (Apache License, Version 2.0) Apache Hadoop HDFS Client (org.apache.hadoop:hadoop-hdfs-client:3.0.0 - no url defined) - (Apache License, Version 2.0) Apache Hadoop MapReduce Common (org.apache.hadoop:hadoop-mapreduce-client-common:3.0.0 - no url defined) - (Apache License, Version 2.0) Apache Hadoop MapReduce Core (org.apache.hadoop:hadoop-mapreduce-client-core:3.0.0 - no url defined) - (Apache License, Version 2.0) Apache Hadoop MapReduce JobClient (org.apache.hadoop:hadoop-mapreduce-client-jobclient:3.0.0 - no url defined) - (Apache License, Version 2.0) Apache Hadoop YARN API (org.apache.hadoop:hadoop-yarn-api:3.0.0 - no url defined) - (Apache License, Version 2.0) Apache Hadoop YARN Client (org.apache.hadoop:hadoop-yarn-client:3.0.0 - no url defined) - (Apache License, Version 2.0) Apache Hadoop YARN Common (org.apache.hadoop:hadoop-yarn-common:3.0.0 - no url defined) + (Apache License, Version 2.0) hadoop-mapreduce-client-app (org.apache.hadoop:hadoop-mapreduce-client-app:2.6.5 - no url defined) + (Apache License, Version 2.0) hadoop-mapreduce-client-common (org.apache.hadoop:hadoop-mapreduce-client-common:2.6.5 - no url defined) + (Apache License, Version 2.0) hadoop-mapreduce-client-common (org.apache.hadoop:hadoop-mapreduce-client-common:3.0.0 - no url defined) + (Apache License, Version 2.0) hadoop-mapreduce-client-core (org.apache.hadoop:hadoop-mapreduce-client-core:2.6.5 - no url defined) + (Apache License, Version 2.0) hadoop-mapreduce-client-core (org.apache.hadoop:hadoop-mapreduce-client-core:2.7.7 - no url defined) + (Apache License, Version 2.0) hadoop-mapreduce-client-core 
(org.apache.hadoop:hadoop-mapreduce-client-core:3.0.0 - no url defined) + (Apache License, Version 2.0) hadoop-mapreduce-client-jobclient (org.apache.hadoop:hadoop-mapreduce-client-jobclient:2.6.5 - no url defined) + (Apache License, Version 2.0) hadoop-mapreduce-client-jobclient (org.apache.hadoop:hadoop-mapreduce-client-jobclient:3.0.0 - no url defined) + (Apache License, Version 2.0) hadoop-mapreduce-client-shuffle (org.apache.hadoop:hadoop-mapreduce-client-shuffle:2.6.5 - no url defined) + (Apache License, Version 2.0) hadoop-yarn-api (org.apache.hadoop:hadoop-yarn-api:2.6.5 - no url defined) + (Apache License, Version 2.0) hadoop-yarn-api (org.apache.hadoop:hadoop-yarn-api:3.0.0 - no url defined) + (Apache License, Version 2.0) hadoop-yarn-client (org.apache.hadoop:hadoop-yarn-client:2.6.5 - no url defined) + (Apache License, Version 2.0) hadoop-yarn-client (org.apache.hadoop:hadoop-yarn-client:3.0.0 - no url defined) + (Apache License, Version 2.0) hadoop-yarn-common (org.apache.hadoop:hadoop-yarn-common:2.6.5 - no url defined) + (Apache License, Version 2.0) hadoop-yarn-common (org.apache.hadoop:hadoop-yarn-common:2.7.7 - no url defined) + (Apache License, Version 2.0) hadoop-yarn-common (org.apache.hadoop:hadoop-yarn-common:3.0.0 - no url defined) + (Apache License, Version 2.0) hadoop-yarn-server-common (org.apache.hadoop:hadoop-yarn-server-common:2.6.5 - no url defined) (Apache License, Version 2.0) Apache HttpAsyncClient (org.apache.httpcomponents:httpasyncclient:4.1.2 - http://hc.apache.org/httpcomponents-asyncclient) (Apache License, Version 2.0) Apache HttpAsyncClient (org.apache.httpcomponents:httpasyncclient:4.1.4 - http://hc.apache.org/httpcomponents-asyncclient) (Apache License, Version 2.0) Apache HttpClient (org.apache.httpcomponents:httpclient:4.5.10 - http://hc.apache.org/httpcomponents-client) @@ -417,7 +429,6 @@ The text of each license is the standard Apache 2.0 license. (Apache License, Version 2.0) Apache HttpClient (org.apache.httpcomponents:httpclient:4.5.6 - http://hc.apache.org/httpcomponents-client) (Apache License, Version 2.0) Apache HttpClient (org.apache.httpcomponents:httpclient:4.5.9 - http://hc.apache.org/httpcomponents-client) (Apache License, Version 2.0) Apache HttpClient Mime (org.apache.httpcomponents:httpmime:4.5.2 - http://hc.apache.org/httpcomponents-client) - (Apache License, Version 2.0) Apache HttpClient Mime (org.apache.httpcomponents:httpmime:4.5.13 - http://hc.apache.org/httpcomponents-client) (Apache License, Version 2.0) Apache HttpCore (org.apache.httpcomponents:httpcore:4.4.10 - http://hc.apache.org/httpcomponents-core-ga) (Apache License, Version 2.0) Apache HttpCore (org.apache.httpcomponents:httpcore:4.4.11 - http://hc.apache.org/httpcomponents-core-ga) (Apache License, Version 2.0) Apache HttpCore (org.apache.httpcomponents:httpcore:4.4.12 - http://hc.apache.org/httpcomponents-core-ga) @@ -440,7 +451,6 @@ The text of each license is the standard Apache 2.0 license. 
(Apache License, Version 2.0) Apache POI (org.apache.poi:poi-ooxml-schemas:4.1.2 - http://poi.apache.org/) (Apache License, Version 2.0) Apache POI (org.apache.poi:poi-ooxml:4.1.2 - http://poi.apache.org/) (Apache License, Version 2.0) Apache POI (org.apache.poi:poi:4.1.2 - http://poi.apache.org/) - (Apache License, Version 2.0) Apache Pulsar :: Bouncy Castle :: BC (org.apache.pulsar:bouncy-castle-bc:2.8.0 - https://github.com/apache/pulsar) (Apache License, Version 2.0) Apache Yetus - Audience Annotations (org.apache.yetus:audience-annotations:0.11.0 - https://yetus.apache.org/audience-annotations) (Apache License, Version 2.0) Apache Yetus - Audience Annotations (org.apache.yetus:audience-annotations:0.5.0 - https://yetus.apache.org/audience-annotations) (Apache License, Version 2.0) Apache Yetus - Audience Annotations (org.apache.yetus:audience-annotations:0.7.0 - https://yetus.apache.org/audience-annotations) @@ -460,7 +470,6 @@ The text of each license is the standard Apache 2.0 license. (Apache License, Version 2.0) Flink : Tools : Force Shading (org.apache.flink:force-shading:1.13.6 - https://www.apache.org/force-shading/) (Apache License, Version 2.0) Hibernate Validator Engine (org.hibernate:hibernate-validator:5.2.5.Final - http://hibernate.org/validator/hibernate-validator) (Apache License, Version 2.0) Hive Storage API (org.apache.hive:hive-storage-api:2.6.0 - https://www.apache.org/hive-storage-api/) - (Apache License, Version 2.0) JCIP Annotations under Apache License (com.github.stephenc.jcip:jcip-annotations:1.0 - http://stephenc.github.com/jcip-annotations) (Apache License, Version 2.0) JCIP Annotations under Apache License (com.github.stephenc.jcip:jcip-annotations:1.0-1 - http://stephenc.github.com/jcip-annotations) (Apache License, Version 2.0) JCL 1.2 implemented over SLF4J (org.slf4j:jcl-over-slf4j:1.7.30 - http://www.slf4j.org) (Apache License, Version 2.0) JMES Path Query library (com.amazonaws:jmespath-java:1.12.37 - https://aws.amazon.com/sdkforjava) @@ -522,9 +531,6 @@ The text of each license is the standard Apache 2.0 license. (Apache License, Version 2.0) ORC Shims (org.apache.orc:orc-shims:1.5.2 - http://orc.apache.org/orc-shims) (Apache License, Version 2.0) ORC Shims (org.apache.orc:orc-shims:1.5.6 - http://orc.apache.org/orc-shims) (Apache License, Version 2.0) Plexus Common Utilities (org.codehaus.plexus:plexus-utils:3.1.0 - http://codehaus-plexus.github.io/plexus-utils/) - (Apache License, Version 2.0) Pulsar Client :: API (org.apache.pulsar:pulsar-client-api:2.8.0 - https://github.com/apache/pulsar) - (Apache License, Version 2.0) Pulsar Client Admin :: API (org.apache.pulsar:pulsar-client-admin-api:2.8.0 - https://github.com/apache/pulsar) - (Apache License, Version 2.0) Pulsar Client All (org.apache.pulsar:pulsar-client-all:2.8.0 - https://github.com/apache/pulsar) (Apache License, Version 2.0) Sigar (org.hyperic:sigar:1.6.5.132 - https://github.com/hyperic/sigar) (Apache License, Version 2.0) SnakeYAML (org.yaml:snakeyaml:1.17 - http://www.snakeyaml.org) (Apache License, Version 2.0) SnakeYAML (org.yaml:snakeyaml:1.24 - http://www.snakeyaml.org) @@ -549,14 +555,13 @@ The text of each license is the standard Apache 2.0 license. 
(Apache License, Version 2.0) druid-indexing-service (org.apache.druid:druid-indexing-service:0.22.1 - https://druid.apache.org/druid-indexing-service/) (Apache License, Version 2.0) druid-processing (org.apache.druid:druid-processing:0.22.1 - https://druid.apache.org/druid-processing/) (Apache License, Version 2.0) druid-server (org.apache.druid:druid-server:0.22.1 - https://druid.apache.org/druid-server/) - (Apache License, Version 2.0) eigenbase-properties (net.hydromatic:eigenbase-properties:1.1.5 - http://github.com/julianhyde/eigenbase-properties) (Apache License, Version 2.0) extendedset (org.apache.druid:extendedset:0.22.1 - https://druid.apache.org/extendedset/) (Apache License, Version 2.0) fastutil (it.unimi.dsi:fastutil-core:8.5.4 - http://fastutil.di.unimi.it/) (Apache License, Version 2.0) fastutil (it.unimi.dsi:fastutil-extra:8.5.4 - http://fastutil.di.unimi.it/) (Apache License, Version 2.0) fastutil (it.unimi.dsi:fastutil:6.5.6 - http://fasutil.dsi.unimi.it/) (Apache License, Version 2.0) fastutil (it.unimi.dsi:fastutil:7.0.13 - http://fasutil.di.unimi.it/) (Apache License, Version 2.0) fastutil (it.unimi.dsi:fastutil:8.5.4 - http://fastutil.di.unimi.it/) - (Apache License, Version 2.0) hadoop-mapreduce-client-core (org.apache.hadoop:hadoop-mapreduce-client-core:2.7.7 - no url defined) + (Apache License, Version 2.0) htrace-core4 (org.apache.htrace:htrace-core4:4.2.0-incubating - http://incubator.apache.org/projects/htrace.html) (Apache License, Version 2.0) hudi-spark-bundle_2.11 (org.apache.hudi:hudi-spark-bundle_2.11:0.10.0 - https://github.com/apache/hudi/hudi-spark-bundle_2.11) (Apache License, Version 2.0) java-xmlbuilder (com.jamesmurty.utils:java-xmlbuilder:0.4 - http://code.google.com/p/java-xmlbuilder/) @@ -564,6 +569,29 @@ The text of each license is the standard Apache 2.0 license. 
(Apache License, Version 2.0) jmockdata (com.github.jsonzou:jmockdata:4.3.0 - https://github.com/jsonzou/jmockdata) (Apache License, Version 2.0) stream-lib (com.clearspring.analytics:stream:2.7.0 - https://github.com/addthis/stream-lib) (Apache License, version 2.0) JBoss Logging 3 (org.jboss.logging:jboss-logging:3.2.1.Final - http://www.jboss.org) + (Apache License, Version 2.0) Guava: Google Core Libraries for Java (com.google.guava:guava:31.0.1-jre - https://github.com/google/guava) + (Apache License, Version 2.0) SnakeYAML (org.yaml:snakeyaml:1.29 - http://www.snakeyaml.org) + (Apache License, Version 2.0) Spring AOP (org.springframework:spring-aop:5.3.20 - https://github.com/spring-projects/spring-framework) + (Apache License, Version 2.0) Spring Beans (org.springframework:spring-beans:5.3.20 - https://github.com/spring-projects/spring-framework) + (Apache License, Version 2.0) Spring Commons Logging Bridge (org.springframework:spring-jcl:5.3.20 - https://github.com/spring-projects/spring-framework) + (Apache License, Version 2.0) Spring Context (org.springframework:spring-context:5.3.20 - https://github.com/spring-projects/spring-framework) + (Apache License, Version 2.0) Spring Core (org.springframework:spring-core:5.3.20 - https://github.com/spring-projects/spring-framework) + (Apache License, Version 2.0) Spring JDBC (org.springframework:spring-jdbc:5.3.15 - https://github.com/spring-projects/spring-framework) + (Apache License, Version 2.0) Spring Plugin - Core (org.springframework.plugin:spring-plugin-core:1.2.0.RELEASE - https://github.com/spring-projects/spring-plugin/spring-plugin-core) + (Apache License, Version 2.0) Spring Plugin - Metadata Extension (org.springframework.plugin:spring-plugin-metadata:1.2.0.RELEASE - https://github.com/spring-projects/spring-plugin/spring-plugin-metadata) + (Apache License, Version 2.0) Spring Transaction (org.springframework:spring-tx:5.3.15 - https://github.com/spring-projects/spring-framework) + (Apache License, Version 2.0) Spring Web (org.springframework:spring-web:5.3.20 - https://github.com/spring-projects/spring-framework) + (Apache License, Version 2.0) Spring Web MVC (org.springframework:spring-webmvc:5.3.20 - https://github.com/spring-projects/spring-framework) + (Apache License, Version 2.0) spring-boot (org.springframework.boot:spring-boot:2.6.8 - https://spring.io/projects/spring-boot) + (Apache License, Version 2.0) spring-boot-autoconfigure (org.springframework.boot:spring-boot-autoconfigure:2.6.8 - https://spring.io/projects/spring-boot) + (Apache License, Version 2.0) spring-boot-starter (org.springframework.boot:spring-boot-starter:2.6.8 - https://spring.io/projects/spring-boot) + (Apache License, Version 2.0) spring-boot-starter-jdbc (org.springframework.boot:spring-boot-starter-jdbc:2.6.3 - https://spring.io/projects/spring-boot) + (Apache License, Version 2.0) spring-boot-starter-jetty (org.springframework.boot:spring-boot-starter-jetty:2.6.8 - https://spring.io/projects/spring-boot) + (Apache License, Version 2.0) spring-boot-starter-json (org.springframework.boot:spring-boot-starter-json:2.6.8 - https://spring.io/projects/spring-boot) + (Apache License, Version 2.0) spring-boot-starter-logging (org.springframework.boot:spring-boot-starter-logging:2.6.8 - https://spring.io/projects/spring-boot) + (Apache License, Version 2.0) spring-boot-starter-web (org.springframework.boot:spring-boot-starter-web:2.6.8 - https://spring.io/projects/spring-boot) + (Apache License, Version 2.0) tomcat-embed-el 
(org.apache.tomcat.embed:tomcat-embed-el:9.0.63 - https://tomcat.apache.org/) + (Apache License, version 2.0) JBoss Logging 3 (org.jboss.logging:jboss-logging:3.4.1.Final - http://www.jboss.org) (Apache v2) BoneCP :: Core Library (com.jolbox:bonecp:0.8.0.RELEASE - http://jolbox.com/bonecp) (Apache-2.0) json4s-ast (org.json4s:json4s-ast_2.11:3.5.3 - https://github.com/json4s/json4s) (Apache-2.0) json4s-core (org.json4s:json4s-core_2.11:3.5.3 - https://github.com/json4s/json4s) @@ -591,6 +619,7 @@ The text of each license is the standard Apache 2.0 license. (The Apache License, Version 2.0) org.jetbrains.kotlin:kotlin-stdlib-jdk7 (org.jetbrains.kotlin:kotlin-stdlib-jdk7:1.4.10 - https://kotlinlang.org/) (The Apache License, Version 2.0) org.jetbrains.kotlin:kotlin-stdlib-jdk8 (org.jetbrains.kotlin:kotlin-stdlib-jdk8:1.4.10 - https://kotlinlang.org/) (The Apache License, Version 2.0) software.amazon.ion:ion-java (software.amazon.ion:ion-java:1.0.2 - https://github.com/amznlabs/ion-java/) + (The Apache License, Version 2.0) com.github.jsonzou:jmockdata (com.github.jsonzou:jmockdata:4.3.0 - https://github.com/jsonzou/jmockdata) (The Apache Software License, Version 2.0) ASM based accessors helper used by json-smart (net.minidev:accessors-smart:1.2 - http://www.minidev.net/) (The Apache Software License, Version 2.0) Aggregate Designer Algorithm (net.hydromatic:aggdesigner-algorithm:6.0 - http://github.com/julianhyde/aggdesigner/aggdesigner-algorithm) (The Apache Software License, Version 2.0) Apache Avro (org.apache.avro:avro:1.7.7 - http://avro.apache.org) @@ -610,16 +639,11 @@ The text of each license is the standard Apache 2.0 license. (The Apache Software License, Version 2.0) Apache Directory API ASN.1 API (org.apache.directory.api:api-asn1-api:1.0.0-M20 - http://directory.apache.org/api-parent/api-asn1-parent/api-asn1-api/) (The Apache Software License, Version 2.0) Apache Directory LDAP API Utilities (org.apache.directory.api:api-util:1.0.0-M20 - http://directory.apache.org/api-parent/api-util/) (The Apache Software License, Version 2.0) Apache Extras™ for Apache log4j™. 
(log4j:apache-log4j-extras:1.2.17 - http://logging.apache.org/log4j/extras) - (The Apache Software License, Version 2.0) Apache Hadoop Annotations (org.apache.hadoop:hadoop-annotations:2.6.5 - no url defined) - (The Apache Software License, Version 2.0) Apache Hadoop Auth (org.apache.hadoop:hadoop-auth:2.6.5 - no url defined) - (The Apache Software License, Version 2.0) Apache Hadoop Client (org.apache.hadoop:hadoop-client:2.6.5 - no url defined) - (The Apache Software License, Version 2.0) Apache Hadoop Common (org.apache.hadoop:hadoop-common:2.6.5 - no url defined) - (The Apache Software License, Version 2.0) Apache Hadoop HDFS (org.apache.hadoop:hadoop-hdfs:2.6.5 - no url defined) + (The Apache Software License, Version 2.0) Apache Iceberg (org.apache.iceberg:iceberg-api:0.13.1 - https://iceberg.apache.org) (The Apache Software License, Version 2.0) Apache Iceberg (org.apache.iceberg:iceberg-bundled-guava:0.13.1 - https://iceberg.apache.org) (The Apache Software License, Version 2.0) Apache Iceberg (org.apache.iceberg:iceberg-common:0.13.1 - https://iceberg.apache.org) (The Apache Software License, Version 2.0) Apache Iceberg (org.apache.iceberg:iceberg-core:0.13.1 - https://iceberg.apache.org) - (The Apache Software License, Version 2.0) Apache Ivy (org.apache.ivy:ivy:2.4.0 - http://ant.apache.org/ivy/) (The Apache Software License, Version 2.0) Apache Kafka (org.apache.kafka:kafka-clients:2.0.0 - http://kafka.apache.org) (The Apache Software License, Version 2.0) Apache Kafka (org.apache.kafka:kafka-clients:2.4.1 - https://kafka.apache.org) (The Apache Software License, Version 2.0) Apache Kafka (org.apache.kafka:kafka-clients:3.2.1 - https://kafka.apache.org) @@ -641,7 +665,6 @@ The text of each license is the standard Apache 2.0 license. (The Apache Software License, Version 2.0) Apache Tephra API (org.apache.tephra:tephra-api:0.14.0-incubating - http://tephra.incubator.apache.org/tephra-api) (The Apache Software License, Version 2.0) Apache Tephra Core (org.apache.tephra:tephra-core:0.14.0-incubating - http://tephra.incubator.apache.org/tephra-core) (The Apache Software License, Version 2.0) Apache Tephra HBase 2.0 Compatibility (org.apache.tephra:tephra-hbase-compat-2.0:0.14.0-incubating - http://tephra.incubator.apache.org/tephra-hbase-compat-2.0) - (The Apache Software License, Version 2.0) Apache Thrift (org.apache.thrift:libfb303:0.9.3 - http://thrift.apache.org) (The Apache Software License, Version 2.0) Apache Thrift (org.apache.thrift:libthrift:0.9.0 - http://thrift.apache.org) (The Apache Software License, Version 2.0) Apache Thrift (org.apache.thrift:libthrift:0.9.3 - http://thrift.apache.org) (The Apache Software License, Version 2.0) Apache Twill API (org.apache.twill:twill-api:0.8.0 - http://twill.apache.org/twill-api) @@ -664,28 +687,17 @@ The text of each license is the standard Apache 2.0 license. 
(The Apache Software License, Version 2.0) Commons Digester (commons-digester:commons-digester:1.8.1 - http://commons.apache.org/digester/) (The Apache Software License, Version 2.0) Commons IO (commons-io:commons-io:2.4 - http://commons.apache.org/io/) (The Apache Software License, Version 2.0) Commons Lang (commons-lang:commons-lang:2.6 - http://commons.apache.org/lang/) - (The Apache Software License, Version 2.0) Commons Logging (commons-logging:commons-logging:1.1.3 - http://commons.apache.org/proper/commons-logging/) (The Apache Software License, Version 2.0) Commons Math (org.apache.commons:commons-math3:3.1.1 - http://commons.apache.org/math/) (The Apache Software License, Version 2.0) Commons Net (commons-net:commons-net:3.1 - http://commons.apache.org/net/) (The Apache Software License, Version 2.0) Commons Pool (commons-pool:commons-pool:1.5.4 - http://commons.apache.org/pool/) (The Apache Software License, Version 2.0) Commons Pool (commons-pool:commons-pool:1.6 - http://commons.apache.org/pool/) (The Apache Software License, Version 2.0) Converter: Moshi (com.squareup.retrofit2:converter-moshi:2.9.0 - https://github.com/square/retrofit) (The Apache Software License, Version 2.0) CronScheduler (io.timeandspace:cron-scheduler:0.1 - https://github.com/TimeAndSpaceIO/CronScheduler) - (The Apache Software License, Version 2.0) Curator Client (org.apache.curator:curator-client:2.12.0 - http://curator.apache.org/curator-client) - (The Apache Software License, Version 2.0) Curator Client (org.apache.curator:curator-client:2.6.0 - http://curator.apache.org/curator-client) - (The Apache Software License, Version 2.0) Curator Client (org.apache.curator:curator-client:2.7.1 - http://curator.apache.org/curator-client) (The Apache Software License, Version 2.0) Curator Client (org.apache.curator:curator-client:4.3.0 - http://curator.apache.org/curator-client) - (The Apache Software License, Version 2.0) Curator Framework (org.apache.curator:curator-framework:2.12.0 - http://curator.apache.org/curator-framework) - (The Apache Software License, Version 2.0) Curator Framework (org.apache.curator:curator-framework:2.6.0 - http://curator.apache.org/curator-framework) - (The Apache Software License, Version 2.0) Curator Framework (org.apache.curator:curator-framework:2.7.1 - http://curator.apache.org/curator-framework) (The Apache Software License, Version 2.0) Curator Framework (org.apache.curator:curator-framework:4.3.0 - http://curator.apache.org/curator-framework) - (The Apache Software License, Version 2.0) Curator Recipes (org.apache.curator:curator-recipes:2.12.0 - http://curator.apache.org/curator-recipes) - (The Apache Software License, Version 2.0) Curator Recipes (org.apache.curator:curator-recipes:2.6.0 - http://curator.apache.org/curator-recipes) - (The Apache Software License, Version 2.0) Curator Recipes (org.apache.curator:curator-recipes:2.7.1 - http://curator.apache.org/curator-recipes) (The Apache Software License, Version 2.0) Curator Recipes (org.apache.curator:curator-recipes:4.3.0 - http://curator.apache.org/curator-recipes) (The Apache Software License, Version 2.0) Curator Service Discovery (org.apache.curator:curator-x-discovery:4.3.0 - http://curator.apache.org/curator-x-discovery) (The Apache Software License, Version 2.0) Data Mapper for Jackson (org.codehaus.jackson:jackson-mapper-asl:1.9.13 - http://jackson.codehaus.org) - (The Apache Software License, Version 2.0) DataNucleus Core (org.datanucleus:datanucleus-core:3.2.10 - http://www.datanucleus.org) (The Apache 
Software License, Version 2.0) DataNucleus JDO API plugin (org.datanucleus:datanucleus-api-jdo:3.2.6 - http://www.datanucleus.org) (The Apache Software License, Version 2.0) DataNucleus RDBMS (org.datanucleus:datanucleus-rdbms:3.2.9 - http://www.datanucleus.org) (The Apache Software License, Version 2.0) Digester (commons-digester:commons-digester:1.8 - http://jakarta.apache.org/commons/digester/) @@ -741,7 +753,6 @@ The text of each license is the standard Apache 2.0 license. (The Apache Software License, Version 2.0) Google HTTP Client Library for Java (com.google.http-client:google-http-client:1.26.0 - https://github.com/googleapis/google-http-java-client/google-http-client) (The Apache Software License, Version 2.0) Google OAuth Client Library for Java (com.google.oauth-client:google-oauth-client:1.26.0 - https://github.com/googleapis/google-oauth-java-client/google-oauth-client) (The Apache Software License, Version 2.0) Gson (com.google.code.gson:gson:2.2.4 - http://code.google.com/p/google-gson/) - (The Apache Software License, Version 2.0) Gson (com.google.code.gson:gson:2.9.0 - http://code.google.com/p/google-gson/) (The Apache Software License, Version 2.0) Guava: Google Core Libraries for Java (com.google.guava:guava:19.0 - https://github.com/google/guava/guava) (The Apache Software License, Version 2.0) HPPC Collections (com.carrotsearch:hppc:0.7.1 - http://labs.carrotsearch.com/hppc.html/hppc) (The Apache Software License, Version 2.0) HPPC Collections (com.carrotsearch:hppc:0.7.2 - http://labs.carrotsearch.com/hppc.html/hppc) @@ -756,7 +767,6 @@ The text of each license is the standard Apache 2.0 license. (The Apache Software License, Version 2.0) JPam (net.sf.jpam:jpam:1.1 - http://jpam.sf.net) (The Apache Software License, Version 2.0) JSON Small and Fast Parser (net.minidev:json-smart:2.3 - http://www.minidev.net/) (The Apache Software License, Version 2.0) Jackson (org.codehaus.jackson:jackson-core-asl:1.9.13 - http://jackson.codehaus.org) - (The Apache Software License, Version 2.0) Jackson (org.codehaus.jackson:jackson-core-asl:1.9.2 - http://jackson.codehaus.org) (The Apache Software License, Version 2.0) Jackson 2 extensions to the Google HTTP Client Library for Java. (com.google.http-client:google-http-client-jackson2:1.26.0 - https://github.com/googleapis/google-http-java-client/google-http-client-jackson2) (The Apache Software License, Version 2.0) Jackson dataformat: CBOR (com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:2.12.3 - http://github.com/FasterXML/jackson-dataformats-binary) (The Apache Software License, Version 2.0) Jackson dataformat: CBOR (com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:2.8.10 - http://github.com/FasterXML/jackson-dataformats-binary) @@ -784,7 +794,6 @@ The text of each license is the standard Apache 2.0 license. 
(The Apache Software License, Version 2.0) LZ4 and xxHash (org.lz4:lz4-java:1.4.0 - https://github.com/lz4/lz4-java) (The Apache Software License, Version 2.0) LZ4 and xxHash (org.lz4:lz4-java:1.6.0 - https://github.com/lz4/lz4-java) (The Apache Software License, Version 2.0) LZ4 and xxHash (org.lz4:lz4-java:1.7.1 - https://github.com/lz4/lz4-java) - (The Apache Software License, Version 2.0) LZ4 and xxHash (org.lz4:lz4-java:1.8.0 - https://github.com/lz4/lz4-java) (The Apache Software License, Version 2.0) Log4j (log4j:log4j:1.2.14 - http://logging.apache.org/log4j/docs/) (The Apache Software License, Version 2.0) Maven Aether Provider (org.apache.maven:maven-aether-provider:3.1.1 - http://maven.apache.org/ref/3.1.1/maven-aether-provider) (The Apache Software License, Version 2.0) Maven Model (org.apache.maven:maven-model:3.1.1 - http://maven.apache.org/ref/3.1.1/maven-model) @@ -801,7 +810,6 @@ The text of each license is the standard Apache 2.0 license. (The Apache Software License, Version 2.0) Retrofit (com.squareup.retrofit2:retrofit:2.9.0 - https://github.com/square/retrofit) (The Apache Software License, Version 2.0) SparseBitSet (com.zaxxer:SparseBitSet:1.2 - https://github.com/brettwooldridge/SparseBitSet) (The Apache Software License, Version 2.0) Spymemcached (net.spy:spymemcached:2.12.3 - http://www.couchbase.org/code/couchbase/java) - (The Apache Software License, Version 2.0) StAX API (stax:stax-api:1.0.1 - http://stax.codehaus.org/) (The Apache Software License, Version 2.0) T-Digest (com.tdunning:t-digest:3.2 - https://github.com/tdunning/t-digest) (The Apache Software License, Version 2.0) Uzaygezen-core (com.google.uzaygezen:uzaygezen-core:0.2 - http://code.google.com/p/uzaygezen/uzaygezen-core) (The Apache Software License, Version 2.0) Vavr (io.vavr:vavr:0.10.2 - http://vavr.io) @@ -813,11 +821,6 @@ The text of each license is the standard Apache 2.0 license. 
(The Apache Software License, Version 2.0) aggs-matrix-stats (org.elasticsearch.plugin:aggs-matrix-stats-client:6.3.1 - https://github.com/elastic/elasticsearch) (The Apache Software License, Version 2.0) aggs-matrix-stats (org.elasticsearch.plugin:aggs-matrix-stats-client:7.5.1 - https://github.com/elastic/elasticsearch) (The Apache Software License, Version 2.0) cli (org.elasticsearch:elasticsearch-cli:6.3.1 - https://github.com/elastic/elasticsearch) - (The Apache Software License, Version 2.0) clickhouse-client (com.clickhouse:clickhouse-client:0.3.2-patch9 - https://github.com/ClickHouse/clickhouse-jdbc) - (The Apache Software License, Version 2.0) clickhouse-grpc-client (com.clickhouse:clickhouse-grpc-client:0.3.2-patch9 - https://github.com/ClickHouse/clickhouse-jdbc) - (The Apache Software License, Version 2.0) clickhouse-http-client (com.clickhouse:clickhouse-http-client:0.3.2-patch9 - https://github.com/ClickHouse/clickhouse-jdbc) - (The Apache Software License, Version 2.0) clickhouse-jdbc (com.clickhouse:clickhouse-jdbc:0.3.2-patch9 - https://github.com/ClickHouse/clickhouse-jdbc) - (The Apache Software License, Version 2.0) clickhouse-jdbc (ru.yandex.clickhouse:clickhouse-jdbc:0.2 - https://github.com/yandex/clickhouse-jdbc) (The Apache Software License, Version 2.0) elasticsearch-cli (org.elasticsearch:elasticsearch-cli:7.5.1 - https://github.com/elastic/elasticsearch) (The Apache Software License, Version 2.0) elasticsearch-core (org.elasticsearch:elasticsearch-core:6.3.1 - https://github.com/elastic/elasticsearch) (The Apache Software License, Version 2.0) elasticsearch-core (org.elasticsearch:elasticsearch-core:7.5.1 - https://github.com/elastic/elasticsearch) @@ -831,15 +834,6 @@ The text of each license is the standard Apache 2.0 license. 
(The Apache Software License, Version 2.0) flink-shaded-jackson-2 (org.apache.flink:flink-shaded-jackson:2.12.1-13.0 - http://flink.apache.org/flink-shaded-jackson-parent/flink-shaded-jackson) (The Apache Software License, Version 2.0) flink-shaded-netty-4 (org.apache.flink:flink-shaded-netty:4.1.49.Final-13.0 - http://flink.apache.org/flink-shaded-netty) (The Apache Software License, Version 2.0) flink-shaded-zookeeper-3.4 (org.apache.flink:flink-shaded-zookeeper-3:3.4.14-13.0 - http://flink.apache.org/flink-shaded-zookeeper-parent/flink-shaded-zookeeper-3) - (The Apache Software License, Version 2.0) hadoop-mapreduce-client-app (org.apache.hadoop:hadoop-mapreduce-client-app:2.6.5 - no url defined) - (The Apache Software License, Version 2.0) hadoop-mapreduce-client-common (org.apache.hadoop:hadoop-mapreduce-client-common:2.6.5 - no url defined) - (The Apache Software License, Version 2.0) hadoop-mapreduce-client-core (org.apache.hadoop:hadoop-mapreduce-client-core:2.6.5 - no url defined) - (The Apache Software License, Version 2.0) hadoop-mapreduce-client-jobclient (org.apache.hadoop:hadoop-mapreduce-client-jobclient:2.6.5 - no url defined) - (The Apache Software License, Version 2.0) hadoop-mapreduce-client-shuffle (org.apache.hadoop:hadoop-mapreduce-client-shuffle:2.6.5 - no url defined) - (The Apache Software License, Version 2.0) hadoop-yarn-api (org.apache.hadoop:hadoop-yarn-api:2.6.5 - no url defined) - (The Apache Software License, Version 2.0) hadoop-yarn-client (org.apache.hadoop:hadoop-yarn-client:2.6.5 - no url defined) - (The Apache Software License, Version 2.0) hadoop-yarn-common (org.apache.hadoop:hadoop-yarn-common:2.6.5 - no url defined) - (The Apache Software License, Version 2.0) hadoop-yarn-server-common (org.apache.hadoop:hadoop-yarn-server-common:2.6.5 - no url defined) (The Apache Software License, Version 2.0) htrace-core (org.apache.htrace:htrace-core:3.1.0-incubating - http://incubator.apache.org/projects/htrace.html) (The Apache Software License, Version 2.0) htrace-core (org.htrace:htrace-core:3.0.4 - https://github.com/cloudera/htrace) (The Apache Software License, Version 2.0) htrace-core4 (org.apache.htrace:htrace-core4:4.1.0-incubating - http://incubator.apache.org/projects/htrace.html) @@ -871,15 +865,79 @@ The text of each license is the standard Apache 2.0 license. 
(The Apache Software License, Version 2.0) server (org.elasticsearch:elasticsearch:6.3.1 - https://github.com/elastic/elasticsearch) (The Apache Software License, Version 2.0) server (org.elasticsearch:elasticsearch:7.5.1 - https://github.com/elastic/elasticsearch) (The Apache Software License, Version 2.0) snappy-java (org.xerial.snappy:snappy-java:1.1.2.6 - https://github.com/xerial/snappy-java) - (The Apache Software License, Version 2.0) snappy-java (org.xerial.snappy:snappy-java:1.1.4 - https://github.com/xerial/snappy-java) - (The Apache Software License, Version 2.0) snappy-java (org.xerial.snappy:snappy-java:1.1.7.1 - https://github.com/xerial/snappy-java) (The Apache Software License, Version 2.0) snappy-java (org.xerial.snappy:snappy-java:1.1.7.3 - https://github.com/xerial/snappy-java) + (The Apache Software License, Version 2.0) snappy-java (org.xerial.snappy:snappy-java:1.1.8.3 - https://github.com/xerial/snappy-java) (The Apache Software License, Version 2.0) transport (org.elasticsearch.client:transport:6.3.1 - https://github.com/elastic/elasticsearch) (The Apache Software License, Version 2.0) transport (org.elasticsearch.client:transport:7.5.1 - https://github.com/elastic/elasticsearch) (The Apache Software License, Version 2.0) transport-netty4 (org.elasticsearch.plugin:transport-netty4-client:6.3.1 - https://github.com/elastic/elasticsearch) (The Apache Software License, Version 2.0) transport-netty4 (org.elasticsearch.plugin:transport-netty4-client:7.5.1 - https://github.com/elastic/elasticsearch) (The Apache Software License, Version 2.0) x-content (org.elasticsearch:elasticsearch-x-content:6.3.1 - https://github.com/elastic/elasticsearch) (The Apache Software License, Version 2.0) zookeeper (org.apache.zookeeper:zookeeper:3.4.10 - no url defined) + (Apache License, Version 2.0) Hadoop Metrics2 Reporter for Dropwizard Metrics (com.github.joshelser:dropwizard-metrics-hadoop-metrics2-reporter:0.1.2 - https://github.com/joshelser/dropwizard-hadoop-metrics2) + (The Apache Software License, Version 2.0) Apache Ivy (org.apache.ivy:ivy:2.4.0 - http://ant.apache.org/ivy/) + (The Apache Software License, Version 2.0) Apache Thrift (org.apache.thrift:libfb303:0.9.3 - http://thrift.apache.org) + (The Apache Software License, Version 2.0) Data Mapper for Jackson (org.codehaus.jackson:jackson-mapper-asl:1.8.13 - http://jackson.codehaus.org) + (Apache Software License - Version 2.0) Jettison (org.codehaus.jettison:jettison:1.1 - https://mvnrepository.com/artifact/org.codehaus.jettison/jettison) + (Apache Software License - Version 2.0) (Eclipse Public License - Version 1.0) Jetty Orbit :: Servlet API (org.eclipse.jetty.orbit:javax.servlet:3.0.0.v201112011016 - http://www.eclipse.org/jetty/jetty-orbit/javax.servlet) + (Apache Software License - Version 2.0) (Eclipse Public License - Version 1.0) Jetty Server (org.mortbay.jetty:jetty:6.1.26 - http://www.eclipse.org/jetty/jetty-parent/project/modules/jetty) + (The Apache Software License, Version 2.0) JAX-RS provider for JSON content type (org.codehaus.jackson:jackson-jaxrs:1.9.13 - http://jackson.codehaus.org) + (The Apache Software License, Version 2.0) Xml Compatibility extensions for Jackson (org.codehaus.jackson:jackson-xc:1.9.13 - http://jackson.codehaus.org) + + (The Apache Software License, Version 2.0) ClassMate (com.fasterxml:classmate:1.3.1 - http://github.com/cowtowncoder/java-classmate) + (The Apache Software License, Version 2.0) Guava: Google Core Libraries for Java (com.google.guava:guava:11.0.2 - 
http://code.google.com/p/guava-libraries/guava) + (The Apache Software License, Version 2.0) Guava: Google Core Libraries for Java (com.google.guava:guava:13.0.1 - http://code.google.com/p/guava-libraries/guava) + (The Apache Software License, Version 2.0) Guava: Google Core Libraries for Java (com.google.guava:guava:16.0.1 - http://code.google.com/p/guava-libraries/guava) + (The Apache Software License, Version 2.0) Guava: Google Core Libraries for Java (com.google.guava:guava:18.0 - http://code.google.com/p/guava-libraries/guava) + (The Apache Software License, Version 2.0) HikariCP (com.zaxxer:HikariCP:4.0.3 - https://github.com/brettwooldridge/HikariCP) + (The Apache Software License, Version 2.0) Jackson datatype: JSR310 (com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.13.3 - https://github.com/FasterXML/jackson-modules-java8/jackson-datatype-jsr310) + (The Apache Software License, Version 2.0) Jackson datatype: jdk8 (com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.13.3 - https://github.com/FasterXML/jackson-modules-java8/jackson-datatype-jdk8) + (The Apache Software License, Version 2.0) Jackson-annotations (com.fasterxml.jackson.core:jackson-annotations:2.10.5 - http://github.com/FasterXML/jackson) + (The Apache Software License, Version 2.0) Jackson-annotations (com.fasterxml.jackson.core:jackson-annotations:2.11.0 - http://github.com/FasterXML/jackson) + (The Apache Software License, Version 2.0) Jackson-annotations (com.fasterxml.jackson.core:jackson-annotations:2.11.4 - http://github.com/FasterXML/jackson) + (The Apache Software License, Version 2.0) Jackson-annotations (com.fasterxml.jackson.core:jackson-annotations:2.4.5 - http://github.com/FasterXML/jackson) + (The Apache Software License, Version 2.0) Jackson-annotations (com.fasterxml.jackson.core:jackson-annotations:2.6.7 - http://github.com/FasterXML/jackson) + (The Apache Software License, Version 2.0) Jackson-annotations (com.fasterxml.jackson.core:jackson-annotations:2.7.0 - http://github.com/FasterXML/jackson) + (The Apache Software License, Version 2.0) Jackson-annotations (com.fasterxml.jackson.core:jackson-annotations:2.7.8 - http://github.com/FasterXML/jackson) + (The Apache Software License, Version 2.0) Jackson-annotations (com.fasterxml.jackson.core:jackson-annotations:2.9.2 - http://github.com/FasterXML/jackson) + (The Apache Software License, Version 2.0) Jackson-core (com.fasterxml.jackson.core:jackson-core:2.10.5 - https://github.com/FasterXML/jackson-core) + (The Apache Software License, Version 2.0) Jackson-core (com.fasterxml.jackson.core:jackson-core:2.11.0 - https://github.com/FasterXML/jackson-core) + (The Apache Software License, Version 2.0) Jackson-core (com.fasterxml.jackson.core:jackson-core:2.11.4 - https://github.com/FasterXML/jackson-core) + (The Apache Software License, Version 2.0) Jackson-core (com.fasterxml.jackson.core:jackson-core:2.13.3 - https://github.com/FasterXML/jackson-core) + (The Apache Software License, Version 2.0) Jackson-core (com.fasterxml.jackson.core:jackson-core:2.6.7 - https://github.com/FasterXML/jackson-core) + (The Apache Software License, Version 2.0) Jackson-core (com.fasterxml.jackson.core:jackson-core:2.7.3 - https://github.com/FasterXML/jackson-core) + (The Apache Software License, Version 2.0) Jackson-core (com.fasterxml.jackson.core:jackson-core:2.7.9 - https://github.com/FasterXML/jackson-core) + (The Apache Software License, Version 2.0) Jackson-core (com.fasterxml.jackson.core:jackson-core:2.8.10 - https://github.com/FasterXML/jackson-core) 
+ (The Apache Software License, Version 2.0) Jackson-core (com.fasterxml.jackson.core:jackson-core:2.8.11 - https://github.com/FasterXML/jackson-core) + (The Apache Software License, Version 2.0) Jackson-core (com.fasterxml.jackson.core:jackson-core:2.9.2 - https://github.com/FasterXML/jackson-core) + (The Apache Software License, Version 2.0) Jackson-module-parameter-names (com.fasterxml.jackson.module:jackson-module-parameter-names:2.13.3 - https://github.com/FasterXML/jackson-modules-java8/jackson-module-parameter-names) + (The Apache Software License, Version 2.0) MapStruct Core (org.mapstruct:mapstruct:1.0.0.Final - http://mapstruct.org/mapstruct/) + (The Apache Software License, Version 2.0) Snappy for Java (org.xerial.snappy:snappy-java:1.0.5 - http://github.com/xerial/snappy-java/) + (The Apache Software License, Version 2.0) jackson-databind (com.fasterxml.jackson.core:jackson-databind:2.10.5.1 - http://github.com/FasterXML/jackson) + (The Apache Software License, Version 2.0) jackson-databind (com.fasterxml.jackson.core:jackson-databind:2.11.0 - http://github.com/FasterXML/jackson) + (The Apache Software License, Version 2.0) jackson-databind (com.fasterxml.jackson.core:jackson-databind:2.11.4 - http://github.com/FasterXML/jackson) + (The Apache Software License, Version 2.0) jackson-databind (com.fasterxml.jackson.core:jackson-databind:2.12.6 - http://github.com/FasterXML/jackson) + (The Apache Software License, Version 2.0) jackson-databind (com.fasterxml.jackson.core:jackson-databind:2.13.3 - http://github.com/FasterXML/jackson) + (The Apache Software License, Version 2.0) jackson-databind (com.fasterxml.jackson.core:jackson-databind:2.6.7.1 - http://github.com/FasterXML/jackson) + (The Apache Software License, Version 2.0) jackson-databind (com.fasterxml.jackson.core:jackson-databind:2.7.3 - http://github.com/FasterXML/jackson) + (The Apache Software License, Version 2.0) jackson-databind (com.fasterxml.jackson.core:jackson-databind:2.9.2 - http://github.com/FasterXML/jackson) + (The Apache Software License, Version 2.0) jackson-module-scala (com.fasterxml.jackson.module:jackson-module-scala_2.11:2.12.6 - http://wiki.fasterxml.com/JacksonModuleScala) + (The Apache Software License, Version 2.0) jackson-module-scala (com.fasterxml.jackson.module:jackson-module-scala_2.11:2.6.7.1 - http://wiki.fasterxml.com/JacksonModuleScala) + (The Apache Software License, Version 2.0) mybatis (org.mybatis:mybatis:3.5.9 - http://www.mybatis.org/mybatis-3) + (The Apache Software License, Version 2.0) mybatis-spring (org.mybatis:mybatis-spring:2.0.7 - http://www.mybatis.org/spring/) + (The Apache Software License, Version 2.0) mybatis-spring-boot-autoconfigure (org.mybatis.spring.boot:mybatis-spring-boot-autoconfigure:2.2.2 - http://www.mybatis.org/spring-boot-starter/mybatis-spring-boot-autoconfigure/) + (The Apache Software License, Version 2.0) mybatis-spring-boot-starter (org.mybatis.spring.boot:mybatis-spring-boot-starter:2.2.2 - http://www.mybatis.org/spring-boot-starter/mybatis-spring-boot-starter/) + (The Apache Software License, Version 2.0) snappy-java (org.xerial.snappy:snappy-java:1.1.2.6 - https://github.com/xerial/snappy-java) + (The Apache Software License, Version 2.0) snappy-java (org.xerial.snappy:snappy-java:1.1.4 - https://github.com/xerial/snappy-java) + (The Apache Software License, Version 2.0) snappy-java
(org.xerial.snappy:snappy-java:1.1.7.1 - https://github.com/xerial/snappy-java) + (The Apache Software License, Version 2.0) snappy-java (org.xerial.snappy:snappy-java:1.1.7.3 - https://github.com/xerial/snappy-java) + (The Apache Software License, Version 2.0) springfox-core (io.springfox:springfox-core:2.6.1 - https://github.com/springfox/springfox) + (The Apache Software License, Version 2.0) springfox-schema (io.springfox:springfox-schema:2.6.1 - https://github.com/springfox/springfox) + (The Apache Software License, Version 2.0) springfox-spi (io.springfox:springfox-spi:2.6.1 - https://github.com/springfox/springfox) + (The Apache Software License, Version 2.0) springfox-spring-web (io.springfox:springfox-spring-web:2.6.1 - https://github.com/springfox/springfox) + (The Apache Software License, Version 2.0) springfox-swagger-common (io.springfox:springfox-swagger-common:2.6.1 - https://github.com/springfox/springfox) + (The Apache Software License, Version 2.0) springfox-swagger-ui (io.springfox:springfox-swagger-ui:2.6.1 - https://github.com/springfox/springfox) + (The Apache Software License, Version 2.0) springfox-swagger2 (io.springfox:springfox-swagger2:2.6.1 - https://github.com/springfox/springfox) ======================================================================== @@ -893,8 +951,6 @@ The text of each license is also included at licenses/LICENSE-[project].txt. (MIT License) JCL 1.1.1 implemented over SLF4J (org.slf4j:jcl-over-slf4j:1.7.16 - http://www.slf4j.org) (MIT License) JCodings (org.jruby.jcodings:jcodings:1.0.18 - http://nexus.sonatype.org/oss-repository-hosting.html/jcodings) (MIT License) JCodings (org.jruby.jcodings:jcodings:1.0.43 - http://nexus.sonatype.org/oss-repository-hosting.html/jcodings) - (MIT License) JUL to SLF4J bridge (org.slf4j:jul-to-slf4j:1.7.16 - http://www.slf4j.org) - (MIT License) JUL to SLF4J bridge (org.slf4j:jul-to-slf4j:1.7.25 - http://www.slf4j.org) (MIT License) Joni (org.jruby.joni:joni:2.1.11 - http://nexus.sonatype.org/oss-repository-hosting.html/joni) (MIT License) Joni (org.jruby.joni:joni:2.1.2 - http://nexus.sonatype.org/oss-repository-hosting.html/joni) (MIT License) Joni (org.jruby.joni:joni:2.1.27 - http://nexus.sonatype.org/oss-repository-hosting.html/joni) @@ -902,15 +958,22 @@ The text of each license is also included at licenses/LICENSE-[project].txt. 
(MIT License) SLF4J LOG4J-12 Binding (org.slf4j:slf4j-log4j12:1.7.25 - http://www.slf4j.org) (MIT License) pyrolite (net.razorvine:pyrolite:4.13 - https://github.com/irmen/Pyrolite) (MIT License) scopt (com.github.scopt:scopt_2.11:3.5.0 - https://github.com/scopt/scopt) + (MIT License) JUL to SLF4J bridge (org.slf4j:jul-to-slf4j:1.7.36 - http://www.slf4j.org) + (MIT License) SLF4J API Module (org.slf4j:slf4j-api:1.6.4 - http://www.slf4j.org) + (MIT License) SLF4J API Module (org.slf4j:slf4j-api:1.7.15 - http://www.slf4j.org) + (MIT License) SLF4J API Module (org.slf4j:slf4j-api:1.7.16 - http://www.slf4j.org) + (MIT License) SLF4J API Module (org.slf4j:slf4j-api:1.7.21 - http://www.slf4j.org) + (MIT License) SLF4J API Module (org.slf4j:slf4j-api:1.7.28 - http://www.slf4j.org) + (MIT License) SLF4J API Module (org.slf4j:slf4j-api:1.7.30 - http://www.slf4j.org) + (MIT License) SLF4J LOG4J-12 Binding (org.slf4j:slf4j-log4j12:1.7.10 - http://www.slf4j.org) + (MIT License) SLF4J LOG4J-12 Binding (org.slf4j:slf4j-log4j12:1.7.15 - http://www.slf4j.org) + (MIT License) SLF4J LOG4J-12 Binding (org.slf4j:slf4j-log4j12:1.7.16 - http://www.slf4j.org) (MIT) Jedis (redis.clients:jedis:3.2.0 - https://github.com/xetorthio/jedis) (MIT-License) spoiwo (com.norbitltd:spoiwo_2.11:1.8.0 - https://github.com/norbert-radyk/spoiwo/) (The MIT License (MIT)) influxdb java bindings (org.influxdb:influxdb-java:2.22 - http://www.influxdb.org) (The MIT License) Checker Qual (org.checkerframework:checker-qual:3.10.0 - https://checkerframework.org) (The MIT License) Checker Qual (org.checkerframework:checker-qual:3.4.0 - https://checkerframework.org) (The MIT License) JOpt Simple (net.sf.jopt-simple:jopt-simple:5.0.2 - http://pholser.github.io/jopt-simple) - (Bouncy Castle License) The Bouncy Castle Crypto Package For Java (org.bouncycastle:bcpkix-jdk15on:1.68 - https://github.com/bcgit/bc-java) - (Bouncy Castle License) The Bouncy Castle Crypto Package For Java (org.bouncycastle:bcprov-ext-jdk15on:1.68 - https://github.com/bcgit/bc-java) - (Bouncy Castle License) The Bouncy Castle Crypto Package For Java (org.bouncycastle:bcprov-jdk15on:1.68 - https://github.com/bcgit/bc-java) ======================================================================== @@ -925,7 +988,6 @@ The text of each license is also included at licenses/LICENSE-[project].txt. (BSD 2-Clause License) zstd-jni (com.github.luben:zstd-jni:1.3.2-2 - https://github.com/luben/zstd-jni) (BSD 2-Clause License) zstd-jni (com.github.luben:zstd-jni:1.3.3-1 - https://github.com/luben/zstd-jni) (BSD 2-Clause License) zstd-jni (com.github.luben:zstd-jni:1.4.3-1 - https://github.com/luben/zstd-jni) - (BSD 2-Clause License) zstd-jni (com.github.luben:zstd-jni:1.5.2-1 - https://github.com/luben/zstd-jni) (BSD 3 Clause) Spark-Redis (com.redislabs:spark-redis_2.11:2.6.0 - http://github.com/RedisLabs/spark-redis) (BSD 3-Clause) I18n Utils (com.salesforce.i18n:i18n-util:1.0.4 - https://github.com/salesforce/i18n-util) (BSD 3-Clause) Scala Compiler (org.scala-lang:scala-compiler:2.11.12 - http://www.scala-lang.org/) @@ -939,13 +1001,11 @@ The text of each license is also included at licenses/LICENSE-[project].txt. 
(BSD 3-clause) scala-parser-combinators (org.scala-lang.modules:scala-parser-combinators_2.11:1.1.1 - http://www.scala-lang.org/) (BSD 3-clause) scala-xml (org.scala-lang.modules:scala-xml_2.11:1.0.5 - http://www.scala-lang.org/) (BSD 3-clause) scala-xml (org.scala-lang.modules:scala-xml_2.11:1.0.6 - http://www.scala-lang.org/) - (BSD License) ASM (asm:asm:3.1 - https://asm.ow2.io/license.html) (BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/) (BSD License) Javolution (javolution:javolution:5.5.1 - http://javolution.org) (BSD License) curvesapi (com.github.virtuald:curvesapi:1.06 - https://github.com/virtuald/curvesapi) (BSD licence) ANTLR 3 Runtime (org.antlr:antlr-runtime:3.4 - http://www.antlr.org) (BSD licence) ANTLR 3 Runtime (org.antlr:antlr-runtime:3.5.2 - http://www.antlr.org) - (BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org) (BSD licence) ANTLR StringTemplate (org.antlr:stringtemplate:3.2.1 - http://www.stringtemplate.org) (BSD) ASM All (org.ow2.asm:asm-all:5.0.2 - http://asm.objectweb.org/asm-all/) (BSD) ASM Core (org.ow2.asm:asm:5.0.4 - http://asm.objectweb.org/asm/) @@ -971,6 +1031,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt. (New BSD License) janino (org.codehaus.janino:janino:3.0.8 - http://janino-compiler.github.io/janino/) (New BSD License) janino (org.codehaus.janino:janino:3.0.9 - http://janino-compiler.github.io/janino/) (New BSD license) Protocol Buffer Java API (com.google.protobuf:protobuf-java:2.5.0 - http://code.google.com/p/protobuf) + (New BSD License) Hamcrest Core (org.hamcrest:hamcrest-core:1.3 - https://github.com/hamcrest/JavaHamcrest/hamcrest-core) (Revised BSD) JSch (com.jcraft:jsch:0.1.54 - http://www.jcraft.com/jsch/) (The BSD 3-Clause License) leveldbjni-all (org.fusesource.leveldbjni:leveldbjni-all:1.8 - http://leveldbjni.fusesource.org/leveldbjni-all) (The BSD License) ANTLR 4 Runtime (org.antlr:antlr4-runtime:4.5.1 - http://www.antlr.org/antlr4-runtime) @@ -980,6 +1041,9 @@ The text of each license is also included at licenses/LICENSE-[project].txt. (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net) (The New BSD License) Jodd Core (org.jodd:jodd-core:3.5.2 - http://jodd.org) (The New BSD License) Py4J (net.sf.py4j:py4j:0.10.7 - http://nexus.sonatype.org/oss-repository-hosting.html/py4j) + (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net) + (The BSD License) ASM Core (asm:asm:3.1 - http://asm.objectweb.org/asm/) + (New BSD License) Janino (org.codehaus.janino:janino:3.1.6 - http://docs.codehaus.org/display/JANINO/Home/janino) ======================================================================== @@ -1007,7 +1071,6 @@ The text of each license is also included at licenses/LICENSE-[project].txt. 
(CDDL License) HK2 API module (org.glassfish.hk2:hk2-api:2.5.0-b32 - https://hk2.java.net/hk2-api) (CDDL License) HK2 Implementation Utilities (org.glassfish.hk2:hk2-utils:2.4.0-b34 - https://hk2.java.net/hk2-utils) (CDDL License) HK2 Implementation Utilities (org.glassfish.hk2:hk2-utils:2.5.0-b32 - https://hk2.java.net/hk2-utils) - (CDDL License) JSP implementation (org.glassfish.web:javax.servlet.jsp:2.3.2 - http://jsp.java.net) (CDDL License) Java Servlet API (javax.servlet.jsp:jsp-api:2.1 - https://javaee.github.io/javaee-jsp-api) (CDDL License) Java Servlet API (javax.servlet:javax.servlet-api:3.1.0 - http://servlet-spec.java.net) (CDDL License) Java Servlet API (javax.servlet:servlet-api:2.5 - http://servlet-spec.java.net) @@ -1038,8 +1101,86 @@ The text of each license is also included at licenses/LICENSE-[project].txt. (CDDL License) JavaBeans Activation Framework (com.sun.activation:javax.activation:1.2.0 - http://java.net/all/javax.activation/) (CDDL License) JavaBeans Activation Framework API jar (javax.activation:javax.activation-api:1.2.0 - http://java.net/all/javax.activation-api/) (CDDL License) JavaMail API (com.sun.mail:javax.mail:1.5.6 - http://javamail.java.net/javax.mail) + (CDDL License) jersey-core (com.sun.jersey:jersey-core:1.9 - https://mvnrepository.com/artifact/com.sun.jersey/jersey-core/1.9) + (CDDL License) jersey-json (com.sun.jersey:jersey-json:1.9 - https://mvnrepository.com/artifact/com.sun.jersey/jersey-json/1.9) + (CDDL License) jersey-server (com.sun.jersey:jersey-server:1.9 - https://mvnrepository.com/artifact/com.sun.jersey/jersey-server/1.9) + (COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0) (GNU General Public Library) Streaming API for XML (javax.xml.stream:stax-api:1.0-2 - no url defined) + (CDDL License) Expression Language 3.0 (org.glassfish:javax.el:3.0.0 - http://el-spec.java.net) + (CDDL License) Expression Language 3.0 (org.glassfish:javax.el:3.0.1-b12 - http://uel.java.net) + (CDDL License) Expression Language 3.0 API (javax.el:javax.el-api:3.0.0 - http://uel-spec.java.net) + (CDDL License) HK2 API module (org.glassfish.hk2:hk2-api:2.4.0-b34 - https://hk2.java.net/hk2-api) + (CDDL License) HK2 API module (org.glassfish.hk2:hk2-api:2.5.0-b32 - https://hk2.java.net/hk2-api) + (CDDL License) HK2 Implementation Utilities (org.glassfish.hk2:hk2-utils:2.4.0-b34 - https://hk2.java.net/hk2-utils) + (CDDL License) HK2 Implementation Utilities (org.glassfish.hk2:hk2-utils:2.5.0-b32 - https://hk2.java.net/hk2-utils) + (CDDL License) Java Servlet API (javax.servlet.jsp:jsp-api:2.1 - https://javaee.github.io/javaee-jsp-api) + (CDDL License) Java Servlet API (javax.servlet:javax.servlet-api:3.1.0 - http://servlet-spec.java.net) + (CDDL License) Java Servlet API (javax.servlet:servlet-api:2.5 - http://servlet-spec.java.net) + (CDDL License) Java Transaction API (javax.transaction:jta:1.1 - http://java.sun.com/products/jta) + (CDDL License) OSGi resource locator bundle - used by various API providers that rely on META-INF/services mechanism to locate providers. 
(org.glassfish.hk2:osgi-resource-locator:1.0.1 - http://glassfish.org/osgi-resource-locator/) + (CDDL License) ServiceLocator Default Implementation (org.glassfish.hk2:hk2-locator:2.4.0-b34 - https://hk2.java.net/hk2-locator) + (CDDL License) ServiceLocator Default Implementation (org.glassfish.hk2:hk2-locator:2.5.0-b32 - https://hk2.java.net/hk2-locator) + (CDDL License) aopalliance version 1.0 repackaged as a module (org.glassfish.hk2.external:aopalliance-repackaged:2.4.0-b34 - https://hk2.java.net/external/aopalliance-repackaged) + (CDDL License) aopalliance version 1.0 repackaged as a module (org.glassfish.hk2.external:aopalliance-repackaged:2.5.0-b32 - https://hk2.java.net/external/aopalliance-repackaged) + (CDDL License) javax.annotation API (javax.annotation:javax.annotation-api:1.2 - http://jcp.org/en/jsr/detail?id=250) + (CDDL License) javax.annotation API (javax.annotation:javax.annotation-api:1.3.2 - http://jcp.org/en/jsr/detail?id=250) + (CDDL License) javax.inject:1 as OSGi bundle (org.glassfish.hk2.external:javax.inject:2.4.0-b34 - https://hk2.java.net/external/javax.inject) + (CDDL License) javax.inject:1 as OSGi bundle (org.glassfish.hk2.external:javax.inject:2.5.0-b32 - https://hk2.java.net/external/javax.inject) + (CDDL License) jersey-container-servlet (org.glassfish.jersey.containers:jersey-container-servlet:2.22.2 - https://jersey.java.net/project/jersey-container-servlet/) + (CDDL License) jersey-container-servlet-core (org.glassfish.jersey.containers:jersey-container-servlet-core:2.22.2 - https://jersey.java.net/project/jersey-container-servlet-core/) + (CDDL License) jersey-container-servlet-core (org.glassfish.jersey.containers:jersey-container-servlet-core:2.25.1 - https://jersey.java.net/project/jersey-container-servlet-core/) + (CDDL License) jersey-core-client (org.glassfish.jersey.core:jersey-client:2.22.2 - https://jersey.java.net/jersey-client/) + (CDDL License) jersey-core-client (org.glassfish.jersey.core:jersey-client:2.25.1 - https://jersey.java.net/jersey-client/) + (CDDL License) jersey-core-common (org.glassfish.jersey.core:jersey-common:2.22.2 - https://jersey.java.net/jersey-common/) + (CDDL License) jersey-core-common (org.glassfish.jersey.core:jersey-common:2.25.1 - https://jersey.java.net/jersey-common/) + (CDDL License) jersey-core-server (org.glassfish.jersey.core:jersey-server:2.22.2 - https://jersey.java.net/jersey-server/) + (CDDL License) jersey-core-server (org.glassfish.jersey.core:jersey-server:2.25.1 - https://jersey.java.net/jersey-server/) + (CDDL License) jersey-media-jaxb (org.glassfish.jersey.media:jersey-media-jaxb:2.22.2 - https://jersey.java.net/project/jersey-media-jaxb/) + (CDDL License) jersey-media-jaxb (org.glassfish.jersey.media:jersey-media-jaxb:2.25.1 - https://jersey.java.net/project/jersey-media-jaxb/) + (CDDL License) jersey-repackaged-guava (org.glassfish.jersey.bundles.repackaged:jersey-guava:2.22.2 - https://jersey.java.net/project/project/jersey-guava/) + (CDDL License) jersey-repackaged-guava (org.glassfish.jersey.bundles.repackaged:jersey-guava:2.25.1 - https://jersey.java.net/project/project/jersey-guava/) + (CDDL License) JavaBeans Activation Framework (com.sun.activation:javax.activation:1.2.0 - http://java.net/all/javax.activation/) + (CDDL License) JavaBeans Activation Framework API jar (javax.activation:javax.activation-api:1.2.0 - http://java.net/all/javax.activation-api/) + (CDDL License) JavaMail API (com.sun.mail:javax.mail:1.5.6 - http://javamail.java.net/javax.mail) + (CDDL+GPL License) 
jersey-container-servlet (org.glassfish.jersey.containers:jersey-container-servlet:2.22.2 - https://jersey.java.net/project/jersey-container-servlet/) + (CDDL+GPL License) jersey-container-servlet-core (org.glassfish.jersey.containers:jersey-container-servlet-core:2.22.2 - https://jersey.java.net/project/jersey-container-servlet-core/) + (CDDL+GPL License) jersey-container-servlet-core (org.glassfish.jersey.containers:jersey-container-servlet-core:2.25.1 - https://jersey.java.net/project/jersey-container-servlet-core/) + (CDDL+GPL License) jersey-core-client (org.glassfish.jersey.core:jersey-client:2.22.2 - https://jersey.java.net/jersey-client/) + (CDDL+GPL License) jersey-core-client (org.glassfish.jersey.core:jersey-client:2.25.1 - https://jersey.java.net/jersey-client/) + (CDDL+GPL License) jersey-core-common (org.glassfish.jersey.core:jersey-common:2.22.2 - https://jersey.java.net/jersey-common/) + (CDDL+GPL License) jersey-core-common (org.glassfish.jersey.core:jersey-common:2.25.1 - https://jersey.java.net/jersey-common/) + (CDDL+GPL License) jersey-core-server (org.glassfish.jersey.core:jersey-server:2.22.2 - https://jersey.java.net/jersey-server/) + (CDDL+GPL License) jersey-core-server (org.glassfish.jersey.core:jersey-server:2.25.1 - https://jersey.java.net/jersey-server/) + (CDDL+GPL License) jersey-media-jaxb (org.glassfish.jersey.media:jersey-media-jaxb:2.22.2 - https://jersey.java.net/project/jersey-media-jaxb/) + (CDDL+GPL License) jersey-media-jaxb (org.glassfish.jersey.media:jersey-media-jaxb:2.25.1 - https://jersey.java.net/project/jersey-media-jaxb/) + (CDDL+GPL License) jersey-repackaged-guava (org.glassfish.jersey.bundles.repackaged:jersey-guava:2.22.2 - https://jersey.java.net/project/project/jersey-guava/) + (CDDL+GPL License) jersey-repackaged-guava (org.glassfish.jersey.bundles.repackaged:jersey-guava:2.25.1 - https://jersey.java.net/project/project/jersey-guava/) + (CDDL + GPLv2 with classpath exception) Expression Language 3.0 (org.glassfish:javax.el:3.0.0 - http://el-spec.java.net) + (CDDL + GPLv2 with classpath exception) Expression Language 3.0 (org.glassfish:javax.el:3.0.1-b12 - http://uel.java.net) + (CDDL + GPLv2 with classpath exception) Expression Language 3.0 API (javax.el:javax.el-api:3.0.0 - http://uel-spec.java.net) + (CDDL + GPLv2 with classpath exception) HK2 API module (org.glassfish.hk2:hk2-api:2.4.0-b34 - https://hk2.java.net/hk2-api) + (CDDL + GPLv2 with classpath exception) HK2 API module (org.glassfish.hk2:hk2-api:2.5.0-b32 - https://hk2.java.net/hk2-api) + (CDDL + GPLv2 with classpath exception) HK2 Implementation Utilities (org.glassfish.hk2:hk2-utils:2.4.0-b34 - https://hk2.java.net/hk2-utils) + (CDDL + GPLv2 with classpath exception) HK2 Implementation Utilities (org.glassfish.hk2:hk2-utils:2.5.0-b32 - https://hk2.java.net/hk2-utils) + (CDDL + GPLv2 with classpath exception) JSP implementation (org.glassfish.web:javax.servlet.jsp:2.3.2 - http://jsp.java.net) + (CDDL + GPLv2 with classpath exception) Java Servlet API (javax.servlet.jsp:jsp-api:2.1 - https://javaee.github.io/javaee-jsp-api) + (CDDL + GPLv2 with classpath exception) Java Servlet API (javax.servlet:javax.servlet-api:3.1.0 - http://servlet-spec.java.net) + (CDDL + GPLv2 with classpath exception) Java Servlet API (javax.servlet:servlet-api:2.5 - http://servlet-spec.java.net) + (CDDL + GPLv2 with classpath exception) Java Transaction API (javax.transaction:jta:1.1 - http://java.sun.com/products/jta) + (CDDL + GPLv2 with classpath exception) OSGi resource locator bundle - used by 
various API providers that rely on META-INF/services mechanism to locate providers. (org.glassfish.hk2:osgi-resource-locator:1.0.1 - http://glassfish.org/osgi-resource-locator/) + (CDDL + GPLv2 with classpath exception) ServiceLocator Default Implementation (org.glassfish.hk2:hk2-locator:2.4.0-b34 - https://hk2.java.net/hk2-locator) + (CDDL + GPLv2 with classpath exception) ServiceLocator Default Implementation (org.glassfish.hk2:hk2-locator:2.5.0-b32 - https://hk2.java.net/hk2-locator) + (CDDL + GPLv2 with classpath exception) aopalliance version 1.0 repackaged as a module (org.glassfish.hk2.external:aopalliance-repackaged:2.4.0-b34 - https://hk2.java.net/external/aopalliance-repackaged) + (CDDL + GPLv2 with classpath exception) aopalliance version 1.0 repackaged as a module (org.glassfish.hk2.external:aopalliance-repackaged:2.5.0-b32 - https://hk2.java.net/external/aopalliance-repackaged) + (CDDL + GPLv2 with classpath exception) javax.annotation API (javax.annotation:javax.annotation-api:1.2 - http://jcp.org/en/jsr/detail?id=250) + (CDDL + GPLv2 with classpath exception) javax.annotation API (javax.annotation:javax.annotation-api:1.3.2 - http://jcp.org/en/jsr/detail?id=250) + (CDDL + GPLv2 with classpath exception) javax.inject:1 as OSGi bundle (org.glassfish.hk2.external:javax.inject:2.4.0-b34 - https://hk2.java.net/external/javax.inject) + (CDDL + GPLv2 with classpath exception) javax.inject:1 as OSGi bundle (org.glassfish.hk2.external:javax.inject:2.5.0-b32 - https://hk2.java.net/external/javax.inject) + (CDDL/GPLv2+CE) JavaBeans Activation Framework (com.sun.activation:javax.activation:1.2.0 - http://java.net/all/javax.activation/) + (CDDL/GPLv2+CE) JavaBeans Activation Framework API jar (javax.activation:javax.activation-api:1.2.0 - http://java.net/all/javax.activation-api/) + (CDDL/GPLv2+CE) JavaMail API (com.sun.mail:javax.mail:1.5.6 - http://javamail.java.net/javax.mail) + ======================================================================== Eclipse Public License ======================================================================== @@ -1048,9 +1189,6 @@ The following components are provided under the Eclipse Public License. See proj The text of each license is also included at licenses/LICENSE-[project].txt. (EDL 1.0) JavaBeans Activation Framework API jar (jakarta.activation:jakarta.activation-api:1.2.1 - https://github.com/eclipse-ee4j/jaf/jakarta.activation-api) - (EDL 1.0) JavaBeans Activation Framework API jar (jakarta.activation:jakarta.activation-api:1.2.2 - https://github.com/eclipse-ee4j/jaf/jakarta.activation-api) - (EPL 2.0) Jakarta RESTful Web Services API (jakarta.ws.rs:jakarta.ws.rs-api:2.1.6 - https://github.com/eclipse-ee4j/jaxrs-api) - (Eclipse Distribution License - v 1.0) jakarta.xml.bind-api (jakarta.xml.bind:jakarta.xml.bind-api:2.3.2 - https://github.com/eclipse-ee4j/jaxb-api/jakarta.xml.bind-api) (Eclipse Distribution License - v 1.0) jakarta.xml.bind-api (jakarta.xml.bind:jakarta.xml.bind-api:2.3.3 - https://github.com/eclipse-ee4j/jaxb-api/jakarta.xml.bind-api) (Eclipse Public License 1.0) JUnit (junit:junit:4.12 - http://junit.org) (Eclipse Public License, Version 1.0) Aether API (org.eclipse.aether:aether-api:0.9.0.M2 - http://www.eclipse.org/aether/aether-api/) diff --git a/seatunnel-dist/release-docs/NOTICE b/seatunnel-dist/release-docs/NOTICE index fe3c949f619..2489bae1edb 100644 --- a/seatunnel-dist/release-docs/NOTICE +++ b/seatunnel-dist/release-docs/NOTICE @@ -663,16 +663,6 @@ The Apache Software Foundation (http://www.apache.org/). 
======================================================================== -Apache Commons Email NOTICE - -======================================================================== -Apache Commons Email -Copyright 2001-2019 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). -======================================================================== - Apache Commons Crypto NOTICE ======================================================================== @@ -1224,13 +1214,6 @@ Copyright 2014 The Apache Software Foundation Apache Thrift Copyright 2006-2010 The Apache Software Foundation. - Apache Ant - Copyright 1999-2013 The Apache Software Foundation - - The task is based on code Copyright (c) 2002, Landmark - Graphics Corp that has been kindly donated to the Apache Software - Foundation. - Apache Commons IO Copyright 2002-2012 The Apache Software Foundation @@ -1368,9 +1351,6 @@ Copyright (c) 2012 Twitter, Inc. This product includes/uses Glyphicons (http://glyphicons.com/), Copyright (c) 2010 - 2012 Jan Kovarík -This product includes DataNucleus (http://www.datanucleus.org/) -Copyright 2008-2008 DataNucleus - This product includes Guava (http://code.google.com/p/guava-libraries/) Copyright (C) 2006 Google Inc. @@ -1386,11 +1366,6 @@ Copyright (C) 2015 Red Hat, Inc. This product includes/uses OkHttp (https://github.com/square/okhttp) Copyright (C) 2012 The Android Open Source Project -========================================================================= -== NOTICE file corresponding to section 4(d) of the Apache License, == -== Version 2.0, in this case for the DataNucleus distribution. == -========================================================================= - =================================================================== This product includes software developed by many individuals, including the following: @@ -2922,11 +2897,6 @@ javax.inject Version: 1 * License: Apache License, 2.0 * Copyright (C) 2009 The JSR-330 Expert Group -Javassist Version 3.25.0-GA -* License: Apache License, 2.0 -* Project: http://www.javassist.org/ -* Copyright (C) 1999- Shigeru Chiba. All Rights Reserved. - Jackson JAX-RS Providers Version 2.10.1 * License: Apache License, 2.0 * Project: https://github.com/FasterXML/jackson-jaxrs-providers @@ -4367,24 +4337,110 @@ The Apache Software Foundation (http://www.apache.org/). ========================================================================= -Apache Pulsar NOTICE +Apache HttpClient Mime NOTICE ========================================================================= -Apache Pulsar -Copyright 2017-2021 The Apache Software Foundation +Apache HttpClient Mime +Copyright 1999-2020 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). ========================================================================= -Apache HttpClient Mime NOTICE +Open Json NOTICE ========================================================================= +Android JSON library +Copyright (C) 2010 The Android Open Source Project -Apache HttpClient Mime -Copyright 1999-2020 The Apache Software Foundation +This product includes software developed by +The Android Open Source Project +========================================================================= + +Metrics NOTICE + +========================================================================= + +Metrics +Copyright 2010-2013 Coda Hale and Yammer, Inc. 
+
+This product includes software developed by Coda Hale and Yammer, Inc.
+
+This product includes code derived from the JSR-166 project (ThreadLocalRandom, Striped64,
+LongAdder), which was released with the following comments:
+
+    Written by Doug Lea with assistance from members of JCP JSR-166
+    Expert Group and released to the public domain, as explained at
+    http://creativecommons.org/publicdomain/zero/1.0/
+
+
+=========================================================================
+
+Joda-Time NOTICE
+
+=============================================================================
+= NOTICE file corresponding to section 4d of the Apache License Version 2.0 =
+=============================================================================
+This product includes software developed by
+Joda.org (https://www.joda.org/).
+
+=========================================================================
+
+ Apache Ant NOTICE
+
+=========================================================================
+ Apache Ant
+ Copyright 1999-2013 The Apache Software Foundation
+
+ The <sync> task is based on code Copyright (c) 2002, Landmark
+ Graphics Corp that has been kindly donated to the Apache Software
+ Foundation.
+
+
+=========================================================================
+
+Apache Ivy NOTICE
+
+=========================================================================
+Apache Ivy (TM)
+Copyright 2007-2014 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
+Portions of Ivy were originally developed at
+Jayasoft SARL (http://www.jayasoft.fr/)
+and are licensed to the Apache Software Foundation under the
+"Software Grant License Agreement"
+
+SSH and SFTP support is provided by the JCraft JSch package,
+which is open source software, available under
+the terms of a BSD style license.
+The original software and related information is available
+at http://www.jcraft.com/jsch/.
+
+=========================================================================
+
+Apache Commons CLI NOTICE
+
+=========================================================================
+
+Apache Commons CLI
+Copyright 2001-2017 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
+=========================================================================
+
+Apache Commons BeanUtils NOTICE
+
+=========================================================================
+
+Apache Commons BeanUtils
+Copyright 2000-2019 The Apache Software Foundation
 
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).
diff --git a/seatunnel-dist/release-docs/licenses/LICENSE-HikariCP.txt b/seatunnel-dist/release-docs/licenses/LICENSE-HikariCP.txt
new file mode 100644
index 00000000000..8405e89a0b1
--- /dev/null
+++ b/seatunnel-dist/release-docs/licenses/LICENSE-HikariCP.txt
@@ -0,0 +1,191 @@
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction, and
+distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the copyright owner or entity authorized by the copyright
+owner that is granting the License.
+ +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. 
+ +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. 
+ +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file
diff --git a/seatunnel-dist/release-docs/licenses/LICENSE-bouncycastle.txt b/seatunnel-dist/release-docs/licenses/LICENSE-bouncycastle.txt
deleted file mode 100644
index 39847391f15..00000000000
--- a/seatunnel-dist/release-docs/licenses/LICENSE-bouncycastle.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-Please note this should be read in the same way as the MIT license.
-Please also note this licensing model is made possible through funding from donations and the sale of support contracts.
-
-The Bouncy Castle License Copyright (c) 2000-2021 The Legion Of The Bouncy Castle Inc. (https://www.bouncycastle.org)
-Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
diff --git a/seatunnel-dist/release-docs/licenses/LICENSE-com.sun.jersey.txt b/seatunnel-dist/release-docs/licenses/LICENSE-com.sun.jersey.txt
new file mode 100644
index 00000000000..d1e3c0bea37
--- /dev/null
+++ b/seatunnel-dist/release-docs/licenses/LICENSE-com.sun.jersey.txt
@@ -0,0 +1,93 @@
+COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0 1.
+
+Definitions.
+
+1.1. Contributor means each individual or entity that creates or contributes to the creation of Modifications.
+
+1.2. Contributor Version means the combination of the Original Software, prior Modifications used by a Contributor (if any), and the Modifications made by that particular Contributor.
+
+1.3. Covered Software means (a) the Original Software, or (b) Modifications, or (c) the combination of files containing Original Software with files containing Modifications, in each case including portions thereof.
+
+1.4. Executable means the Covered Software in any form other than Source Code.
+
+1.5. Initial Developer means the individual or entity that first makes Original Software available under this License.
+
+1.6. Larger Work means a work which combines Covered Software or portions thereof with code not governed by the terms of this License.
+
+1.7. License means this document.
+
+1.8. Licensable means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently acquired, any and all of the rights conveyed herein.
+
+1.9. Modifications means the Source Code and Executable form of any of the following: A. Any file that results from an addition to, deletion from or modification of the contents of a file containing Original Software or previous Modifications; B. Any new file that contains any part of the Original Software or previous Modification; or C.
Any new file that is contributed or otherwise made available under the terms of this License. + +1.10. Original Software means the Source Code and Executable form of computer software code that is originally released under this License. + +1.11. Patent Claims means any patent claim(s), now owned or hereafter acquired, including without limitation, method, process, and apparatus claims, in any patent Licensable by grantor. + +1.12. Source Code means (a) the common form of computer software code in which modifications are made and (b) associated documentation included in or with such code. + +1.13. You (or Your) means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, You includes any entity which controls, is controlled by, or is under common control with You. For purposes of this definition, control means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. + +2. License Grants. + + 2.1. The Initial Developer Grant. Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, the Initial Developer hereby grants You a world-wide, royalty-free, non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) Licensable by Initial Developer, to use, reproduce, modify, display, perform, sublicense and distribute the Original Software (or portions thereof), with or without Modifications, and/or as part of a Larger Work; and + +(b) under Patent Claims infringed by the making, using or selling of Original Software, to make, have made, use, practice, sell, and offer for sale, and/or otherwise dispose of the Original Software (or portions thereof); + + (c) The licenses granted in Sections 2.1(a) and (b) are effective on the date Initial Developer first distributes or otherwise makes the Original Software available to a third party under the terms of this License; + + (d) Notwithstanding Section 2.1(b) above, no patent license is granted: (1) for code that You delete from the Original Software, or (2) for infringements caused by: (i) the modification of the Original Software, or (ii) the combination of the Original Software with other software or devices. + +2.2. Contributor Grant. Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) Licensable by Contributor to use, reproduce, modify, display, perform, sublicense and distribute the Modifications created by such Contributor (or portions thereof), either on an unmodified basis, with other Modifications, as Covered Software and/or as part of a Larger Work; and + +(b) under Patent Claims infringed by the making, using, or selling of Modifications made by that Contributor either alone and/or in combination with its Contributor Version (or portions of such combination), to make, use, sell, offer for sale, have made, and/or otherwise dispose of: (1) Modifications made by that Contributor (or portions thereof); and (2) the combination of Modifications made by that Contributor with its Contributor Version (or portions of such combination). 
+ +(c) The licenses granted in Sections 2.2(a) and 2.2(b) are effective on the date Contributor first distributes or otherwise makes the Modifications available to a third party. + +(d) Notwithstanding Section 2.2(b) above, no patent license is granted: (1) for any code that Contributor has deleted from the Contributor Version; (2) for infringements caused by: (i) third party modifications of Contributor Version, or (ii) the combination of Modifications made by that Contributor with other software (except as part of the Contributor Version) or other devices; or (3) under Patent Claims infringed by Covered Software in the absence of Modifications made by that Contributor. + +3. Distribution Obligations. + +3.1. Availability of Source Code. Any Covered Software that You distribute or otherwise make available in Executable form must also be made available in Source Code form and that Source Code form must be distributed only under the terms of this License. You must include a copy of this License with every copy of the Source Code form of the Covered Software You distribute or otherwise make available. You must inform recipients of any such Covered Software in Executable form as to how they can obtain such Covered Software in Source Code form in a reasonable manner on or through a medium customarily used for software exchange. + +3.2. Modifications. The Modifications that You create or to which You contribute are governed by the terms of this License. You represent that You believe Your Modifications are Your original creation(s) and/or You have sufficient rights to grant the rights conveyed by this License. + +3.3. Required Notices. You must include a notice in each of Your Modifications that identifies You as the Contributor of the Modification. You may not remove or alter any copyright, patent or trademark notices contained within the Covered Software, or any notices of licensing or any descriptive text giving attribution to any Contributor or the Initial Developer. + +3.4. Application of Additional Terms. You may not offer or impose any terms on any Covered Software in Source Code form that alters or restricts the applicable version of this License or the recipients rights hereunder. You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, you may do so only on Your own behalf, and not on behalf of the Initial Developer or any Contributor. You must make it absolutely clear that any such warranty, support, indemnity or liability obligation is offered by You alone, and You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of warranty, support, indemnity or liability terms You offer. + +3.5. Distribution of Executable Versions. You may distribute the Executable form of the Covered Software under the terms of this License or under the terms of a license of Your choice, which may contain terms different from this License, provided that You are in compliance with the terms of this License and that the license for the Executable form does not attempt to limit or alter the recipients rights in the Source Code form from the rights set forth in this License. If You distribute the Covered Software in Executable form under a different license, You must make it absolutely clear that any terms which differ from this License are offered by You alone, not by the Initial Developer or Contributor. 
You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of any such terms You offer. + +3.6. Larger Works. You may create a Larger Work by combining Covered Software with other code not governed by the terms of this License and distribute the Larger Work as a single product. In such a case, You must make sure the requirements of this License are fulfilled for the Covered Software. + +4. Versions of the License. + +4.1. New Versions. Sun Microsystems, Inc. is the initial license steward and may publish revised and/or new versions of this License from time to time. Each version will be given a distinguishing version number. Except as provided in Section 4.3, no one other than the license steward has the right to modify this License. + +4.2. Effect of New Versions. You may always continue to use, distribute or otherwise make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. If the Initial Developer includes a notice in the Original Software prohibiting it from being distributed or otherwise made available under any subsequent version of the License, You must distribute and make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. Otherwise, You may also choose to use, distribute or otherwise make the Covered Software available under the terms of any subsequent version of the License published by the license steward. + +4.3. Modified Versions. When You are an Initial Developer and You want to create a new license for Your Original Software, You may create and use a modified version of this License if You: (a) rename the license and remove any references to the name of the license steward (except to note that the license differs from this License); and (b) otherwise make it clear that the license contains terms which differ from this License. + +5. DISCLAIMER OF WARRANTY. COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN AS IS BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. + +6. TERMINATION. + +6.1. This License and the rights granted hereunder will terminate automatically if You fail to comply with terms herein and fail to cure such breach within 30 days of becoming aware of the breach. Provisions which, by their nature, must remain in effect beyond the termination of this License shall survive. + +6.2. 
If You assert a patent infringement claim (excluding declaratory judgment actions) against Initial Developer or a Contributor (the Initial Developer or Contributor against whom You assert such claim is referred to as Participant) alleging that the Participant Software (meaning the Contributor Version where the Participant is a Contributor or the Original Software where the Participant is the Initial Developer) directly or indirectly infringes any patent, then any and all rights granted directly or indirectly to You by such Participant, the Initial Developer (if the Initial Developer is not the Participant) and all Contributors under Sections 2.1 and/or 2.2 of this License shall, upon 60 days notice from Participant terminate prospectively and automatically at the expiration of such 60 day notice period, unless if within such 60 day period You withdraw Your claim with respect to the Participant Software against such Participant either unilaterally or pursuant to a written agreement with Participant. + +6.3. In the event of termination under Sections 6.1 or 6.2 above, all end user licenses that have been validly granted by You or any distributor hereunder prior to termination (excluding licenses granted to You by any distributor) shall survive termination. + +7. LIMITATION OF LIABILITY. UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTYS NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. + +8. U.S. GOVERNMENT END USERS. The Covered Software is a commercial item, as that term is defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of commercial computer software (as that term is defined at 48 C.F.R. 252.227-7014(a)(1)) and commercial computer software documentation as such terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users acquire Covered Software with only those rights set forth herein. This U.S. Government Rights clause is in lieu of, and supersedes, any other FAR, DFAR, or other clause or provision that addresses Government rights in computer software under this License. + +9. MISCELLANEOUS. This License represents the complete agreement concerning subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. This License shall be governed by the law of the jurisdiction specified in a notice contained within the Original Software (except to the extent applicable law, if any, provides otherwise), excluding such jurisdictions conflict-of-law provisions. 
Any litigation relating to this License shall be subject to the jurisdiction of the courts located in the jurisdiction and venue specified in a notice contained within the Original Software, with the losing party responsible for costs, including, without limitation, court costs and reasonable attorneys fees and expenses. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not apply to this License. You agree that You alone are responsible for compliance with the United States export administration regulations (and the export control laws and regulation of any other countries) when You use, distribute or otherwise make available any Covered Software.
+
+10. RESPONSIBILITY FOR CLAIMS. As between Initial Developer and the Contributors, each party is responsible for claims and damages arising, directly or indirectly, out of its utilization of rights under this License and You agree to work with Initial Developer and Contributors to distribute such responsibility on an equitable basis. Nothing herein is intended or shall be deemed to constitute any admission of liability.
+
+NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) The code released under the CDDL shall be governed by the laws of the State of California (excluding conflict-of-law provisions). Any litigation relating to this License shall be subject to the jurisdiction of the Federal Courts of the Northern District of California and the state courts of the State of California, with venue lying in Santa Clara County, California.
\ No newline at end of file
diff --git a/seatunnel-dist/release-docs/licenses/LICENSE-javax.el.txt b/seatunnel-dist/release-docs/licenses/LICENSE-javax.el.txt
deleted file mode 100644
index 33e627cf22f..00000000000
--- a/seatunnel-dist/release-docs/licenses/LICENSE-javax.el.txt
+++ /dev/null
@@ -1,131 +0,0 @@
-COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0
-
-1. Definitions.
-
-1.1. "Contributor" means each individual or entity that creates or contributes to the creation of Modifications.
-
-1.2. "Contributor Version" means the combination of the Original Software, prior Modifications used by a Contributor (if any), and the Modifications made by that particular Contributor.
-
-1.3. "Covered Software" means (a) the Original Software, or (b) Modifications, or (c) the combination of files containing Original Software with files containing Modifications, in each case including portions thereof.
-
-1.4. "Executable" means the Covered Software in any form other than Source Code.
-
-1.5. "Initial Developer" means the individual or entity that first makes Original Software available under this License.
-
-1.6. "Larger Work" means a work which combines Covered Software or portions thereof with code not governed by the terms of this License.
-
-1.7. "License" means this document.
-
-1.8. "Licensable" means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently acquired, any and all of the rights conveyed herein.
-
-1.9. "Modifications" means the Source Code and Executable form of any of the following:
-
-A. Any file that results from an addition to, deletion from or modification of the contents of a file containing Original Software or previous Modifications;
-
-B.
Any new file that contains any part of the Original Software or previous Modification; or - -C. Any new file that is contributed or otherwise made available under the terms of this License. - -1.10. "Original Software" means the Source Code and Executable form of computer software code that is originally released under this License. - -1.11. "Patent Claims" means any patent claim(s), now owned or hereafter acquired, including without limitation, method, process, and apparatus claims, in any patent Licensable by grantor. - -1.12. "Source Code" means (a) the common form of computer software code in which modifications are made and (b) associated documentation included in or with such code. - -1.13. "You" (or "Your") means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, "You" includes any entity which controls, is controlled by, or is under common control with You. For purposes of this definition, "control" means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. - -2. License Grants. - -2.1. The Initial Developer Grant. - -Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, the Initial Developer hereby grants You a world-wide, royalty-free, non-exclusive license: - -(a) under intellectual property rights (other than patent or trademark) Licensable by Initial Developer, to use, reproduce, modify, display, perform, sublicense and distribute the Original Software (or portions thereof), with or without Modifications, and/or as part of a Larger Work; and - -(b) under Patent Claims infringed by the making, using or selling of Original Software, to make, have made, use, practice, sell, and offer for sale, and/or otherwise dispose of the Original Software (or portions thereof). - -(c) The licenses granted in Sections 2.1(a) and (b) are effective on the date Initial Developer first distributes or otherwise makes the Original Software available to a third party under the terms of this License. - -(d) Notwithstanding Section 2.1(b) above, no patent license is granted: (1) for code that You delete from the Original Software, or (2) for infringements caused by: (i) the modification of the Original Software, or (ii) the combination of the Original Software with other software or devices. - -2.2. Contributor Grant. 
- -Conditioned upon Your compliance with Section 3.1 below and -subject to third party intellectual property claims, each -Contributor hereby grants You a world-wide, royalty-free, -non-exclusive license: - -(a) under intellectual property rights (other than patent or trademark) Licensable by Contributor to use, reproduce, modify, display, perform, sublicense and distribute the Modifications created by such Contributor (or portions thereof), either on an unmodified basis, with other Modifications, as Covered Software and/or as part of a Larger Work; and - -(b) under Patent Claims infringed by the making, using, or selling of Modifications made by that Contributor either alone and/or in combination with its Contributor Version (or portions of such combination), to make, use, sell, offer for sale, have made, and/or otherwise dispose of: (1) Modifications made by that Contributor (or portions thereof); and (2) the combination of Modifications made by that Contributor with its Contributor Version (or portions of such combination). - -(c) The licenses granted in Sections 2.2(a) and 2.2(b) are effective on the date Contributor first distributes or otherwise makes the Modifications available to a third party. -(d) Notwithstanding Section 2.2(b) above, no patent license is granted: (1) for any code that Contributor has deleted from the Contributor Version; (2) for infringements caused by: (i) third party modifications of Contributor Version, or (ii) the combination of Modifications made by that Contributor with other software (except as part of the Contributor Version) or other devices; or (3) under Patent Claims infringed by Covered Software in the absence of Modifications made by that Contributor. - -3. Distribution Obligations. - -3.1. Availability of Source Code. - -Any Covered Software that You distribute or otherwise make available in Executable form must also be made available in Source Code form and that Source Code form must be distributed only under the terms of this License. You must include a copy of this License with every copy of the Source Code form of the Covered Software You distribute or otherwise make available. You must inform recipients of any such Covered Software in Executable form as to how they can obtain such Covered Software in Source Code form in a reasonable manner on or through a medium customarily used for software exchange. - -3.2. Modifications. - -The Modifications that You create or to which You contribute are governed by the terms of this License. You represent that You believe Your Modifications are Your original creation(s) and/or You have sufficient rights to grant the rights conveyed by this License. - -3.3. Required Notices. - -You must include a notice in each of Your Modifications that identifies You as the Contributor of the Modification. You may not remove or alter any copyright, patent or trademark notices contained within the Covered Software, or any notices of licensing or any descriptive text giving attribution to any Contributor or the Initial Developer. - -3.4. Application of Additional Terms. - -You may not offer or impose any terms on any Covered Software in Source Code form that alters or restricts the applicable version of this License or the recipients' rights hereunder. You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, you may do so only on Your own behalf, and not on behalf of the Initial Developer or any Contributor. 
You must make it absolutely clear that any such warranty, support, indemnity or liability obligation is offered by You alone, and You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of warranty, support, indemnity or liability terms You offer. - -3.5. Distribution of Executable Versions. - -You may distribute the Executable form of the Covered Software under the terms of this License or under the terms of a license of Your choice, which may contain terms different from this License, provided that You are in compliance with the terms of this License and that the license for the Executable form does not attempt to limit or alter the recipient's rights in the Source Code form from the rights set forth in this License. If You distribute the Covered Software in Executable form under a different license, You must make it absolutely clear that any terms which differ from this License are offered by You alone, not by the Initial Developer or Contributor. You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of any such terms You offer. - -3.6. Larger Works. - -You may create a Larger Work by combining Covered Software with other code not governed by the terms of this License and distribute the Larger Work as a single product. In such a case, You must make sure the requirements of this License are fulfilled for the Covered Software. - -4. Versions of the License. - -4.1. New Versions. - -Sun Microsystems, Inc. is the initial license steward and may publish revised and/or new versions of this License from time to time. Each version will be given a distinguishing version number. Except as provided in Section 4.3, no one other than the license steward has the right to modify this License. - -4.2. Effect of New Versions. - -You may always continue to use, distribute or otherwise make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. If the Initial Developer includes a notice in the Original Software prohibiting it from being distributed or otherwise made available under any subsequent version of the License, You must distribute and make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. Otherwise, You may also choose to use, distribute or otherwise make the Covered Software available under the terms of any subsequent version of the License published by the license steward. - -4.3. Modified Versions. - -When You are an Initial Developer and You want to create a new license for Your Original Software, You may create and use a modified version of this License if You: (a) rename the license and remove any references to the name of the license steward (except to note that the license differs from this License); and (b) otherwise make it clear that the license contains terms which differ from this License. - -5. DISCLAIMER OF WARRANTY. - -COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. 
SHOULD ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. - -6. TERMINATION. - -6.1. This License and the rights granted hereunder will terminate automatically if You fail to comply with terms herein and fail to cure such breach within 30 days of becoming aware of the breach. Provisions which, by their nature, must remain in effect beyond the termination of this License shall survive. - -6.2. If You assert a patent infringement claim (excluding declaratory judgment actions) against Initial Developer or a Contributor (the Initial Developer or Contributor against whom You assert such claim is referred to as "Participant") alleging that the Participant Software (meaning the Contributor Version where the Participant is a Contributor or the Original Software where the Participant is the Initial Developer) directly or indirectly infringes any patent, then any and all rights granted directly or indirectly to You by such Participant, the Initial Developer (if the Initial Developer is not the Participant) and all Contributors under Sections 2.1 and/or 2.2 of this License shall, upon 60 days notice from Participant terminate prospectively and automatically at the expiration of such 60 day notice period, unless if within such 60 day period You withdraw Your claim with respect to the Participant Software against such Participant either unilaterally or pursuant to a written agreement with Participant. - -6.3. In the event of termination under Sections 6.1 or 6.2 above, all end user licenses that have been validly granted by You or any distributor hereunder prior to termination (excluding licenses granted to You by any distributor) shall survive termination. - -7. LIMITATION OF LIABILITY. - -UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. - -8. U.S. GOVERNMENT END USERS. - -The Covered Software is a "commercial item," as that term is defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial computer software" (as that term is defined at 48 C.F.R. ¤ 252.227-7014(a)(1)) and "commercial computer software documentation" as such terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users acquire Covered Software with only those rights set forth herein. This U.S. 
Government Rights clause is in lieu of, and supersedes, any other FAR, DFAR, or other clause or provision that addresses Government rights in computer software under this License.
-
-9. MISCELLANEOUS.
-
-This License represents the complete agreement concerning subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. This License shall be governed by the law of the jurisdiction specified in a notice contained within the Original Software (except to the extent applicable law, if any, provides otherwise), excluding such jurisdiction's conflict-of-law provisions. Any litigation relating to this License shall be subject to the jurisdiction of the courts located in the jurisdiction and venue specified in a notice contained within the Original Software, with the losing party responsible for costs, including, without limitation, court costs and reasonable attorneys' fees and expenses. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not apply to this License. You agree that You alone are responsible for compliance with the United States export administration regulations (and the export control laws and regulation of any other countries) when You use, distribute or otherwise make available any Covered Software.
-
-10. RESPONSIBILITY FOR CLAIMS.
-
-As between Initial Developer and the Contributors, each party is responsible for claims and damages arising, directly or indirectly, out of its utilization of rights under this License and You agree to work with Initial Developer and Contributors to distribute such responsibility on an equitable basis. Nothing herein is intended or shall be deemed to constitute any admission of liability.
\ No newline at end of file
diff --git a/seatunnel-dist/release-docs/licenses/LICENSE-mybatis.txt b/seatunnel-dist/release-docs/licenses/LICENSE-mybatis.txt
new file mode 100644
index 00000000000..9c311afbf06
--- /dev/null
+++ b/seatunnel-dist/release-docs/licenses/LICENSE-mybatis.txt
@@ -0,0 +1,13 @@
+Copyright ${license.git.copyrightYears} the original author or authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
\ No newline at end of file
diff --git a/seatunnel-dist/release-docs/licenses/LICENSE-spring.txt b/seatunnel-dist/release-docs/licenses/LICENSE-spring.txt
new file mode 100644
index 00000000000..512235b40b6
--- /dev/null
+++ b/seatunnel-dist/release-docs/licenses/LICENSE-spring.txt
@@ -0,0 +1,125 @@
+# Spring RTS game engine
+
+## LICENSE
+
+Spring is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 2 of the License, or
+(at your option) any later version.
+ +Spring is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +For details about the licenses, +see [GNU GPL v2 (gpl-2.0.txt)](gpl-2.0.html) +and [v3 (gpl-3.0.txt)](gpl-3.0.html). + +For a list of authors of this software, see [AUTHORS](AUTHORS.html). + +## Third Parties + +### file SOP.hpp +Copyright (c) 2012, Daniel Cornel. Published on drivenbynostalgia.com. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +### gflags +Copyright (c) 2006, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+### mingw-std-threads
+Copyright (c) 2016, Mega Limited
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+### moodycamel's ConcurrentQueue
+Copyright (c) 2013-2016, Cameron Desrochers.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this list of
+conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright notice, this list of
+conditions and the following disclaimer in the documentation and/or other materials
+provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/seatunnel-dist/release-docs/licenses/LICENSE-stax-api-1.0.txt b/seatunnel-dist/release-docs/licenses/LICENSE-stax-api-1.0.txt
deleted file mode 100644
index 33e627cf22f..00000000000
--- a/seatunnel-dist/release-docs/licenses/LICENSE-stax-api-1.0.txt
+++ /dev/null
@@ -1,131 +0,0 @@
-COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0
-
-1. Definitions.
-
-1.1. "Contributor" means each individual or entity that creates or contributes to the creation of Modifications.
-
-1.2. "Contributor Version" means the combination of the Original Software, prior Modifications used by a Contributor (if any), and the Modifications made by that particular Contributor.
-
-1.3.
"Covered Software" means (a) the Original Software, or (b) Modifications, or (c) the combination of files containing Original Software with files containing Modifications, in each case including portions thereof. - -1.4. "Executable" means the Covered Software in any form other than Source Code. - -1.5. "Initial Developer" means the individual or entity that first makes Original Software available under this License. - -1.6. "Larger Work" means a work which combines Covered Software or portions thereof with code not governed by the terms of this License. - -1.7. "License" means this document. - -1.8. "Licensable" means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently acquired, any and all of the rights conveyed herein. - -1.9. "Modifications" means the Source Code and Executable form of any of the following: - -A. Any file that results from an addition to, deletion from or modification of the contents of a file containing Original Software or previous Modifications; - -B. Any new file that contains any part of the Original Software or previous Modification; or - -C. Any new file that is contributed or otherwise made available under the terms of this License. - -1.10. "Original Software" means the Source Code and Executable form of computer software code that is originally released under this License. - -1.11. "Patent Claims" means any patent claim(s), now owned or hereafter acquired, including without limitation, method, process, and apparatus claims, in any patent Licensable by grantor. - -1.12. "Source Code" means (a) the common form of computer software code in which modifications are made and (b) associated documentation included in or with such code. - -1.13. "You" (or "Your") means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, "You" includes any entity which controls, is controlled by, or is under common control with You. For purposes of this definition, "control" means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. - -2. License Grants. - -2.1. The Initial Developer Grant. - -Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, the Initial Developer hereby grants You a world-wide, royalty-free, non-exclusive license: - -(a) under intellectual property rights (other than patent or trademark) Licensable by Initial Developer, to use, reproduce, modify, display, perform, sublicense and distribute the Original Software (or portions thereof), with or without Modifications, and/or as part of a Larger Work; and - -(b) under Patent Claims infringed by the making, using or selling of Original Software, to make, have made, use, practice, sell, and offer for sale, and/or otherwise dispose of the Original Software (or portions thereof). - -(c) The licenses granted in Sections 2.1(a) and (b) are effective on the date Initial Developer first distributes or otherwise makes the Original Software available to a third party under the terms of this License. 
- -(d) Notwithstanding Section 2.1(b) above, no patent license is granted: (1) for code that You delete from the Original Software, or (2) for infringements caused by: (i) the modification of the Original Software, or (ii) the combination of the Original Software with other software or devices. - -2.2. Contributor Grant. - -Conditioned upon Your compliance with Section 3.1 below and -subject to third party intellectual property claims, each -Contributor hereby grants You a world-wide, royalty-free, -non-exclusive license: - -(a) under intellectual property rights (other than patent or trademark) Licensable by Contributor to use, reproduce, modify, display, perform, sublicense and distribute the Modifications created by such Contributor (or portions thereof), either on an unmodified basis, with other Modifications, as Covered Software and/or as part of a Larger Work; and - -(b) under Patent Claims infringed by the making, using, or selling of Modifications made by that Contributor either alone and/or in combination with its Contributor Version (or portions of such combination), to make, use, sell, offer for sale, have made, and/or otherwise dispose of: (1) Modifications made by that Contributor (or portions thereof); and (2) the combination of Modifications made by that Contributor with its Contributor Version (or portions of such combination). - -(c) The licenses granted in Sections 2.2(a) and 2.2(b) are effective on the date Contributor first distributes or otherwise makes the Modifications available to a third party. -(d) Notwithstanding Section 2.2(b) above, no patent license is granted: (1) for any code that Contributor has deleted from the Contributor Version; (2) for infringements caused by: (i) third party modifications of Contributor Version, or (ii) the combination of Modifications made by that Contributor with other software (except as part of the Contributor Version) or other devices; or (3) under Patent Claims infringed by Covered Software in the absence of Modifications made by that Contributor. - -3. Distribution Obligations. - -3.1. Availability of Source Code. - -Any Covered Software that You distribute or otherwise make available in Executable form must also be made available in Source Code form and that Source Code form must be distributed only under the terms of this License. You must include a copy of this License with every copy of the Source Code form of the Covered Software You distribute or otherwise make available. You must inform recipients of any such Covered Software in Executable form as to how they can obtain such Covered Software in Source Code form in a reasonable manner on or through a medium customarily used for software exchange. - -3.2. Modifications. - -The Modifications that You create or to which You contribute are governed by the terms of this License. You represent that You believe Your Modifications are Your original creation(s) and/or You have sufficient rights to grant the rights conveyed by this License. - -3.3. Required Notices. - -You must include a notice in each of Your Modifications that identifies You as the Contributor of the Modification. You may not remove or alter any copyright, patent or trademark notices contained within the Covered Software, or any notices of licensing or any descriptive text giving attribution to any Contributor or the Initial Developer. - -3.4. Application of Additional Terms. 
- -You may not offer or impose any terms on any Covered Software in Source Code form that alters or restricts the applicable version of this License or the recipients' rights hereunder. You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, you may do so only on Your own behalf, and not on behalf of the Initial Developer or any Contributor. You must make it absolutely clear that any such warranty, support, indemnity or liability obligation is offered by You alone, and You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of warranty, support, indemnity or liability terms You offer. - -3.5. Distribution of Executable Versions. - -You may distribute the Executable form of the Covered Software under the terms of this License or under the terms of a license of Your choice, which may contain terms different from this License, provided that You are in compliance with the terms of this License and that the license for the Executable form does not attempt to limit or alter the recipient's rights in the Source Code form from the rights set forth in this License. If You distribute the Covered Software in Executable form under a different license, You must make it absolutely clear that any terms which differ from this License are offered by You alone, not by the Initial Developer or Contributor. You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of any such terms You offer. - -3.6. Larger Works. - -You may create a Larger Work by combining Covered Software with other code not governed by the terms of this License and distribute the Larger Work as a single product. In such a case, You must make sure the requirements of this License are fulfilled for the Covered Software. - -4. Versions of the License. - -4.1. New Versions. - -Sun Microsystems, Inc. is the initial license steward and may publish revised and/or new versions of this License from time to time. Each version will be given a distinguishing version number. Except as provided in Section 4.3, no one other than the license steward has the right to modify this License. - -4.2. Effect of New Versions. - -You may always continue to use, distribute or otherwise make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. If the Initial Developer includes a notice in the Original Software prohibiting it from being distributed or otherwise made available under any subsequent version of the License, You must distribute and make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. Otherwise, You may also choose to use, distribute or otherwise make the Covered Software available under the terms of any subsequent version of the License published by the license steward. - -4.3. Modified Versions. 
- -When You are an Initial Developer and You want to create a new license for Your Original Software, You may create and use a modified version of this License if You: (a) rename the license and remove any references to the name of the license steward (except to note that the license differs from this License); and (b) otherwise make it clear that the license contains terms which differ from this License. - -5. DISCLAIMER OF WARRANTY. - -COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. - -6. TERMINATION. - -6.1. This License and the rights granted hereunder will terminate automatically if You fail to comply with terms herein and fail to cure such breach within 30 days of becoming aware of the breach. Provisions which, by their nature, must remain in effect beyond the termination of this License shall survive. - -6.2. If You assert a patent infringement claim (excluding declaratory judgment actions) against Initial Developer or a Contributor (the Initial Developer or Contributor against whom You assert such claim is referred to as "Participant") alleging that the Participant Software (meaning the Contributor Version where the Participant is a Contributor or the Original Software where the Participant is the Initial Developer) directly or indirectly infringes any patent, then any and all rights granted directly or indirectly to You by such Participant, the Initial Developer (if the Initial Developer is not the Participant) and all Contributors under Sections 2.1 and/or 2.2 of this License shall, upon 60 days notice from Participant terminate prospectively and automatically at the expiration of such 60 day notice period, unless if within such 60 day period You withdraw Your claim with respect to the Participant Software against such Participant either unilaterally or pursuant to a written agreement with Participant. - -6.3. In the event of termination under Sections 6.1 or 6.2 above, all end user licenses that have been validly granted by You or any distributor hereunder prior to termination (excluding licenses granted to You by any distributor) shall survive termination. - -7. LIMITATION OF LIABILITY. - -UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. 
THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. - -8. U.S. GOVERNMENT END USERS. - -The Covered Software is a "commercial item," as that term is defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial computer software" (as that term is defined at 48 C.F.R. § 252.227-7014(a)(1)) and "commercial computer software documentation" as such terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users acquire Covered Software with only those rights set forth herein. This U.S. Government Rights clause is in lieu of, and supersedes, any other FAR, DFAR, or other clause or provision that addresses Government rights in computer software under this License. - -9. MISCELLANEOUS. - -This License represents the complete agreement concerning subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. This License shall be governed by the law of the jurisdiction specified in a notice contained within the Original Software (except to the extent applicable law, if any, provides otherwise), excluding such jurisdiction's conflict-of-law provisions. Any litigation relating to this License shall be subject to the jurisdiction of the courts located in the jurisdiction and venue specified in a notice contained within the Original Software, with the losing party responsible for costs, including, without limitation, court costs and reasonable attorneys' fees and expenses. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not apply to this License. You agree that You alone are responsible for compliance with the United States export administration regulations (and the export control laws and regulation of any other countries) when You use, distribute or otherwise make available any Covered Software. - -10. RESPONSIBILITY FOR CLAIMS. - -As between Initial Developer and the Contributors, each party is responsible for claims and damages arising, directly or indirectly, out of its utilization of rights under this License and You agree to work with Initial Developer and Contributors to distribute such responsibility on an equitable basis. Nothing herein is intended or shall be deemed to constitute any admission of liability.
\ No newline at end of file
diff --git a/seatunnel-dist/src/main/assembly/assembly-bin-ci.xml b/seatunnel-dist/src/main/assembly/assembly-bin-ci.xml
new file mode 100644
index 00000000000..1f15cbc08a7
--- /dev/null
+++ b/seatunnel-dist/src/main/assembly/assembly-bin-ci.xml
@@ -0,0 +1,193 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements. See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License. You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<assembly>
+    <id>bin</id>
+    <formats>
+        <format>tar.gz</format>
+    </formats>
+    <includeBaseDirectory>true</includeBaseDirectory>
+    <fileSets>
+        <fileSet>
+            <directory>../</directory>
+            <excludes>
+                <exclude>**/target/**</exclude>
+                <exclude>**/.classpath</exclude>
+                <exclude>**/.project</exclude>
+                <exclude>**/.settings/**</exclude>
+                <exclude>lib/**</exclude>
+            </excludes>
+            <includes>
+                <include>README.md</include>
+                <include>bin/**</include>
+                <include>config/**</include>
+                <include>plugins/**</include>
+            </includes>
+        </fileSet>
+        <fileSet>
+            <directory>../seatunnel-core/seatunnel-core-flink/target</directory>
+            <includes>
+                <include>seatunnel-core-flink*.jar</include>
+            </includes>
+            <excludes>
+                <exclude>%regex[.*((javadoc)|(sources))\.jar]</exclude>
+            </excludes>
+            <outputDirectory>/lib</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>../seatunnel-core/seatunnel-core-flink/src/main/bin</directory>
+            <outputDirectory>/bin</outputDirectory>
+            <fileMode>0755</fileMode>
+        </fileSet>
+        <fileSet>
+            <directory>../seatunnel-core/seatunnel-core-flink-sql/src/main/bin</directory>
+            <outputDirectory>/bin</outputDirectory>
+            <fileMode>0755</fileMode>
+        </fileSet>
+        <fileSet>
+            <directory>../seatunnel-core/seatunnel-core-flink-sql/target</directory>
+            <includes>
+                <include>seatunnel-core-flink-sql*.jar</include>
+            </includes>
+            <excludes>
+                <exclude>%regex[.*((javadoc)|(sources))\.jar]</exclude>
+            </excludes>
+            <outputDirectory>/lib</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>../seatunnel-core/seatunnel-core-spark/src/main/bin</directory>
+            <outputDirectory>/bin</outputDirectory>
+            <fileMode>0755</fileMode>
+        </fileSet>
+        <fileSet>
+            <directory>../seatunnel-core/seatunnel-core-spark/target</directory>
+            <includes>
+                <include>seatunnel-core-spark*.jar</include>
+            </includes>
+            <excludes>
+                <exclude>%regex[.*((javadoc)|(sources))\.jar]</exclude>
+            </excludes>
+            <outputDirectory>/lib</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>../seatunnel-core/seatunnel-flink-starter/src/main/bin</directory>
+            <outputDirectory>/bin</outputDirectory>
+            <fileMode>0755</fileMode>
+        </fileSet>
+        <fileSet>
+            <directory>../seatunnel-core/seatunnel-flink-starter/target</directory>
+            <includes>
+                <include>seatunnel-flink-starter*.jar</include>
+            </includes>
+            <excludes>
+                <exclude>%regex[.*((javadoc)|(sources))\.jar]</exclude>
+            </excludes>
+            <outputDirectory>/lib</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>../seatunnel-core/seatunnel-spark-starter/src/main/bin</directory>
+            <outputDirectory>/bin</outputDirectory>
+            <fileMode>0755</fileMode>
+        </fileSet>
+        <fileSet>
+            <directory>../seatunnel-core/seatunnel-spark-starter/target</directory>
+            <includes>
+                <include>seatunnel-spark-starter*.jar</include>
+            </includes>
+            <excludes>
+                <exclude>%regex[.*((javadoc)|(sources))\.jar]</exclude>
+            </excludes>
+            <outputDirectory>/lib</outputDirectory>
+        </fileSet>
+
+        <fileSet>
+            <directory>../seatunnel-connectors/seatunnel-connectors-flink-dist/target/lib</directory>
+            <includes>
+                <include>seatunnel-connector-flink*.jar</include>
+            </includes>
+            <excludes>
+                <exclude>%regex[.*((javadoc)|(sources))\.jar]</exclude>
+            </excludes>
+            <outputDirectory>/connectors/flink</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>../seatunnel-connectors/seatunnel-connectors-flink-sql-dist/target/lib</directory>
+            <includes>
+                <include>flink-sql-connector*.jar</include>
+            </includes>
+            <excludes>
+                <exclude>%regex[.*((javadoc)|(sources))\.jar]</exclude>
+            </excludes>
+            <outputDirectory>/connectors/flink-sql</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>../seatunnel-connectors/seatunnel-connectors-spark-dist/target/lib</directory>
+            <includes>
+                <include>seatunnel-connector-spark*.jar</include>
+            </includes>
+            <excludes>
+                <exclude>%regex[.*((javadoc)|(sources))\.jar]</exclude>
+            </excludes>
+            <outputDirectory>/connectors/spark</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>../seatunnel-connectors-v2-dist/target/lib</directory>
+            <includes>
+                <include>connector-*.jar</include>
+            </includes>
+            <excludes>
+                <exclude>%regex[.*((javadoc)|(sources))\.jar]</exclude>
+                <exclude>connector-common*.jar</exclude>
+            </excludes>
+            <outputDirectory>/connectors/seatunnel</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>../</directory>
+            <includes>
+                <include>plugin-mapping.properties</include>
+            </includes>
+            <outputDirectory>/connectors</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>${project.build.directory}/bin</directory>
+            <outputDirectory>/bin</outputDirectory>
+            <includes>
+                <include>*</include>
+            </includes>
+            <fileMode>0755</fileMode>
+        </fileSet>
+
+        <fileSet>
+            <directory>release-docs</directory>
+            <outputDirectory>.</outputDirectory>
+        </fileSet>
+
+        <fileSet>
+            <directory>${basedir}/.././</directory>
+            <includes>
+                <include>DISCLAIMER</include>
+            </includes>
+            <outputDirectory>.</outputDirectory>
+        </fileSet>
+    </fileSets>
+</assembly>
diff --git a/seatunnel-dist/src/main/assembly/assembly-bin.xml b/seatunnel-dist/src/main/assembly/assembly-bin.xml
index 1f15cbc08a7..d68a147da07 100644
--- a/seatunnel-dist/src/main/assembly/assembly-bin.xml
+++ b/seatunnel-dist/src/main/assembly/assembly-bin.xml
@@ -149,17 +149,6 @@
             <outputDirectory>/connectors/spark</outputDirectory>
         </fileSet>
-        <fileSet>
-            <directory>../seatunnel-connectors-v2-dist/target/lib</directory>
-            <includes>
-                <include>connector-*.jar</include>
-            </includes>
-            <excludes>
-                <exclude>%regex[.*((javadoc)|(sources))\.jar]</exclude>
-                <exclude>connector-common*.jar</exclude>
-            </excludes>
-            <outputDirectory>/connectors/seatunnel</outputDirectory>
-        </fileSet>
         <fileSet>
             <directory>../</directory>
diff --git a/seatunnel-e2e/pom.xml b/seatunnel-e2e/pom.xml
index 4c0b0fd0b81..9f5e6882bc9 100644
--- a/seatunnel-e2e/pom.xml
+++ b/seatunnel-e2e/pom.xml
@@ -29,9 +29,37 @@
         <module>seatunnel-flink-e2e</module>
         <module>seatunnel-spark-e2e</module>
-        <module>seatunnel-flink-new-connector-e2e</module>
-        <module>seatunnel-spark-new-connector-e2e</module>
+        <module>seatunnel-flink-connector-v2-e2e</module>
+        <module>seatunnel-spark-connector-v2-e2e</module>
         <module>seatunnel-flink-sql-e2e</module>
     </modules>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.seatunnel</groupId>
+            <artifactId>seatunnel-connectors-v2-dist</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.seatunnel</groupId>
+            <artifactId>seatunnel-connectors-spark-dist</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.seatunnel</groupId>
+            <artifactId>seatunnel-connectors-flink-dist</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-dependency-plugin</artifactId>
+                <configuration>
+                    <skip>true</skip>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/seatunnel-e2e/seatunnel-flink-new-connector-e2e/pom.xml b/seatunnel-e2e/seatunnel-flink-connector-v2-e2e/pom.xml
similarity index 96%
rename from seatunnel-e2e/seatunnel-flink-new-connector-e2e/pom.xml
rename to seatunnel-e2e/seatunnel-flink-connector-v2-e2e/pom.xml
index 5c59d849102..c494f80f886 100644
--- a/seatunnel-e2e/seatunnel-flink-new-connector-e2e/pom.xml
+++ b/seatunnel-e2e/seatunnel-flink-connector-v2-e2e/pom.xml
@@ -23,7 +23,7 @@
     <modelVersion>4.0.0</modelVersion>
 
-    <artifactId>seatunnel-flink-new-connector-e2e</artifactId>
+    <artifactId>seatunnel-flink-connector-v2-e2e</artifactId>
diff --git a/seatunnel-e2e/seatunnel-flink-new-connector-e2e/src/test/java/org/apache/seatunnel/e2e/flink/FlinkContainer.java b/seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/flink/FlinkContainer.java
similarity index 99%
rename from seatunnel-e2e/seatunnel-flink-new-connector-e2e/src/test/java/org/apache/seatunnel/e2e/flink/FlinkContainer.java
rename to seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/flink/FlinkContainer.java
index b706468c3f0..0ffa3163480 100644
--- a/seatunnel-e2e/seatunnel-flink-new-connector-e2e/src/test/java/org/apache/seatunnel/e2e/flink/FlinkContainer.java
+++ b/seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/flink/FlinkContainer.java
@@ -164,5 +164,4 @@ private String getResource(String confFile) {
     private String getConnectorPath(String fileName) {
         return Paths.get(SEATUNNEL_CONNECTORS, "seatunnel", fileName).toString();
     }
-
 }
diff --git a/seatunnel-e2e/seatunnel-flink-new-connector-e2e/src/test/java/org/apache/seatunnel/e2e/flink/assertion/FakeSourceToAssertIT.java b/seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/flink/v2/assertion/FakeSourceToAssertIT.java
similarity index 96%
rename from seatunnel-e2e/seatunnel-flink-new-connector-e2e/src/test/java/org/apache/seatunnel/e2e/flink/assertion/FakeSourceToAssertIT.java
rename to seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/flink/v2/assertion/FakeSourceToAssertIT.java
index cc67b836a63..ce89154f40c 100644
--- a/seatunnel-e2e/seatunnel-flink-new-connector-e2e/src/test/java/org/apache/seatunnel/e2e/flink/assertion/FakeSourceToAssertIT.java
+++
b/seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/flink/v2/assertion/FakeSourceToAssertIT.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.seatunnel.e2e.flink.assertion; +package org.apache.seatunnel.e2e.flink.v2.assertion; import org.apache.seatunnel.e2e.flink.FlinkContainer; diff --git a/seatunnel-e2e/seatunnel-flink-new-connector-e2e/src/test/java/org/apache/seatunnel/e2e/flink/fake/FakeSourceToConsoleIT.java b/seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/flink/v2/fake/FakeSourceToConsoleIT.java similarity index 94% rename from seatunnel-e2e/seatunnel-flink-new-connector-e2e/src/test/java/org/apache/seatunnel/e2e/flink/fake/FakeSourceToConsoleIT.java rename to seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/flink/v2/fake/FakeSourceToConsoleIT.java index 2663eb80af4..d480fc94671 100644 --- a/seatunnel-e2e/seatunnel-flink-new-connector-e2e/src/test/java/org/apache/seatunnel/e2e/flink/fake/FakeSourceToConsoleIT.java +++ b/seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/flink/v2/fake/FakeSourceToConsoleIT.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.seatunnel.e2e.flink.fake; +package org.apache.seatunnel.e2e.flink.v2.fake; import org.apache.seatunnel.e2e.flink.FlinkContainer; @@ -28,7 +28,6 @@ public class FakeSourceToConsoleIT extends FlinkContainer { @Test - @SuppressWarnings("magicnumber") public void testFakeSourceToConsoleSink() throws IOException, InterruptedException { Container.ExecResult execResult = executeSeaTunnelFlinkJob("/fake/fakesource_to_console.conf"); Assert.assertEquals(0, execResult.getExitCode()); diff --git a/seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/flink/v2/file/FakeSourceToFileIT.java b/seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/flink/v2/file/FakeSourceToFileIT.java new file mode 100644 index 00000000000..ec52203c53b --- /dev/null +++ b/seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/flink/v2/file/FakeSourceToFileIT.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.e2e.flink.v2.file; + +import org.apache.seatunnel.e2e.flink.FlinkContainer; + +import org.junit.Assert; +import org.junit.Test; +import org.testcontainers.containers.Container; + +import java.io.IOException; + +public class FakeSourceToFileIT extends FlinkContainer { + @Test + public void testFakeSourceToFileSink() throws IOException, InterruptedException { + Container.ExecResult execResult = executeSeaTunnelFlinkJob("/file/fakesource_to_file.conf"); + Assert.assertEquals(0, execResult.getExitCode()); + } +} diff --git a/seatunnel-e2e/seatunnel-flink-new-connector-e2e/src/test/resources/assertion/fakesource_to_assert.conf b/seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/resources/assertion/fakesource_to_assert.conf similarity index 100% rename from seatunnel-e2e/seatunnel-flink-new-connector-e2e/src/test/resources/assertion/fakesource_to_assert.conf rename to seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/resources/assertion/fakesource_to_assert.conf diff --git a/seatunnel-e2e/seatunnel-flink-new-connector-e2e/src/test/resources/fake/fakesource_to_console.conf b/seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/resources/fake/fakesource_to_console.conf similarity index 100% rename from seatunnel-e2e/seatunnel-flink-new-connector-e2e/src/test/resources/fake/fakesource_to_console.conf rename to seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/resources/fake/fakesource_to_console.conf diff --git a/seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/resources/file/fakesource_to_file.conf b/seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/resources/file/fakesource_to_file.conf new file mode 100644 index 00000000000..7e01a805f30 --- /dev/null +++ b/seatunnel-e2e/seatunnel-flink-connector-v2-e2e/src/test/resources/file/fakesource_to_file.conf @@ -0,0 +1,64 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+######
+###### This config file is a demonstration of batch processing in seatunnel config
+######
+
+env {
+  # You can set flink configuration here
+  execution.parallelism = 1
+  job.mode = "BATCH"
+  #execution.checkpoint.interval = 10000
+  #execution.checkpoint.data-uri = "hdfs://localhost:9000/checkpoint"
+}
+
+source {
+  # This is an example source plugin **only for testing and demonstrating the source plugin feature**
+  FakeSource {
+    result_table_name = "fake"
+    field_name = "name,age"
+  }
+
+  # If you would like to get more information about how to configure seatunnel and see the full list of source plugins,
+  # please go to https://seatunnel.apache.org/docs/flink/configuration/source-plugins/Fake
+}
+
+transform {
+  # If you would like to get more information about how to configure seatunnel and see the full list of transform plugins,
+  # please go to https://seatunnel.apache.org/docs/flink/configuration/transform-plugins/Sql
+}
+
+sink {
+  LocalFile {
+    path="file:///tmp/hive/warehouse/test2"
+    field_delimiter="\t"
+    row_delimiter="\n"
+    partition_by=["age"]
+    partition_dir_expression="${k0}=${v0}"
+    is_partition_field_write_in_file=true
+    file_name_expression="${transactionId}_${now}"
+    file_format="text"
+    sink_columns=["name","age"]
+    filename_time_format="yyyy.MM.dd"
+    is_enable_transaction=true
+    save_mode="error"
+
+  }
+
+  # If you would like to get more information about how to configure seatunnel and see the full list of sink plugins,
+  # please go to https://seatunnel.apache.org/docs/flink/configuration/sink-plugins/Console
+}
\ No newline at end of file
diff --git a/seatunnel-e2e/seatunnel-flink-e2e/src/test/java/org/apache/seatunnel/e2e/flink/clickhouse/FakeSourceToClickhouseIT.java b/seatunnel-e2e/seatunnel-flink-e2e/src/test/java/org/apache/seatunnel/e2e/flink/clickhouse/FakeSourceToClickhouseIT.java
index adbea780648..1bf0fc6e5ac 100644
--- a/seatunnel-e2e/seatunnel-flink-e2e/src/test/java/org/apache/seatunnel/e2e/flink/clickhouse/FakeSourceToClickhouseIT.java
+++ b/seatunnel-e2e/seatunnel-flink-e2e/src/test/java/org/apache/seatunnel/e2e/flink/clickhouse/FakeSourceToClickhouseIT.java
@@ -50,7 +50,6 @@ public class FakeSourceToClickhouseIT extends FlinkContainer {
     private static final Logger LOGGER = LoggerFactory.getLogger(FakeSourceToClickhouseIT.class);
 
     @Before
-    @SuppressWarnings("magicnumber")
     public void startClickhouseContainer() throws InterruptedException {
         clickhouseServer = new GenericContainer<>(CLICKHOUSE_DOCKER_IMAGE)
             .withNetwork(NETWORK)
diff --git a/seatunnel-e2e/seatunnel-spark-new-connector-e2e/pom.xml b/seatunnel-e2e/seatunnel-spark-connector-v2-e2e/pom.xml
similarity index 96%
rename from seatunnel-e2e/seatunnel-spark-new-connector-e2e/pom.xml
rename to seatunnel-e2e/seatunnel-spark-connector-v2-e2e/pom.xml
index 925075cbb18..732d9e51bf6 100644
--- a/seatunnel-e2e/seatunnel-spark-new-connector-e2e/pom.xml
+++ b/seatunnel-e2e/seatunnel-spark-connector-v2-e2e/pom.xml
@@ -24,7 +24,7 @@
     <modelVersion>4.0.0</modelVersion>
     <packaging>jar</packaging>
 
-    <artifactId>seatunnel-spark-new-connector-e2e</artifactId>
+    <artifactId>seatunnel-spark-connector-v2-e2e</artifactId>
diff --git a/seatunnel-e2e/seatunnel-spark-new-connector-e2e/src/test/java/org/apache/seatunnel/e2e/spark/SparkContainer.java b/seatunnel-e2e/seatunnel-spark-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/spark/SparkContainer.java
similarity index 100%
rename from seatunnel-e2e/seatunnel-spark-new-connector-e2e/src/test/java/org/apache/seatunnel/e2e/spark/SparkContainer.java
rename to
seatunnel-e2e/seatunnel-spark-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/spark/SparkContainer.java
diff --git a/seatunnel-e2e/seatunnel-spark-new-connector-e2e/src/test/java/org/apache/seatunnel/e2e/spark/fake/FakeSourceToConsoleIT.java b/seatunnel-e2e/seatunnel-spark-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/spark/v2/fake/FakeSourceToConsoleIT.java
similarity index 94%
rename from seatunnel-e2e/seatunnel-spark-new-connector-e2e/src/test/java/org/apache/seatunnel/e2e/spark/fake/FakeSourceToConsoleIT.java
rename to seatunnel-e2e/seatunnel-spark-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/spark/v2/fake/FakeSourceToConsoleIT.java
index 2f9f1d1d7b1..0749ec06f21 100644
--- a/seatunnel-e2e/seatunnel-spark-new-connector-e2e/src/test/java/org/apache/seatunnel/e2e/spark/fake/FakeSourceToConsoleIT.java
+++ b/seatunnel-e2e/seatunnel-spark-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/spark/v2/fake/FakeSourceToConsoleIT.java
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.seatunnel.e2e.spark.fake;
+package org.apache.seatunnel.e2e.spark.v2.fake;
 
 import org.apache.seatunnel.e2e.spark.SparkContainer;
 
@@ -32,7 +32,6 @@ public class FakeSourceToConsoleIT extends SparkContainer {
 
     @Test
-    @SuppressWarnings("magicnumber")
     public void testFakeSourceToConsoleSine() throws IOException, InterruptedException {
         Container.ExecResult execResult = executeSeaTunnelSparkJob("/fake/fakesource_to_console.conf");
         Assert.assertEquals(0, execResult.getExitCode());
diff --git a/seatunnel-e2e/seatunnel-spark-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/spark/v2/file/FakeSourceToFileIT.java b/seatunnel-e2e/seatunnel-spark-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/spark/v2/file/FakeSourceToFileIT.java
new file mode 100644
index 00000000000..c2aeec5131e
--- /dev/null
+++ b/seatunnel-e2e/seatunnel-spark-connector-v2-e2e/src/test/java/org/apache/seatunnel/e2e/spark/v2/file/FakeSourceToFileIT.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.e2e.spark.v2.file;
+
+import org.apache.seatunnel.e2e.spark.SparkContainer;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.testcontainers.containers.Container;
+
+import java.io.IOException;
+
+/**
+ * This test case is used to verify that the fake source is able to write data to a local file sink.
+ * Make sure the SeaTunnel job can submit successfully on the spark engine.
+ */ +public class FakeSourceToFileIT extends SparkContainer { + + @Test + public void testFakeSourceToFile() throws IOException, InterruptedException { + Container.ExecResult execResult = executeSeaTunnelSparkJob("/file/fakesource_to_file.conf"); + Assert.assertEquals(0, execResult.getExitCode()); + } +} diff --git a/seatunnel-e2e/seatunnel-spark-new-connector-e2e/src/test/resources/fake/fakesource_to_console.conf b/seatunnel-e2e/seatunnel-spark-connector-v2-e2e/src/test/resources/fake/fakesource_to_console.conf similarity index 100% rename from seatunnel-e2e/seatunnel-spark-new-connector-e2e/src/test/resources/fake/fakesource_to_console.conf rename to seatunnel-e2e/seatunnel-spark-connector-v2-e2e/src/test/resources/fake/fakesource_to_console.conf diff --git a/seatunnel-e2e/seatunnel-spark-connector-v2-e2e/src/test/resources/file/fakesource_to_file.conf b/seatunnel-e2e/seatunnel-spark-connector-v2-e2e/src/test/resources/file/fakesource_to_file.conf new file mode 100644 index 00000000000..e70490855c3 --- /dev/null +++ b/seatunnel-e2e/seatunnel-spark-connector-v2-e2e/src/test/resources/file/fakesource_to_file.conf @@ -0,0 +1,68 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+######
+###### This config file is a demonstration of batch processing in seatunnel config
+######
+
+env {
+  # You can set spark configuration here
+  execution.parallelism = 1
+  job.mode = "BATCH"
+  #execution.checkpoint.interval = 10000
+  #execution.checkpoint.data-uri = "hdfs://localhost:9000/checkpoint"
+}
+
+source {
+  # This is an example source plugin **only for testing and demonstrating the source plugin feature**
+  FakeSource {
+    result_table_name = "fake"
+    field_name = "name,age"
+  }
+
+  # If you would like to get more information about how to configure seatunnel and see the full list of source plugins,
+  # please go to https://seatunnel.apache.org/docs/flink/configuration/source-plugins/Fake
+}
+
+transform {
+
+  sql {
+    sql = "select name,age from fake"
+  }
+  # If you would like to get more information about how to configure seatunnel and see the full list of transform plugins,
+  # please go to https://seatunnel.apache.org/docs/flink/configuration/transform-plugins/Sql
+}
+
+sink {
+  LocalFile {
+    path="file:///tmp/hive/warehouse/test2"
+    field_delimiter="\t"
+    row_delimiter="\n"
+    partition_by=["age"]
+    partition_dir_expression="${k0}=${v0}"
+    is_partition_field_write_in_file=true
+    file_name_expression="${transactionId}_${now}"
+    file_format="text"
+    sink_columns=["name","age"]
+    filename_time_format="yyyy.MM.dd"
+    is_enable_transaction=true
+    save_mode="error"
+
+  }
+
+  # If you would like to get more information about how to configure seatunnel and see the full list of sink plugins,
+  # please go to https://seatunnel.apache.org/docs/flink/configuration/sink-plugins/Console
+}
\ No newline at end of file
diff --git a/seatunnel-e2e/seatunnel-spark-new-connector-e2e/src/test/resources/log4j.properties b/seatunnel-e2e/seatunnel-spark-new-connector-e2e/src/test/resources/log4j.properties
deleted file mode 100644
index 89547981c79..00000000000
--- a/seatunnel-e2e/seatunnel-spark-new-connector-e2e/src/test/resources/log4j.properties
+++ /dev/null
@@ -1,22 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# -# Set everything to be logged to the console -log4j.rootCategory=ERROR, console -log4j.appender.console=org.apache.log4j.ConsoleAppender -log4j.appender.console.target=System.err -log4j.appender.console.layout=org.apache.log4j.PatternLayout -log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n diff --git a/seatunnel-examples/pom.xml b/seatunnel-examples/pom.xml index 823b1404ecf..6025128c95d 100644 --- a/seatunnel-examples/pom.xml +++ b/seatunnel-examples/pom.xml @@ -33,8 +33,8 @@ seatunnel-flink-examples seatunnel-spark-examples seatunnel-flink-sql-examples - seatunnel-flink-new-connector-example - seatunnel-spark-new-connector-example + seatunnel-flink-connector-v2-example + seatunnel-spark-connector-v2-example diff --git a/seatunnel-examples/seatunnel-flink-new-connector-example/pom.xml b/seatunnel-examples/seatunnel-flink-connector-v2-example/pom.xml similarity index 93% rename from seatunnel-examples/seatunnel-flink-new-connector-example/pom.xml rename to seatunnel-examples/seatunnel-flink-connector-v2-example/pom.xml index c27915ac9d4..215a586f8a1 100644 --- a/seatunnel-examples/seatunnel-flink-new-connector-example/pom.xml +++ b/seatunnel-examples/seatunnel-flink-connector-v2-example/pom.xml @@ -27,7 +27,7 @@ 4.0.0 - seatunnel-flink-new-connector-example + seatunnel-flink-connector-v2-example compile @@ -56,6 +56,11 @@ connector-console ${project.version} + + org.apache.seatunnel + connector-file-local + ${project.version} + org.apache.seatunnel connector-socket @@ -103,4 +108,4 @@ - \ No newline at end of file + diff --git a/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/java/org/apache/seatunnel/example/flink/v2/FakeToLocalFileExample.java b/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/java/org/apache/seatunnel/example/flink/v2/FakeToLocalFileExample.java new file mode 100644 index 00000000000..fc15ff44b2d --- /dev/null +++ b/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/java/org/apache/seatunnel/example/flink/v2/FakeToLocalFileExample.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.seatunnel.example.flink.v2;
+
+import org.apache.seatunnel.core.starter.Seatunnel;
+import org.apache.seatunnel.core.starter.command.Command;
+import org.apache.seatunnel.core.starter.exception.CommandException;
+import org.apache.seatunnel.core.starter.flink.args.FlinkCommandArgs;
+import org.apache.seatunnel.core.starter.flink.command.FlinkCommandBuilder;
+
+import java.io.FileNotFoundException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.Paths;
+
+public class FakeToLocalFileExample {
+
+    public static void main(String[] args) throws FileNotFoundException, URISyntaxException, CommandException {
+        String configFile = getTestConfigFile("/examples/fakesource_to_file.conf");
+        FlinkCommandArgs flinkCommandArgs = new FlinkCommandArgs();
+        flinkCommandArgs.setConfigFile(configFile);
+        flinkCommandArgs.setCheckConfig(false);
+        flinkCommandArgs.setVariables(null);
+        Command<FlinkCommandArgs> flinkCommand =
+                new FlinkCommandBuilder().buildCommand(flinkCommandArgs);
+        Seatunnel.run(flinkCommand);
+    }
+
+    public static String getTestConfigFile(String configFile) throws FileNotFoundException, URISyntaxException {
+        URL resource = FakeToLocalFileExample.class.getResource(configFile);
+        if (resource == null) {
+            throw new FileNotFoundException("Can't find config file: " + configFile);
+        }
+        return Paths.get(resource.toURI()).toString();
+    }
+}
diff --git a/seatunnel-examples/seatunnel-flink-new-connector-example/src/main/java/org/apache/seatunnel/example/flink/SeaTunnelApiExample.java b/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/java/org/apache/seatunnel/example/flink/v2/SeaTunnelApiExample.java
similarity index 97%
rename from seatunnel-examples/seatunnel-flink-new-connector-example/src/main/java/org/apache/seatunnel/example/flink/SeaTunnelApiExample.java
rename to seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/java/org/apache/seatunnel/example/flink/v2/SeaTunnelApiExample.java
index 56a2882b677..79912ae9619 100644
--- a/seatunnel-examples/seatunnel-flink-new-connector-example/src/main/java/org/apache/seatunnel/example/flink/SeaTunnelApiExample.java
+++ b/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/java/org/apache/seatunnel/example/flink/v2/SeaTunnelApiExample.java
@@ -15,7 +15,7 @@
  * limitations under the License.
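The FakeToLocalFileExample above shows the whole pattern for submitting a SeaTunnel job programmatically: resolve a bundled config to a filesystem path, populate the command args, build a command, and run it. As a minimal sketch of reusing that pattern for a different bundled config (the class name here is hypothetical; `fake_to_console.conf` is the renamed example config that ships with this module, and the starter API calls are exactly those used above):

```java
package org.apache.seatunnel.example.flink.v2;

import org.apache.seatunnel.core.starter.Seatunnel;
import org.apache.seatunnel.core.starter.command.Command;
import org.apache.seatunnel.core.starter.exception.CommandException;
import org.apache.seatunnel.core.starter.flink.args.FlinkCommandArgs;
import org.apache.seatunnel.core.starter.flink.command.FlinkCommandBuilder;

import java.io.FileNotFoundException;
import java.net.URISyntaxException;

// Hypothetical variant of FakeToLocalFileExample that runs the bundled
// console example config through the same Flink starter entry point.
public class FakeToConsoleExample {

    public static void main(String[] args) throws FileNotFoundException, URISyntaxException, CommandException {
        // Resolve the classpath resource to a path, reusing the helper shown above.
        String configFile = FakeToLocalFileExample.getTestConfigFile("/examples/fake_to_console.conf");
        FlinkCommandArgs flinkCommandArgs = new FlinkCommandArgs();
        flinkCommandArgs.setConfigFile(configFile);
        flinkCommandArgs.setCheckConfig(false);
        flinkCommandArgs.setVariables(null);
        Command<FlinkCommandArgs> flinkCommand = new FlinkCommandBuilder().buildCommand(flinkCommandArgs);
        Seatunnel.run(flinkCommand);
    }
}
```

Swapping the config path is the only change needed; everything else is the same wiring the example file introduces.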
*/ -package org.apache.seatunnel.example.flink; +package org.apache.seatunnel.example.flink.v2; import org.apache.seatunnel.core.starter.Seatunnel; import org.apache.seatunnel.core.starter.command.Command; diff --git a/seatunnel-examples/seatunnel-flink-new-connector-example/src/main/resources/examples/fake_to_console.conf b/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/resources/examples/fake_to_console.conf similarity index 100% rename from seatunnel-examples/seatunnel-flink-new-connector-example/src/main/resources/examples/fake_to_console.conf rename to seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/resources/examples/fake_to_console.conf diff --git a/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/resources/examples/fakesource_to_file.conf b/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/resources/examples/fakesource_to_file.conf new file mode 100644 index 00000000000..f7b790c40b8 --- /dev/null +++ b/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/resources/examples/fakesource_to_file.conf @@ -0,0 +1,68 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+######
+###### This config file is a demonstration of batch processing in seatunnel config
+######
+
+env {
+  # You can set flink configuration here
+  execution.parallelism = 1
+  job.mode = "BATCH"
+  execution.checkpoint.interval = 10
+  #execution.checkpoint.data-uri = "hdfs://localhost:9000/checkpoint"
+}
+
+source {
+  # This is an example source plugin **only for testing and demonstrating the source plugin feature**
+  FakeSource {
+    result_table_name = "fake"
+    field_name = "name,age"
+  }
+
+  # If you would like to get more information about how to configure seatunnel and see the full list of source plugins,
+  # please go to https://seatunnel.apache.org/docs/flink/configuration/source-plugins/Fake
+}
+
+transform {
+
+  sql {
+    sql = "select name,age from fake"
+  }
+  # If you would like to get more information about how to configure seatunnel and see the full list of transform plugins,
+  # please go to https://seatunnel.apache.org/docs/flink/configuration/transform-plugins/Sql
+}
+
+sink {
+  LocalFile {
+    path="file:///tmp/hive/warehouse/test2"
+    field_delimiter="\t"
+    row_delimiter="\n"
+    partition_by=["age"]
+    partition_dir_expression="${k0}=${v0}"
+    is_partition_field_write_in_file=true
+    file_name_expression="${transactionId}_${now}"
+    file_format="text"
+    sink_columns=["name","age"]
+    filename_time_format="yyyy.MM.dd"
+    is_enable_transaction=true
+    save_mode="error"
+
+  }
+
+  # If you would like to get more information about how to configure seatunnel and see the full list of sink plugins,
+  # please go to https://seatunnel.apache.org/docs/flink/configuration/sink-plugins/Console
+}
\ No newline at end of file
diff --git a/seatunnel-examples/seatunnel-spark-new-connector-example/pom.xml b/seatunnel-examples/seatunnel-spark-connector-v2-example/pom.xml
similarity index 98%
rename from seatunnel-examples/seatunnel-spark-new-connector-example/pom.xml
rename to seatunnel-examples/seatunnel-spark-connector-v2-example/pom.xml
index e75a88ce53d..8c3978e53d7 100644
--- a/seatunnel-examples/seatunnel-spark-new-connector-example/pom.xml
+++ b/seatunnel-examples/seatunnel-spark-connector-v2-example/pom.xml
@@ -27,7 +27,7 @@ 4.0.0 - seatunnel-spark-new-connector-example + seatunnel-spark-connector-v2-example compile
@@ -80,12 +80,12 @@ ${spark.version} ${spark.scope} + net.jpountz.lz4 lz4 1.3.0 - \ No newline at end of file
diff --git a/seatunnel-examples/seatunnel-spark-new-connector-example/src/main/java/org/apache/seatunnel/example/spark/SeaTunnelApiExample.java b/seatunnel-examples/seatunnel-spark-connector-v2-example/src/main/java/org/apache/seatunnel/example/spark/v2/SeaTunnelApiExample.java
similarity index 94%
rename from seatunnel-examples/seatunnel-spark-new-connector-example/src/main/java/org/apache/seatunnel/example/spark/SeaTunnelApiExample.java
rename to seatunnel-examples/seatunnel-spark-connector-v2-example/src/main/java/org/apache/seatunnel/example/spark/v2/SeaTunnelApiExample.java
index cdb988fba55..a4a84986cf3 100644
--- a/seatunnel-examples/seatunnel-spark-new-connector-example/src/main/java/org/apache/seatunnel/example/spark/SeaTunnelApiExample.java
+++ b/seatunnel-examples/seatunnel-spark-connector-v2-example/src/main/java/org/apache/seatunnel/example/spark/v2/SeaTunnelApiExample.java
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.seatunnel.example.spark;
+package org.apache.seatunnel.example.spark.v2;
 
 import org.apache.seatunnel.common.config.DeployMode;
 import org.apache.seatunnel.core.starter.Seatunnel;
@@ -38,9 +38,9 @@ public static void main(String[] args) throws FileNotFoundException, URISyntaxEx
         sparkCommandArgs.setCheckConfig(false);
         sparkCommandArgs.setVariables(null);
         sparkCommandArgs.setDeployMode(DeployMode.CLIENT);
-        Command<SparkCommandArgs> flinkCommand =
+        Command<SparkCommandArgs> sparkCommand =
                 new SparkCommandBuilder().buildCommand(sparkCommandArgs);
-        Seatunnel.run(flinkCommand);
+        Seatunnel.run(sparkCommand);
     }
 
     public static String getTestConfigFile(String configFile) throws FileNotFoundException, URISyntaxException {
diff --git a/seatunnel-examples/seatunnel-spark-new-connector-example/src/main/resources/examples/spark.batch.conf b/seatunnel-examples/seatunnel-spark-connector-v2-example/src/main/resources/examples/spark.batch.conf
similarity index 100%
rename from seatunnel-examples/seatunnel-spark-new-connector-example/src/main/resources/examples/spark.batch.conf
rename to seatunnel-examples/seatunnel-spark-connector-v2-example/src/main/resources/examples/spark.batch.conf
diff --git a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java
index edbd788129d..529049455e8 100644
--- a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java
+++ b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java
@@ -20,6 +20,7 @@
 import org.apache.seatunnel.api.common.PluginIdentifierInterface;
 import org.apache.seatunnel.apis.base.plugin.Plugin;
 import org.apache.seatunnel.common.config.Common;
+import org.apache.seatunnel.common.utils.ReflectionUtils;
 import org.apache.seatunnel.shade.com.typesafe.config.Config;
 import org.apache.seatunnel.shade.com.typesafe.config.ConfigValue;
 
@@ -29,6 +30,8 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import javax.annotation.Nullable;
+
 import java.io.File;
 import java.io.FileFilter;
 import java.net.MalformedURLException;
@@ -40,6 +43,7 @@
 import java.util.Optional;
 import java.util.ServiceLoader;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.function.BiConsumer;
 import java.util.stream.Collectors;
 
 public abstract class AbstractPluginDiscovery<T> implements PluginDiscovery<T> {
@@ -47,10 +51,26 @@ public abstract class AbstractPluginDiscovery<T> implements PluginDiscovery<T> {
     private static final Logger LOGGER = LoggerFactory.getLogger(AbstractPluginDiscovery.class);
 
     private final Path pluginDir;
-    protected final ConcurrentHashMap<PluginIdentifier, Optional<T>> pluginInstanceMap =
-        new ConcurrentHashMap<>(Common.COLLECTION_SIZE);
+    /**
+     * Add a jar URL to the classloader.
+     * Different engines may need their own logic to add the URL into
+     * their own classloaders.
+     */
+    private BiConsumer<ClassLoader, URL> addURLToClassLoader = (classLoader, url) -> {
+        if (classLoader instanceof URLClassLoader) {
+            ReflectionUtils.invoke(classLoader, "addURL", url);
+        } else {
+            throw new UnsupportedOperationException("can't add jar to custom classloader");
+        }
+    };
+
     protected final ConcurrentHashMap<PluginIdentifier, Optional<URL>> pluginJarPath =
-        new ConcurrentHashMap<>(Common.COLLECTION_SIZE);
+            new ConcurrentHashMap<>(Common.COLLECTION_SIZE);
+
+    public AbstractPluginDiscovery(String pluginSubDir, BiConsumer<ClassLoader, URL> addURLToClassloader) {
+        this.pluginDir = Common.connectorJarDir(pluginSubDir);
+        this.addURLToClassLoader = addURLToClassloader;
+        LOGGER.info("Load {} Plugin from {}", getPluginBaseClass().getSimpleName(), pluginDir);
+    }
 
     public AbstractPluginDiscovery(String pluginSubDir) {
         this.pluginDir = Common.connectorJarDir(pluginSubDir);
@@ -60,27 +80,69 @@ public AbstractPluginDiscovery(String pluginSubDir) {
     @Override
     public List<URL> getPluginJarPaths(List<PluginIdentifier> pluginIdentifiers) {
         return pluginIdentifiers.stream()
-            .map(this::getPluginJarPath)
-            .filter(Optional::isPresent)
-            .map(Optional::get).distinct()
-            .collect(Collectors.toList());
+                .map(this::getPluginJarPath)
+                .filter(Optional::isPresent)
+                .map(Optional::get).distinct()
+                .collect(Collectors.toList());
     }
 
     @Override
     public List<T> getAllPlugins(List<PluginIdentifier> pluginIdentifiers) {
         return pluginIdentifiers.stream()
-            .map(this::getPluginInstance).distinct()
+            .map(this::createPluginInstance).distinct()
             .collect(Collectors.toList());
     }
 
     @Override
-    public T getPluginInstance(PluginIdentifier pluginIdentifier) {
-        Optional<T> pluginInstance = pluginInstanceMap
-            .computeIfAbsent(pluginIdentifier, this::createPluginInstance);
-        if (!pluginInstance.isPresent()) {
-            throw new IllegalArgumentException("Can't find plugin: " + pluginIdentifier);
+    public T createPluginInstance(PluginIdentifier pluginIdentifier) {
+        ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
+        T pluginInstance = loadPluginInstance(pluginIdentifier, classLoader);
+        if (pluginInstance != null) {
+            LOGGER.info("Load plugin: {} from classpath", pluginIdentifier);
+            return pluginInstance;
+        }
+        Optional<URL> pluginJarPath = getPluginJarPath(pluginIdentifier);
+        // if the plugin jar does not exist in the classpath, load it from the plugin dir.
+        if (pluginJarPath.isPresent()) {
+            try {
+                // use the current thread classloader to avoid the same class being loaded by different classloaders.
+                this.addURLToClassLoader.accept(classLoader, pluginJarPath.get());
+            } catch (Exception e) {
+                LOGGER.warn("can't load jar with the current thread classloader, using a URLClassLoader instead."
+ + " message: " + e.getMessage()); + classLoader = new URLClassLoader(new URL[]{pluginJarPath.get()}, Thread.currentThread().getContextClassLoader()); + } + pluginInstance = loadPluginInstance(pluginIdentifier, classLoader); + if (pluginInstance != null) { + LOGGER.info("Load plugin: {} from path: {} use classloader: {}", + pluginIdentifier, pluginJarPath.get(), classLoader.getClass().getName()); + return pluginInstance; + } + } + throw new RuntimeException("Plugin " + pluginIdentifier + " not found."); + } + + @Nullable + private T loadPluginInstance(PluginIdentifier pluginIdentifier, ClassLoader classLoader) { + ServiceLoader serviceLoader = ServiceLoader.load(getPluginBaseClass(), classLoader); + for (T t : serviceLoader) { + if (t instanceof Plugin) { + // old api + Plugin pluginInstance = (Plugin) t; + if (StringUtils.equalsIgnoreCase(pluginInstance.getPluginName(), pluginIdentifier.getPluginName())) { + return (T) pluginInstance; + } + } else if (t instanceof PluginIdentifierInterface) { + // new api + PluginIdentifierInterface pluginIdentifierInstance = (PluginIdentifierInterface) t; + if (StringUtils.equalsIgnoreCase(pluginIdentifierInstance.getPluginName(), pluginIdentifier.getPluginName())) { + return (T) pluginIdentifierInstance; + } + } else { + throw new UnsupportedOperationException("Plugin instance: " + t + " is not supported."); + } } - return pluginInstance.get(); + return null; } /** @@ -146,36 +208,4 @@ public boolean accept(File pathname) { return Optional.empty(); } } - - private Optional createPluginInstance(PluginIdentifier pluginIdentifier) { - Optional pluginJarPath = getPluginJarPath(pluginIdentifier); - ClassLoader classLoader; - // if the plugin jar not exist in plugin dir, will load from classpath. - if (pluginJarPath.isPresent()) { - LOGGER.info("Load plugin: {} from path: {}", pluginIdentifier, pluginJarPath.get()); - classLoader = new URLClassLoader(new URL[]{pluginJarPath.get()}, Thread.currentThread().getContextClassLoader()); - } else { - LOGGER.info("Load plugin: {} from classpath", pluginIdentifier); - classLoader = Thread.currentThread().getContextClassLoader(); - } - ServiceLoader serviceLoader = ServiceLoader.load(getPluginBaseClass(), classLoader); - for (T t : serviceLoader) { - if (t instanceof Plugin) { - // old api - Plugin pluginInstance = (Plugin) t; - if (StringUtils.equalsIgnoreCase(pluginInstance.getPluginName(), pluginIdentifier.getPluginName())) { - return Optional.of((T) pluginInstance); - } - } else if (t instanceof PluginIdentifierInterface) { - // new api - PluginIdentifierInterface pluginIdentifierInstance = (PluginIdentifierInterface) t; - if (StringUtils.equalsIgnoreCase(pluginIdentifierInstance.getPluginName(), pluginIdentifier.getPluginName())) { - return Optional.of((T) pluginIdentifierInstance); - } - } else { - throw new UnsupportedOperationException("Plugin instance: " + t + " is not supported."); - } - } - return Optional.empty(); - } } diff --git a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/PluginDiscovery.java b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/PluginDiscovery.java index cdc85860dd5..8a571f92c2f 100644 --- a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/PluginDiscovery.java +++ b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/PluginDiscovery.java @@ -58,7 +58,7 @@ public interface PluginDiscovery { * @param pluginIdentifier plugin identifier. * @return plugin instance. 
-    T getPluginInstance(PluginIdentifier pluginIdentifier);
+    T createPluginInstance(PluginIdentifier pluginIdentifier);
 
     /**
      * Get all plugin instances.
diff --git a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/PluginIdentifier.java b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/PluginIdentifier.java
index 97d6e9f81db..37e322b7390 100644
--- a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/PluginIdentifier.java
+++ b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/PluginIdentifier.java
@@ -70,7 +70,6 @@ public boolean equals(Object o) {
     }
 
     @Override
-    @SuppressWarnings("checkstyle:magicnumber")
     public int hashCode() {
         int result = engineType != null ? engineType.hashCode() : 0;
         result = 31 * result + (pluginType != null ? pluginType.hashCode() : 0);
diff --git a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/flink/FlinkAbstractPluginDiscovery.java b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/flink/FlinkAbstractPluginDiscovery.java
new file mode 100644
index 00000000000..a9956fd3fff
--- /dev/null
+++ b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/flink/FlinkAbstractPluginDiscovery.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.plugin.discovery.flink;
+
+import org.apache.seatunnel.common.utils.ReflectionUtils;
+import org.apache.seatunnel.plugin.discovery.AbstractPluginDiscovery;
+
+import java.net.URLClassLoader;
+
+public abstract class FlinkAbstractPluginDiscovery<T> extends AbstractPluginDiscovery<T> {
+
+    public FlinkAbstractPluginDiscovery(String pluginSubDir) {
+        super(pluginSubDir, (classLoader, url) -> {
+            if (classLoader.getClass().getName().endsWith("SafetyNetWrapperClassLoader")) {
+                URLClassLoader c = (URLClassLoader) ReflectionUtils.getField(classLoader, "inner").get();
+                ReflectionUtils.invoke(c, "addURL", url);
+            } else if (classLoader instanceof URLClassLoader) {
+                ReflectionUtils.invoke(classLoader, "addURL", url);
+            } else {
+                throw new RuntimeException("Unsupported classloader: " + classLoader.getClass().getName());
+            }
+        });
+    }
+
+}
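The wrapper branch exists because, when Flink's classloader leak check is active (the classloader.check-leaked-classloader option), the user-code classloader arrives wrapped in a SafetyNetWrapperClassLoader whose real URLClassLoader sits behind an "inner" field; treat that field name as a Flink implementation detail this reflection depends on. The Flink discoveries defined next need nothing special from callers, e.g.:

    // Jar paths resolve the same way regardless of which classloader hook is active.
    FlinkSourcePluginDiscovery discovery = new FlinkSourcePluginDiscovery();
    List<URL> jars = discovery.getPluginJarPaths(
            Collections.singletonList(PluginIdentifier.of("flink", "source", "FakeSource")));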
diff --git a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/flink/FlinkSinkPluginDiscovery.java b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/flink/FlinkSinkPluginDiscovery.java
index a2185a4ae3d..8c973cd0ef4 100644
--- a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/flink/FlinkSinkPluginDiscovery.java
+++ b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/flink/FlinkSinkPluginDiscovery.java
@@ -18,9 +18,8 @@
 package org.apache.seatunnel.plugin.discovery.flink;
 
 import org.apache.seatunnel.flink.BaseFlinkSink;
-import org.apache.seatunnel.plugin.discovery.AbstractPluginDiscovery;
 
-public class FlinkSinkPluginDiscovery extends AbstractPluginDiscovery<BaseFlinkSink> {
+public class FlinkSinkPluginDiscovery extends FlinkAbstractPluginDiscovery<BaseFlinkSink> {
 
     public FlinkSinkPluginDiscovery() {
         super("flink");
diff --git a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/flink/FlinkSourcePluginDiscovery.java b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/flink/FlinkSourcePluginDiscovery.java
index 24ff89e0325..fd9e415642b 100644
--- a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/flink/FlinkSourcePluginDiscovery.java
+++ b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/flink/FlinkSourcePluginDiscovery.java
@@ -18,9 +18,8 @@
 package org.apache.seatunnel.plugin.discovery.flink;
 
 import org.apache.seatunnel.flink.BaseFlinkSource;
-import org.apache.seatunnel.plugin.discovery.AbstractPluginDiscovery;
 
-public class FlinkSourcePluginDiscovery extends AbstractPluginDiscovery<BaseFlinkSource> {
+public class FlinkSourcePluginDiscovery extends FlinkAbstractPluginDiscovery<BaseFlinkSource> {
 
     public FlinkSourcePluginDiscovery() {
         super("flink");
     }
diff --git a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/flink/FlinkTransformPluginDiscovery.java b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/flink/FlinkTransformPluginDiscovery.java
index cc77b49a0bd..12a91d0881f 100644
--- a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/flink/FlinkTransformPluginDiscovery.java
+++ b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/flink/FlinkTransformPluginDiscovery.java
@@ -18,14 +18,13 @@
 package org.apache.seatunnel.plugin.discovery.flink;
 
 import org.apache.seatunnel.flink.BaseFlinkTransform;
-import org.apache.seatunnel.plugin.discovery.AbstractPluginDiscovery;
 import org.apache.seatunnel.plugin.discovery.PluginIdentifier;
 
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.List;
 
-public class FlinkTransformPluginDiscovery extends AbstractPluginDiscovery<BaseFlinkTransform> {
+public class FlinkTransformPluginDiscovery extends FlinkAbstractPluginDiscovery<BaseFlinkTransform> {
 
     public FlinkTransformPluginDiscovery() {
         super("flink");
diff --git a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/seatunnel/SeaTunnelSinkPluginDiscovery.java b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/seatunnel/SeaTunnelSinkPluginDiscovery.java
index d3286c544b1..e2ca9427e20 100644
--- a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/seatunnel/SeaTunnelSinkPluginDiscovery.java
+++ b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/seatunnel/SeaTunnelSinkPluginDiscovery.java
@@ -20,12 +20,19 @@
 import org.apache.seatunnel.api.sink.SeaTunnelSink;
 import org.apache.seatunnel.plugin.discovery.AbstractPluginDiscovery;
 
+import java.net.URL;
+import java.util.function.BiConsumer;
+
 public class SeaTunnelSinkPluginDiscovery extends AbstractPluginDiscovery<SeaTunnelSink> {
 
     public SeaTunnelSinkPluginDiscovery() {
         super("seatunnel");
     }
 
+    public SeaTunnelSinkPluginDiscovery(BiConsumer<ClassLoader, URL> addURLToClassLoader) {
+        super("seatunnel", addURLToClassLoader);
+    }
+
     @Override
     protected Class<SeaTunnelSink> getPluginBaseClass() {
         return SeaTunnelSink.class;
diff --git a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/seatunnel/SeaTunnelSourcePluginDiscovery.java b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/seatunnel/SeaTunnelSourcePluginDiscovery.java
index 8618e037819..f9da2a0a9b8 100644
--- a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/seatunnel/SeaTunnelSourcePluginDiscovery.java
+++ b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/seatunnel/SeaTunnelSourcePluginDiscovery.java
@@ -20,11 +20,19 @@
 import org.apache.seatunnel.api.source.SeaTunnelSource;
 import org.apache.seatunnel.plugin.discovery.AbstractPluginDiscovery;
 
+import java.net.URL;
+import java.util.function.BiConsumer;
+
 public class SeaTunnelSourcePluginDiscovery extends AbstractPluginDiscovery<SeaTunnelSource> {
+
     public SeaTunnelSourcePluginDiscovery() {
         super("seatunnel");
     }
 
+    public SeaTunnelSourcePluginDiscovery(BiConsumer<ClassLoader, URL> addURLToClassLoader) {
+        super("seatunnel", addURLToClassLoader);
+    }
+
     @Override
     protected Class<SeaTunnelSource> getPluginBaseClass() {
         return SeaTunnelSource.class;
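The new one-argument constructors let each engine decide how a connector jar reaches its classloader instead of hard-coding the default hook. A sketch of wiring in a custom hook; the lambda mirrors the default behavior, and ReflectionUtils is the helper already used above:

    // Only safe when the engine's context classloader is a URLClassLoader
    // that tolerates having addURL invoked reflectively at runtime.
    SeaTunnelSourcePluginDiscovery discovery = new SeaTunnelSourcePluginDiscovery(
            (classLoader, url) -> ReflectionUtils.invoke(classLoader, "addURL", url));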
diff --git a/seatunnel-server/pom.xml b/seatunnel-server/pom.xml
new file mode 100644
index 00000000000..dc936151072
--- /dev/null
+++ b/seatunnel-server/pom.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements. See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License. You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>seatunnel</artifactId>
+        <groupId>org.apache.seatunnel</groupId>
+        <version>${revision}</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>seatunnel-server</artifactId>
+    <packaging>pom</packaging>
+
+    <modules>
+        <module>seatunnel-app</module>
+    </modules>
+</project>
\ No newline at end of file
diff --git a/seatunnel-server/seatunnel-app/pom.xml b/seatunnel-server/seatunnel-app/pom.xml
new file mode 100644
index 00000000000..eeb6b7ced5a
--- /dev/null
+++ b/seatunnel-server/seatunnel-app/pom.xml
@@ -0,0 +1,170 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements. See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License. You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>seatunnel-server</artifactId>
+        <groupId>org.apache.seatunnel</groupId>
+        <version>${revision}</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>seatunnel-app</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter-web</artifactId>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.springframework.boot</groupId>
+                    <artifactId>spring-boot-starter-tomcat</artifactId>
+                </exclusion>
+                <exclusion>
+                    <artifactId>log4j-to-slf4j</artifactId>
+                    <groupId>org.apache.logging.log4j</groupId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter-jetty</artifactId>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.eclipse.jetty.websocket</groupId>
+                    <artifactId>javax-websocket-server-impl</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.eclipse.jetty.websocket</groupId>
+                    <artifactId>websocket-server</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.mybatis.spring.boot</groupId>
+            <artifactId>mybatis-spring-boot-starter</artifactId>
+            <version>${mybatis-spring-boot-starter.version}</version>
+            <exclusions>
+                <exclusion>
+                    <artifactId>spring-beans</artifactId>
+                    <groupId>org.springframework</groupId>
+                </exclusion>
+                <exclusion>
+                    <artifactId>spring-boot-autoconfigure</artifactId>
+                    <groupId>org.springframework.boot</groupId>
+                </exclusion>
+                <exclusion>
+                    <artifactId>spring-core</artifactId>
+                    <groupId>org.springframework</groupId>
+                </exclusion>
+                <exclusion>
+                    <artifactId>spring-boot-starter</artifactId>
+                    <groupId>org.springframework.boot</groupId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>io.springfox</groupId>
+            <artifactId>springfox-swagger2</artifactId>
+            <version>${springfox-swagger.version}</version>
+            <exclusions>
+                <exclusion>
+                    <artifactId>spring-aop</artifactId>
+                    <groupId>org.springframework</groupId>
+                </exclusion>
+                <exclusion>
+                    <artifactId>spring-beans</artifactId>
+                    <groupId>org.springframework</groupId>
+                </exclusion>
+                <exclusion>
+                    <artifactId>spring-context</artifactId>
+                    <groupId>org.springframework</groupId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>io.springfox</groupId>
+            <artifactId>springfox-swagger-ui</artifactId>
+            <version>${springfox-swagger.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>io.swagger</groupId>
+            <artifactId>swagger-annotations</artifactId>
+            <version>${swagger-annotations.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.hibernate.validator</groupId>
+            <artifactId>hibernate-validator</artifactId>
+            <exclusions>
+                <exclusion>
+                    <artifactId>classmate</artifactId>
+                    <groupId>com.fasterxml</groupId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>mysql</groupId>
+            <artifactId>mysql-connector-java</artifactId>
+            <scope>provided</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.scala-lang</groupId>
+            <artifactId>scala-library</artifactId>
+            <scope>provided</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <configuration>
+                    <source>1.8</source>
+                    <target>1.8</target>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.springframework.boot</groupId>
+                <artifactId>spring-boot-maven-plugin</artifactId>
+                <version>${spring-boot.version}</version>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>repackage</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/SeatunnelApplication.java b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/SeatunnelApplication.java
new file mode 100644
index 00000000000..757e863fdba
--- /dev/null
+++ b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/SeatunnelApplication.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.app;
+
+import org.mybatis.spring.annotation.MapperScan;
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+import org.springframework.boot.context.properties.EnableConfigurationProperties;
+import org.springframework.scheduling.annotation.EnableAsync;
+import org.springframework.scheduling.annotation.EnableScheduling;
+import org.springframework.transaction.annotation.EnableTransactionManagement;
+
+@SpringBootApplication
+@EnableTransactionManagement
+@EnableConfigurationProperties
+@EnableScheduling
+@EnableAsync(proxyTargetClass = true)
+@MapperScan({"org.apache.seatunnel.app.dal"})
+public class SeatunnelApplication {
+    public static void main(String[] args) {
+        SpringApplication.run(SeatunnelApplication.class, args);
+    }
+}
diff --git a/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/DatasourceStatusEnum.java b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/DatasourceStatusEnum.java
new file mode 100644
index 00000000000..b271a5b0433
--- /dev/null
+++ b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/DatasourceStatusEnum.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.app.common;
+
+public enum DatasourceStatusEnum {
+    ONLINE(1, "online"),
+    OFFLINE(2, "offline"),
+    DELETED(3, "deleted"),
+    ;
+
+    private final int code;
+    private final String description;
+
+    DatasourceStatusEnum(int code, String description) {
+        this.code = code;
+        this.description = description;
+    }
+
+    public int getCode() {
+        return code;
+    }
+
+    public String getDescription() {
+        return description;
+    }
+}
diff --git a/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/Result.java b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/Result.java
new file mode 100644
index 00000000000..79d7ae12b85
--- /dev/null
+++ b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/Result.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.app.common;
+
+public class Result<T> {
+
+    private static final Result<Void> OK = success();
+
+    private int code = 0;
+
+    private String msg;
+
+    private T data;
+
+    private Result() {
+        this.data = null;
+    }
+
+    private Result(SeatunnelErrorEnum errorEnum) {
+        this.code = errorEnum.getCode();
+        this.msg = errorEnum.getMsg();
+        this.data = null;
+    }
+
+    private Result(SeatunnelErrorEnum errorEnum, String... messages) {
+        this.code = errorEnum.getCode();
+        this.msg = String.format(errorEnum.getTemplate(), messages);
+        this.data = null;
+    }
+
+    public static <T> Result<T> success() {
+        return new Result<>();
+    }
+
+    public static <T> Result<T> success(T data) {
+        Result<T> result = success();
+        result.setData(data);
+        return result;
+    }
+
+    public static <T> Result<T> failure(SeatunnelErrorEnum errorEnum) {
+        Result<T> result = new Result<>(errorEnum);
+        return result;
+    }
+
+    public static <T> Result<T> failure(SeatunnelErrorEnum errorEnum, String... messages) {
+        Result<T> result = new Result<>(errorEnum, messages);
+        return result;
+    }
+
+    public boolean isSuccess() {
+        return OK.getCode() == this.code;
+    }
+
+    public boolean isFailed() {
+        return !this.isSuccess();
+    }
+
+    public int getCode() {
+        return code;
+    }
+
+    public void setCode(int code) {
+        this.code = code;
+    }
+
+    public String getMsg() {
+        return msg;
+    }
+
+    public void setMsg(String msg) {
+        this.msg = msg;
+    }
+
+    public T getData() {
+        return data;
+    }
+
+    public void setData(T data) {
+        this.data = data;
+    }
+}
diff --git a/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/ScriptParamStatusEnum.java b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/ScriptParamStatusEnum.java
new file mode 100644
index 00000000000..56195dd8de8
--- /dev/null
+++ b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/ScriptParamStatusEnum.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.app.common;
+
+public enum ScriptParamStatusEnum {
+    NORMAL(0, "normal"),
+    DELETED(1, "deleted"),
+    ;
+
+    private final int code;
+    private final String description;
+
+    ScriptParamStatusEnum(int code, String description) {
+        this.code = code;
+        this.description = description;
+    }
+
+    public int getCode() {
+        return code;
+    }
+
+    public String getDescription() {
+        return description;
+    }
+}
diff --git a/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/ScriptStatusEnum.java b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/ScriptStatusEnum.java
new file mode 100644
index 00000000000..17704a217b0
--- /dev/null
+++ b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/ScriptStatusEnum.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.app.common;
+
+public enum ScriptStatusEnum {
+    UNPUBLISHED(0, "unpublished"),
+    PUBLISHED(1, "published"),
+    DELETED(2, "deleted"),
+    ;
+
+    private final int code;
+    private final String description;
+
+    ScriptStatusEnum(int code, String description) {
+        this.code = code;
+        this.description = description;
+    }
+
+    public int getCode() {
+        return code;
+    }
+
+    public String getDescription() {
+        return description;
+    }
+}
diff --git a/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/ScriptTypeEnum.java b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/ScriptTypeEnum.java
new file mode 100644
index 00000000000..89d627c6041
--- /dev/null
+++ b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/ScriptTypeEnum.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.app.common;
+
+public enum ScriptTypeEnum {
+    OFFLINE(0, "offline"),
+    REALTIME(1, "realtime"),
+    ;
+
+    private final int code;
+    private final String description;
+
+    ScriptTypeEnum(int code, String description) {
+        this.code = code;
+        this.description = description;
+    }
+
+    public int getCode() {
+        return code;
+    }
+
+    public String getDescription() {
+        return description;
+    }
+}
diff --git a/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/SeatunnelErrorEnum.java b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/SeatunnelErrorEnum.java
new file mode 100644
index 00000000000..5e54ef5fac7
--- /dev/null
+++ b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/SeatunnelErrorEnum.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.app.common;
+
+public enum SeatunnelErrorEnum {
+
+    SCRIPT_ALREADY_EXIST(10001, "script already exists", "You already have a script with the same name : '%s'"),
+    NO_SUCH_SCRIPT(10002, "no such script", "No such script. Maybe deleted by others."),
+    USER_ALREADY_EXISTS(10003, "user already exists", "The same username [%s] already exists."),
+    NO_SUCH_USER(10004, "no such user", "No such user. Maybe deleted by others."),
+    ILLEGAL_STATE(99998, "illegal state", "%s"),
+    UNKNOWN(99999, "unknown exception", "%s")
+    ;
+
+    private final int code;
+    private final String msg;
+    private final String template;
+
+    SeatunnelErrorEnum(int code, String msg, String template) {
+        this.code = code;
+        this.msg = msg;
+        this.template = template;
+    }
+
+    public int getCode() {
+        return code;
+    }
+
+    public String getMsg() {
+        return msg;
+    }
+
+    public String getTemplate() {
+        return template;
+    }
+}
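A sketch of how these codes are expected to surface through the Result envelope defined earlier; the script name is made up:

    // Reject a duplicate script; the enum's template receives the offending name.
    Result<Void> duplicate = Result.failure(SeatunnelErrorEnum.SCRIPT_ALREADY_EXIST, "daily-sync");
    // duplicate.getMsg() => "You already have a script with the same name : 'daily-sync'"
    // duplicate.isFailed() => true

Note that NO_SUCH_USER originally reused code 10002, colliding with NO_SUCH_SCRIPT; it is numbered 10004 above so each failure stays distinguishable by code alone.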
diff --git a/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/SeatunnelException.java b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/SeatunnelException.java
new file mode 100644
index 00000000000..e683296311c
--- /dev/null
+++ b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/SeatunnelException.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.app.common;
+
+import java.util.Arrays;
+
+public class SeatunnelException extends RuntimeException {
+    private SeatunnelErrorEnum errorEnum;
+
+    public SeatunnelException(SeatunnelErrorEnum e) {
+        super(e.getMsg());
+        this.errorEnum = e;
+    }
+
+    public SeatunnelException(SeatunnelErrorEnum e, String... msg) {
+        super(e.getMsg().concat(" ").concat(Arrays.toString(msg)));
+        this.errorEnum = e;
+    }
+
+    public static SeatunnelException newInstance(SeatunnelErrorEnum e, String... msg) {
+        return new SeatunnelException(e, msg);
+    }
+
+    public static SeatunnelException newInstance(SeatunnelErrorEnum e) {
+        return new SeatunnelException(e);
+    }
+
+    public SeatunnelErrorEnum getErrorEnum() {
+        return errorEnum;
+    }
+}
diff --git a/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/UserStatusEnum.java b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/UserStatusEnum.java
new file mode 100644
index 00000000000..c62b7e1cf73
--- /dev/null
+++ b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/UserStatusEnum.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.app.common;
+
+public enum UserStatusEnum {
+    ENABLE(0, "enable"),
+    DISABLE(1, "disable"),
+    ;
+    private final int code;
+    private final String description;
+
+    UserStatusEnum(int code, String description) {
+        this.code = code;
+        this.description = description;
+    }
+
+    public int getCode() {
+        return code;
+    }
+
+    public String getDescription() {
+        return description;
+    }
+}
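Nothing in this slice of the patch shows the layer that converts a thrown SeatunnelException back into a Result payload; a hypothetical Spring handler illustrates how the two types are meant to meet (class name and mapping are assumptions, not part of this change):

    @RestControllerAdvice
    class SeatunnelExceptionHandler {
        @ExceptionHandler(SeatunnelException.class)
        Result<Void> handle(SeatunnelException e) {
            // Surface the enum's code and the exception message to the caller.
            return Result.failure(e.getErrorEnum(), e.getMessage());
        }
    }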
diff --git a/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/UserTypeEnum.java b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/UserTypeEnum.java
new file mode 100644
index 00000000000..1b050c51697
--- /dev/null
+++ b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/common/UserTypeEnum.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.app.common;
+
+public enum UserTypeEnum {
+    NORMAL(0, "normal"),
+    ;
+
+    private final int code;
+    private final String description;
+
+    UserTypeEnum(int code, String description) {
+        this.code = code;
+        this.description = description;
+    }
+
+    public int getCode() {
+        return code;
+    }
+
+    public String getDescription() {
+        return description;
+    }
+}
diff --git a/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/config/Swagger2.java b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/config/Swagger2.java
new file mode 100644
index 00000000000..f8a5718da5a
--- /dev/null
+++ b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/config/Swagger2.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.app.config;
+
+import com.fasterxml.classmate.TypeResolver;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.web.servlet.config.annotation.EnableWebMvc;
+import org.springframework.web.servlet.config.annotation.ResourceHandlerRegistry;
+import org.springframework.web.servlet.config.annotation.ViewControllerRegistry;
+import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;
+import springfox.documentation.builders.ApiInfoBuilder;
+import springfox.documentation.builders.PathSelectors;
+import springfox.documentation.builders.RequestHandlerSelectors;
+import springfox.documentation.service.ApiInfo;
+import springfox.documentation.spi.DocumentationType;
+import springfox.documentation.spring.web.plugins.Docket;
+import springfox.documentation.swagger2.annotations.EnableSwagger2;
+
+@Configuration
+@EnableSwagger2
+@EnableWebMvc
+public class Swagger2 implements WebMvcConfigurer {
+    @Autowired
+    private TypeResolver typeResolver;
+
+    @Bean
+    public Docket createRestApi() {
+
+        return new Docket(DocumentationType.SWAGGER_2)
+                .apiInfo(apiInfo())
+                .select()
+                .apis(RequestHandlerSelectors.basePackage("org.apache.seatunnel.app.controller"))
+                .paths(PathSelectors.any())
+                .build();
+
+    }
+
+    @Override
+    public void addViewControllers(ViewControllerRegistry registry) {
+        registry.addRedirectViewController("/api/v2/api-docs", "/v2/api-docs");
+        registry.addRedirectViewController("/api/swagger-resources/configuration/ui", "/swagger-resources/configuration/ui");
+        registry.addRedirectViewController("/api/swagger-resources/configuration/security", "/swagger-resources/configuration/security");
+        registry.addRedirectViewController("/api/swagger-resources", "/swagger-resources");
+
+        registry.addRedirectViewController("/api/null/api-docs",
+                "/api-docs").setKeepQueryParams(true);
+        registry.addRedirectViewController("/api/null/swagger-resources/configuration/ui",
+                "/swagger-resources/configuration/ui");
+        registry.addRedirectViewController("/api/null/swagger-resources/configuration/security",
+                "/swagger-resources/configuration/security");
+        registry.addRedirectViewController("/api/null/swagger-resources", "/swagger-resources");
+    }
+
+    @Override
+    public void addResourceHandlers(ResourceHandlerRegistry registry) {
+        registry.addResourceHandler("/api/swagger-ui.html**").addResourceLocations("classpath:/META-INF/resources/swagger-ui.html");
+        registry.addResourceHandler("/api/webjars/**").addResourceLocations("classpath:/META-INF/resources/webjars/");
+        registry.addResourceHandler("/doc.html**").addResourceLocations("classpath:/META-INF/resources/");
+    }
+
+    private ApiInfo apiInfo() {
+        return new ApiInfoBuilder()
+                .title("seatunnel api docs")
+                .version("1.0.0")
+                .description("API description of Seatunnel")
+                .termsOfServiceUrl("https://seatunnel.apache.org/")
+                .build();
+    }
+
+}
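With the redirects and resource handlers above, a running instance should serve the bundled Swagger UI at /api/swagger-ui.html and the raw spec at /v2/api-docs; host and port depend on the deployment, since this patch does not pin them.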
agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.app.controller;
+
+import org.apache.seatunnel.app.common.Result;
+import org.apache.seatunnel.app.domain.request.script.AddEmptyScriptReq;
+import org.apache.seatunnel.app.domain.request.script.ScriptListReq;
+import org.apache.seatunnel.app.domain.request.script.UpdateScriptContentReq;
+import org.apache.seatunnel.app.domain.request.script.UpdateScriptParamReq;
+import org.apache.seatunnel.app.domain.response.script.AddEmptyScriptRes;
+import org.apache.seatunnel.app.domain.response.script.ScriptParamRes;
+import org.apache.seatunnel.app.domain.response.script.ScriptSimpleInfoRes;
+import org.apache.seatunnel.app.service.IScriptService;
+
+import io.swagger.annotations.ApiImplicitParam;
+import io.swagger.annotations.ApiImplicitParams;
+import io.swagger.annotations.ApiOperation;
+import org.springframework.web.bind.annotation.DeleteMapping;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.PutMapping;
+import org.springframework.web.bind.annotation.RequestBody;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.bind.annotation.RestController;
+
+import javax.annotation.Resource;
+import javax.validation.constraints.NotNull;
+
+import java.util.List;
+
+@RequestMapping("/api/v1/script")
+@RestController
+public class ScriptController {
+    @Resource
+    private IScriptService iScriptService;
+
+    @PostMapping("/script")
+    @ApiOperation(value = "add an empty script", httpMethod = "POST")
+    public Result<AddEmptyScriptRes> addEmptyScript(@RequestBody @NotNull AddEmptyScriptReq addEmptyScriptReq) {
+        return Result.success(iScriptService.addEmptyScript(addEmptyScriptReq));
+    }
+
+    @PutMapping("/scriptContent")
+    @ApiOperation(value = "update script", httpMethod = "PUT")
+    public Result<Void> updateScriptContent(@RequestBody @NotNull UpdateScriptContentReq updateScriptContentReq) {
+        iScriptService.updateScriptContent(updateScriptContentReq);
+        return Result.success();
+    }
+
+    @DeleteMapping("/script")
+    @ApiOperation(value = "delete script", httpMethod = "DELETE")
+    @ApiImplicitParams({
+            @ApiImplicitParam(name = "id", value = "script id", dataType = "Integer"),
+    })
+    public Result<Void> delete(@RequestParam @NotNull Integer id) {
+        iScriptService.delete(id);
+        return Result.success();
+    }
+
+    @PostMapping("/list")
+    @ApiOperation(value = "script list", httpMethod = "POST")
+    public Result<List<ScriptSimpleInfoRes>> list(@RequestBody @NotNull ScriptListReq scriptListReq) {
+        return Result.success(iScriptService.list(scriptListReq));
+    }
+
+    @GetMapping("/scriptContent")
+    @ApiOperation(value = "fetch script content", httpMethod = "GET")
+    @ApiImplicitParams({
+            @ApiImplicitParam(name = "id", value = "script id", dataType = "Integer"),
+    })
+    public Result<String> fetchScriptContent(@RequestParam @NotNull Integer id) {
+        return Result.success(iScriptService.fetchScriptContent(id));
+    }
+
+    @PutMapping("/scriptParam")
+    @ApiOperation(value = "update script param", httpMethod = "PUT")
+    public Result<Void> updateScriptParam(@RequestBody @NotNull UpdateScriptParamReq updateScriptParamReq) {
+        iScriptService.updateScriptParam(updateScriptParamReq);
+        return Result.success();
+    }
+
+    @GetMapping("/scriptParam")
+    @ApiOperation(value = "fetch script param", httpMethod = "GET")
+    @ApiImplicitParams({
+            @ApiImplicitParam(name = "id", value = "script id", dataType = "Integer"),
+    })
+    public Result<List<ScriptParamRes>> fetchScriptParam(@RequestParam @NotNull Integer id) {
+        return Result.success(iScriptService.fetchScriptParam(id));
+    }
+}
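For a quick smoke test of these endpoints, a plain Spring client is enough; the snippet assumes a local instance on port 8080 and an existing script with id 1, and note that the server wraps payloads in the Result envelope, so a production client would deserialize Result<String> rather than the bare body:

    RestTemplate rest = new RestTemplate();
    // GET /api/v1/script/scriptContent?id=1 returns the stored script content.
    String body = rest.getForObject("http://localhost:8080/api/v1/script/scriptContent?id=1", String.class);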
"script id", dataType = "Integer"), + }) + public Result fetchScriptContent(@RequestParam @NotNull Integer id) { + return Result.success(iScriptService.fetchScriptContent(id)); + } + + @PutMapping("/scriptParam") + @ApiOperation(value = "update script param", httpMethod = "PUT") + public Result updateScriptParam(@RequestBody @NotNull UpdateScriptParamReq updateScriptParamReq) { + iScriptService.updateScriptParam(updateScriptParamReq); + return Result.success(); + } + + @GetMapping("/scriptParam") + @ApiOperation(value = "fetch script param", httpMethod = "GET") + @ApiImplicitParams({ + @ApiImplicitParam(name = "id", value = "script id", dataType = "Integer"), + }) + public Result> fetchScriptParam(@RequestParam @NotNull Integer id) { + return Result.success(iScriptService.fetchScriptParam(id)); + } +} diff --git a/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/controller/UserController.java b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/controller/UserController.java new file mode 100644 index 00000000000..ec5c16d714a --- /dev/null +++ b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/controller/UserController.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
diff --git a/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/dal/dao/IScriptDao.java b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/dal/dao/IScriptDao.java
new file mode 100644
index 00000000000..7da0332ac69
--- /dev/null
+++ b/seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/dal/dao/IScriptDao.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.app.dal.dao;
+
+import org.apache.seatunnel.app.dal.entity.Script;
+import org.apache.seatunnel.app.domain.dto.script.AddEmptyScriptDto;
+import org.apache.seatunnel.app.domain.dto.script.CheckScriptDuplicateDto;
+import org.apache.seatunnel.app.domain.dto.script.ListScriptsDto;
+import org.apache.seatunnel.app.domain.dto.script.UpdateScriptContentDto;
+
+import java.util.List;
+
+public interface IScriptDao {
+    void checkScriptDuplicate(CheckScriptDuplicateDto dto);
+
+    int addEmptyScript(AddEmptyScriptDto dto);
+
+    Script getScript(Integer id);
+
+    void updateScriptContent(UpdateScriptContentDto dto);
+
+    void deleteScript(int id);
+
+    List