Skip to content

Commit

Permalink
[Connector-V2] Add File Sink Connector (#2117)
Browse files Browse the repository at this point in the history
* tmp commit

* add hadoop2 and hadoop3 shade jar

* add hadoop2 and hadoop3 shade jar

* add license head

* change known dependencies

* tmp commit

* tmp commit

* change hadoop dependency scope to provide

* back pom

* fix checkstyle

* add example

* fix example bug

* remove file connector from example and e2e because hadoop2 can not compile with jdk11

* no need jdk8 and jdk11 profile because we don't use hadoop shade jar

* change hadoop jar dependency scope to provided

* back

* file connector can not build in jdk11

* drop hadoop shade

* add gitignore item

* add hadoop and local file sink

* fix pom error

* fix pom error

* fix pom error

* implement new interface

* fix UT error

* fix e2e error

* update build timeout from 30min to 40min

* fix e2e error

* remove auto service

* fix e2e error

* fix e2e error

* fix e2e error

* found e2e error

* fix e2e error

* fix e2e error

* fix e2e error

* merge from upstream

* merge from upstream

* merge from upstream

* merge from upstream

* merge from upstream

* add mvn jvm option

* add mvn jvm option

* add license

* add license

* add license

* fix dependency

* fix build jvm oom

* fix build jvm oom

* fix build jvm oom

* fix dependency

* fix dependency

* fix e2e error

* add codeql check timeout from 30min to 60min

* merge from dev

* merge from dev

* fix ci error

* fix checkstyle

* fix ci

* fix ci

* aa

* aa

* aa

* add .idea

* del .idea

* del .idea

* del .idea

* del .idea

* remove no use license

* remove no use before and after method in test

* fix license; remove dependency

* fix review

* fix build order

* fix license

* fix license

* fix review

* fix review

* fix review

* fix review

* fix review

* fix review

* fix review

* fix review

* fix review

* add code-analysys timeout to 120

* retry ci

* update license and remove no use jar from LICENSE file

* retry ci

Co-authored-by: Hisoka <fanjiaeminem@qq.com>
  • Loading branch information
EricJoy2048 and Hisoka-X authored Jul 7, 2022
1 parent 59ce8a2 commit e2283da
Show file tree
Hide file tree
Showing 80 changed files with 3,818 additions and 93 deletions.
12 changes: 8 additions & 4 deletions .github/workflows/backend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ jobs:
java: [ '8', '11' ]
os: [ 'ubuntu-latest', 'windows-latest' ]
runs-on: ${{ matrix.os }}
timeout-minutes: 50
timeout-minutes: 80
steps:
- uses: actions/checkout@v3
with:
Expand All @@ -115,7 +115,7 @@ jobs:
name: Dependency licenses
needs: [ sanity-check ]
runs-on: ubuntu-latest
timeout-minutes: 30
timeout-minutes: 40
steps:
- uses: actions/checkout@v3
with:
Expand Down Expand Up @@ -155,7 +155,9 @@ jobs:
cache: 'maven'
- name: Run Unit tests
run: |
./mvnw -T 2C -B clean verify -D"maven.test.skip"=false -D"checkstyle.skip"=true -D"scalastyle.skip"=true -D"license.skipAddThirdParty"=true --no-snapshot-updates
./mvnw -B -T 1C clean verify -D"maven.test.skip"=false -D"checkstyle.skip"=true -D"scalastyle.skip"=true -D"license.skipAddThirdParty"=true --no-snapshot-updates
env:
MAVEN_OPTS: -Xmx2048m

integration-test:
name: Integration Test
Expand All @@ -176,4 +178,6 @@ jobs:
cache: 'maven'
- name: Run Integration tests
run: |
./mvnw -T 2C -B verify -DskipUT=true -DskipIT=false -D"checkstyle.skip"=true -D"scalastyle.skip"=true -D"license.skipAddThirdParty"=true --no-snapshot-updates
./mvnw -T 1C -B verify -DskipUT=true -DskipIT=false -D"checkstyle.skip"=true -D"scalastyle.skip"=true -D"license.skipAddThirdParty"=true --no-snapshot-updates
env:
MAVEN_OPTS: -Xmx2048m
1 change: 1 addition & 0 deletions .github/workflows/code-analysys.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ on:
jobs:
build:
runs-on: ubuntu-latest
timeout-minutes: 120
steps:
- uses: actions/checkout@v2
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
check:
name: Spark
runs-on: ubuntu-latest
timeout-minutes: 30
timeout-minutes: 60
steps:
- uses: actions/checkout@v2
- name: Set up JDK 1.8
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ target/
# Intellij Idea files
.idea/
*.iml
.idea/*

.DS_Store

Expand Down Expand Up @@ -40,4 +41,4 @@ Test.scala
test.conf
log4j.properties
spark-warehouse
*.flattened-pom.xml
*.flattened-pom.xml
2 changes: 2 additions & 0 deletions plugin-mapping.properties
Original file line number Diff line number Diff line change
Expand Up @@ -102,4 +102,6 @@ seatunnel.sink.Clickhouse = connector-clickhouse
seatunnel.sink.ClickhouseFile = connector-clickhouse
seatunnel.source.Jdbc = connector-jdbc
seatunnel.sink.Jdbc = connector-jdbc
seatunnel.sink.HdfsFile = connector-file-hadoop
seatunnel.sink.LocalFile = connector-file-local
seatunnel.source.Pulsar = connector-pulsar
15 changes: 13 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@
<slf4j.version>1.7.25</slf4j.version>
<guava.version>19.0</guava.version>
<auto-service.version>1.0.1</auto-service.version>
<powermock.version>2.0.9</powermock.version>
<hadoop2.version>2.6.5</hadoop2.version>
<hadoop3.version>3.0.0</hadoop3.version>
<seatunnel.shade.package>org.apache.seatunnel.shade</seatunnel.shade.package>
Expand Down Expand Up @@ -659,12 +660,23 @@
<version>${guava.version}</version>
</dependency>

<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-module-junit4</artifactId>
<version>${powermock.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-api-mockito2</artifactId>
<version>${powermock.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.github.jsonzou</groupId>
<artifactId>jmockdata</artifactId>
<version>${jmockdata.version}</version>
</dependency>

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
Expand Down Expand Up @@ -1277,7 +1289,6 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
</plugin>

<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-failsafe-plugin</artifactId>
Expand Down
12 changes: 11 additions & 1 deletion seatunnel-connectors-v2-dist/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
limitations under the License.
-->

<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
Expand Down Expand Up @@ -80,9 +81,18 @@
<artifactId>connector-hive</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.seatunnel</groupId>
<artifactId>connector-file-hadoop</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.seatunnel</groupId>
<artifactId>connector-file-local</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>


<build>
<plugins>
<plugin>
Expand Down
72 changes: 72 additions & 0 deletions seatunnel-connectors-v2/connector-file/connector-file-base/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Module descriptor for connector-file-base; it inherits from the connector-file parent POM. -->
<parent>
<artifactId>connector-file</artifactId>
<groupId>org.apache.seatunnel</groupId>
<version>${revision}</version>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>connector-file-base</artifactId>

<dependencies>
<!-- SeaTunnel API, pinned to this project's own version (default scope). -->
<dependency>
<groupId>org.apache.seatunnel</groupId>
<artifactId>seatunnel-api</artifactId>
<version>${project.version}</version>
</dependency>

<!-- Test scope only: seatunnel-core-base is not on this module's main classpath. -->
<dependency>
<groupId>org.apache.seatunnel</groupId>
<artifactId>seatunnel-core-base</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>

<!-- No <version> is declared for the Commons artifacts below; presumably it is managed by an ancestor POM (TODO confirm). -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-collections4</artifactId>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>

<!-- Test-only dependencies; none of them declares a <version> here. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-module-junit4</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-api-mockito2</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.seatunnel.connectors.seatunnel.file.config;

import static com.google.common.base.Preconditions.checkNotNull;

import org.apache.seatunnel.shade.com.typesafe.config.Config;

import lombok.Data;
import lombok.NonNull;
import org.apache.commons.lang3.StringUtils;

import java.io.Serializable;
import java.util.Locale;

/**
 * Base settings for a text file, populated from a {@link Config}.
 *
 * <p>Every optional key is applied only when it is present and its value is not blank;
 * otherwise the field keeps the default it was initialised with. Compression is rejected:
 * the mere presence of the {@link Constant#COMPRESS_CODEC} key makes the constructor throw.
 */
@Data
public class AbstractTextFileConfig implements DelimiterConfig, CompressConfig, Serializable {
    private static final long serialVersionUID = 1L;

    /** Not assigned by the config-based constructor, which rejects the codec key outright. */
    protected String compressCodec;

    /** Separator between fields; defaults to the single character {@code \001}. */
    protected String fieldDelimiter = String.valueOf('\001');

    /** Separator between rows; defaults to a line feed. */
    protected String rowDelimiter = "\n";

    /** Value of the {@link Constant#PATH} key; stays {@code null} when that value is blank. */
    protected String path;

    /** Value of the {@link Constant#FILE_NAME_EXPRESSION} key, if one was supplied. */
    protected String fileNameExpression;

    /** File format; defaults to {@link FileFormat#TEXT}. */
    protected FileFormat fileFormat = FileFormat.TEXT;

    /**
     * Reads the settings from {@code config}.
     *
     * @param config source of the settings; must not be {@code null}
     * @throws RuntimeException if the {@link Constant#COMPRESS_CODEC} key is present
     * @throws IllegalArgumentException if the file format value names no {@link FileFormat} constant
     */
    public AbstractTextFileConfig(@NonNull Config config) {
        // The path is read unconditionally, so a config without it fails here first.
        checkNotNull(config.getString(Constant.PATH));

        if (config.hasPath(Constant.COMPRESS_CODEC)) {
            throw new RuntimeException("compress not support now");
        }

        this.fieldDelimiter = readNonBlank(config, Constant.FIELD_DELIMITER, this.fieldDelimiter);
        this.rowDelimiter = readNonBlank(config, Constant.ROW_DELIMITER, this.rowDelimiter);
        this.path = readNonBlank(config, Constant.PATH, this.path);
        this.fileNameExpression = readNonBlank(config, Constant.FILE_NAME_EXPRESSION, this.fileNameExpression);

        final String formatName = readNonBlank(config, Constant.FILE_FORMAT, null);
        if (formatName != null) {
            // Upper-cased with Locale.ROOT so the enum lookup does not depend on the default locale.
            this.fileFormat = FileFormat.valueOf(formatName.toUpperCase(Locale.ROOT));
        }
    }

    /** No-argument constructor for subclasses; leaves every field at its initial value. */
    protected AbstractTextFileConfig() {
    }

    /**
     * Returns the string stored under {@code key}, or {@code fallback} when the key is
     * absent or its value is blank.
     */
    private static String readNonBlank(Config config, String key, String fallback) {
        if (!config.hasPath(key)) {
            return fallback;
        }
        final String value = config.getString(key);
        return StringUtils.isBlank(value) ? fallback : value;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.seatunnel.connectors.seatunnel.file.config;

/**
 * Implemented by configurations that expose a compression codec setting.
 */
public interface CompressConfig {

    /**
     * Returns the compression codec setting.
     *
     * @return the codec value held by the implementation; this interface does not define
     *         which values are valid or whether {@code null} is permitted
     */
    String getCompressCodec();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.seatunnel.connectors.seatunnel.file.config;

/**
 * String constants shared by the file connector configuration classes.
 *
 * <p>The lower-case, underscore-separated values are configuration key names. The remaining
 * values are fixed literals; judging by their names they relate to partitioning and
 * transaction naming, but their consumers are not part of this class.
 */
public class Constant {

    // ---- Fixed literals ----
    public static final String SEATUNNEL = "seatunnel";
    public static final String NON_PARTITION = "NON_PARTITION";
    public static final String TRANSACTION_ID_SPLIT = "_";
    public static final String TRANSACTION_EXPRESSION = "transactionId";

    // ---- Configuration keys: write mode and compression ----
    public static final String SAVE_MODE = "save_mode";
    public static final String COMPRESS_CODEC = "compress_codec";

    // ---- Configuration keys: location and delimiters ----
    public static final String PATH = "path";
    public static final String TMP_PATH = "tmp_path";
    public static final String FIELD_DELIMITER = "field_delimiter";
    public static final String ROW_DELIMITER = "row_delimiter";

    // ---- Configuration keys: partitioning ----
    public static final String PARTITION_BY = "partition_by";
    public static final String PARTITION_DIR_EXPRESSION = "partition_dir_expression";
    public static final String IS_PARTITION_FIELD_WRITE_IN_FILE = "is_partition_field_write_in_file";

    // ---- Configuration keys: file naming, format and columns ----
    public static final String FILE_NAME_EXPRESSION = "file_name_expression";
    public static final String FILENAME_TIME_FORMAT = "filename_time_format";
    public static final String FILE_FORMAT = "file_format";
    public static final String SINK_COLUMNS = "sink_columns";

    // ---- Configuration keys: transactions ----
    public static final String IS_ENABLE_TRANSACTION = "is_enable_transaction";
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.seatunnel.connectors.seatunnel.file.config;

/**
 * Implemented by configurations that expose field and row delimiter settings.
 */
public interface DelimiterConfig {

    /**
     * Returns the field delimiter setting.
     *
     * @return the delimiter string held by the implementation
     */
    String getFieldDelimiter();

    /**
     * Returns the row delimiter setting.
     *
     * @return the delimiter string held by the implementation
     */
    String getRowDelimiter();
}
Loading

0 comments on commit e2283da

Please sign in to comment.