From 8261bd0abb4428475a5552a77e1837d251aa4e45 Mon Sep 17 00:00:00 2001 From: fanjia <1095948736@qq.com> Date: Mon, 25 Apr 2022 16:18:44 +0800 Subject: [PATCH 1/6] add Config introduce. --- docs/en/connector/config-example.md | 8 --- docs/en/faq.md | 5 +- docs/en/start/config.md | 87 +++++++++++++++++++++++++++++ docs/en/start/local.mdx | 2 +- docs/sidebars.js | 2 +- 5 files changed, 90 insertions(+), 14 deletions(-) delete mode 100644 docs/en/connector/config-example.md create mode 100644 docs/en/start/config.md diff --git a/docs/en/connector/config-example.md b/docs/en/connector/config-example.md deleted file mode 100644 index e5e21e7f885..00000000000 --- a/docs/en/connector/config-example.md +++ /dev/null @@ -1,8 +0,0 @@ -# Config Examples - -This section show you the example about SeaTunnel configuration file, we already have exists useful examples in -[example-config](https://github.com/apache/incubator-seatunnel/tree/dev/config) - -## What's More - -If you want to know the details of this format configuration, Please see [HOCON](https://github.com/lightbend/config/blob/main/HOCON.md). \ No newline at end of file diff --git a/docs/en/faq.md b/docs/en/faq.md index f509f64fb75..95d5a86e1a3 100644 --- a/docs/en/faq.md +++ b/docs/en/faq.md @@ -1,11 +1,8 @@ # FAQ -why-i-should-install-computing-engine-like-spark-or-flink - ## Why I should install computing engine like Spark or Flink - -TODO +Now SeaTunnel uses computing engines like spark and flink to complete resource scheduling and node communication, so we can focus on the ease of use of data synchronization and the development of high-performance components. But this is only temporary. 
## I have a question, but I can not solve it by myself diff --git a/docs/en/start/config.md b/docs/en/start/config.md new file mode 100644 index 00000000000..fd841da0e2d --- /dev/null +++ b/docs/en/start/config.md @@ -0,0 +1,87 @@ +--- +sidebar_position: 5 +--- + +# Intro to config file + +In SeaTunnel, the most important thing is the Config file, through which users can customize their own data synchronization requirements to maximize the potential of SeaTunnel. So next, I will introduce you how to configure the Config file. + +## Example + +Before you read on, you can find config file examples [here](https://github.com/apache/incubator-seatunnel/tree/dev/config) and in distribute package's config directory. + +## Config file structure + +The Config file will be similar to the one below. +```hocon +{ + env { + execution.parallelism = 1 + } + + source { + FakeSource { + result_table_name = "fake" + field_name = "name,age" + } + } + + transform { + sql { + sql = "select name,age from fake" + } + } + + sink { + Clickhouse { + host = "clickhouse:8123" + database = "default" + table = "seatunnel_console" + fields = ["name"] + username = "default" + password = "" + } + } +} +``` +As you can see, the Config file contains several sections: env, source, transform, sink. Different modules have different functions. After you understand these modules, you will understand how SeaTunnel works. + +### env + +Used to add some engine optional parameters, no matter which engine (Spark or Flink), the corresponding optional parameters should be filled in here. + + + +### source + +source is used to define where SeaTunnel needs to fetch data, and use the fetched data for the next step. Multiple sources can be defined at the same time. The supported source at now check [Source of SeaTunnel](../connector/source). 
+Each source has its own specific parameters to define how to fetch data, and SeaTunnel also extracts the parameters that each source will use, such as the `result_table_name` parameter, which is used to specify the name of the data generated by the current source, which is convenient for follow-up used by other modules. + +### transform + +When we have the data source, we may need to further process the data, so we have the transform module. Of course, this uses the word 'may', which means that we can also directly treat the transform as non-existent, directly from source to sink. Like below. + +```hocon +transform { + // no thing on here +} +``` + +Like source, transform has specific parameters that belong to each module. +The supported source at now check. The supported transform at now check [Transform of SeaTunnel](../transform) + +### sink + +Our purpose with SeaTunnel is to synchronize data from one place to another, so it is critical to define how and where data is written. With the sink module provided by SeaTunnel, you can complete this operation quickly and efficiently. +Sink and source are very similar, but the difference is reading and writing. So go check out our [supported sinks](../connector/sink). + +### Other + +You will find that when multiple sources and multiple sinks are defined, which data is read by each sink, and which is the data read by each transform? We use `result_table_name` and `source_table_name` two key configurations. +Each source module will be configured with a `result_table_name` to indicate the name of the data source generated by the data source, and other transform and sink modules can use `source_table_name` to refer to the corresponding data source name, indicating that I want to read the data for processing. +Then transform, as an intermediate processing module, can use both `result_table_name` and `source_table_name` configurations at the same time. 
+But you will find that in the above example Config, not every module is configured with these two parameters, because in SeaTunnel, there is a default convention, if these two parameters are not configured, then the generated data from the last module of the previous node will be used. This is much more convenient when there is only one source. + +## What's More + +If you want to know the details of this format configuration, Please see [HOCON](https://github.com/lightbend/config/blob/main/HOCON.md). diff --git a/docs/en/start/local.mdx b/docs/en/start/local.mdx index 0e57c4b05ae..71ee455f6cb 100644 --- a/docs/en/start/local.mdx +++ b/docs/en/start/local.mdx @@ -11,7 +11,7 @@ import TabItem from '@theme/TabItem'; Before you getting start the local run, you need to make sure you already have installed the following software which SeaTunnel required: -* [Java](https://www.java.com/en/download/) (only JDK 8 supported by now) installed and `JAVA_HOME` set. +* [Java](https://www.java.com/en/download/) (Java 8 or 11, other versions greater than Java 8 can theoretically work as well) installed and `JAVA_HOME` set. * Download the engine, you can choose and download one of them from below as your favour, you could see more information about [why we need engine in SeaTunnel](../faq.md#why-i-should-install-computing-engine-like-spark-or-flink) * Spark: Please [download Spark](https://spark.apache.org/downloads.html) first(**required version >= 2** and version < 3.x). 
For more information you could see [Getting Started: standalone](https://spark.apache.org/docs/latest/spark-standalone.html#installing-spark-standalone-to-a-cluster) diff --git a/docs/sidebars.js b/docs/sidebars.js index d96db4c6d95..2c49200d96a 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -69,13 +69,13 @@ const sidebars = { 'start/local', 'start/docker', 'start/kubernetes', + 'start/config' ], }, { type: 'category', label: 'Connector', items: [ - 'connector/config-example', { type: 'category', label: 'Source', From 01b177e4d8a925df5a57e2d2e8c0536d600f7877 Mon Sep 17 00:00:00 2001 From: fanjia <1095948736@qq.com> Date: Mon, 25 Apr 2022 19:28:56 +0800 Subject: [PATCH 2/6] add Config introduce. --- docs/en/start/config.md | 97 +++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 38 deletions(-) diff --git a/docs/en/start/config.md b/docs/en/start/config.md index fd841da0e2d..a07791a2f43 100644 --- a/docs/en/start/config.md +++ b/docs/en/start/config.md @@ -4,62 +4,74 @@ sidebar_position: 5 # Intro to config file -In SeaTunnel, the most important thing is the Config file, through which users can customize their own data synchronization requirements to maximize the potential of SeaTunnel. So next, I will introduce you how to configure the Config file. +In SeaTunnel, the most important thing is the Config file, through which users can customize their own data +synchronization requirements to maximize the potential of SeaTunnel. This page explains how to +configure the Config file. ## Example -Before you read on, you can find config file examples [here](https://github.com/apache/incubator-seatunnel/tree/dev/config) and in distribute package's config directory. +Before you read on, you can find config file +examples [here](https://github.com/apache/incubator-seatunnel/tree/dev/config) and in the `config` directory +of the distribution package. ## Config file structure The Config file will be similar to the one below. 
+ ```hocon -{ - env { - execution.parallelism = 1 - } +env { + execution.parallelism = 1 +} - source { - FakeSource { - result_table_name = "fake" - field_name = "name,age" - } +source { + FakeSource { + result_table_name = "fake" + field_name = "name,age" } +} - transform { - sql { - sql = "select name,age from fake" - } +transform { + sql { + sql = "select name,age from fake" } +} - sink { - Clickhouse { - host = "clickhouse:8123" - database = "default" - table = "seatunnel_console" - fields = ["name"] - username = "default" - password = "" - } +sink { + Clickhouse { + host = "clickhouse:8123" + database = "default" + table = "seatunnel_console" + fields = ["name"] + username = "default" + password = "" } } ``` -As you can see, the Config file contains several sections: env, source, transform, sink. Different modules have different functions. After you understand these modules, you will understand how SeaTunnel works. + +As you can see, the Config file contains several sections: env, source, transform, sink. Different modules +have different functions. After you understand these modules, you will understand how SeaTunnel works. ### env -Used to add some engine optional parameters, no matter which engine (Spark or Flink), the corresponding optional parameters should be filled in here. +Used to add some engine optional parameters, no matter which engine (Spark or Flink), the corresponding +optional parameters should be filled in here. ### source -source is used to define where SeaTunnel needs to fetch data, and use the fetched data for the next step. Multiple sources can be defined at the same time. The supported source at now check [Source of SeaTunnel](../connector/source). 
-Each source has its own specific parameters to define how to fetch data, and SeaTunnel also extracts the parameters that each source will use, such as the `result_table_name` parameter, which is used to specify the name of the data generated by the current source, which is convenient for follow-up used by other modules. +source is used to define where SeaTunnel needs to fetch data, and use the fetched data for the next step. +Multiple sources can be defined at the same time. For the currently supported sources, +check [Source of SeaTunnel](../connector/source). Each source has its own specific parameters to define how to +fetch data, and SeaTunnel also extracts the parameters that each source will use, such as +the `result_table_name` parameter, which is used to specify the name of the data generated by the current +source, so that other modules can refer to it later. ### transform -When we have the data source, we may need to further process the data, so we have the transform module. Of course, this uses the word 'may', which means that we can also directly treat the transform as non-existent, directly from source to sink. Like below. +When we have the data source, we may need to further process the data, so we have the transform module. Of +course, this uses the word 'may', which means that we can also directly treat the transform as non-existent, +directly from source to sink. Like below. ```hocon transform { @@ -67,21 +79,30 @@ transform { } ``` -Like source, transform has specific parameters that belong to each module. -The supported source at now check. The supported transform at now check [Transform of SeaTunnel](../transform) +Like source, each transform has its own specific parameters. +For the currently supported transforms, check [Transform of SeaTunnel](../transform). ### sink -Our purpose with SeaTunnel is to synchronize data from one place to another, so it is critical to define how and where data is written. 
With the sink module provided by SeaTunnel, you can complete this operation quickly and efficiently. -Sink and source are very similar, but the difference is reading and writing. So go check out our [supported sinks](../connector/sink). +Our purpose with SeaTunnel is to synchronize data from one place to another, so it is critical to define how +and where data is written. With the sink module provided by SeaTunnel, you can complete this operation quickly +and efficiently. Sink and source are very similar, but the difference is reading and writing. So go check out +our [supported sinks](../connector/sink). ### Other -You will find that when multiple sources and multiple sinks are defined, which data is read by each sink, and which is the data read by each transform? We use `result_table_name` and `source_table_name` two key configurations. -Each source module will be configured with a `result_table_name` to indicate the name of the data source generated by the data source, and other transform and sink modules can use `source_table_name` to refer to the corresponding data source name, indicating that I want to read the data for processing. -Then transform, as an intermediate processing module, can use both `result_table_name` and `source_table_name` configurations at the same time. -But you will find that in the above example Config, not every module is configured with these two parameters, because in SeaTunnel, there is a default convention, if these two parameters are not configured, then the generated data from the last module of the previous node will be used. This is much more convenient when there is only one source. +When multiple sources and multiple sinks are defined, how do we know which data is read by each sink, and +which data is read by each transform? We use two key configurations, `result_table_name` and +`source_table_name`, to decide this. 
Each source module will be configured with a `result_table_name` to indicate the name of the +dataset it generates, and other transform and sink modules can use `source_table_name` to +refer to that name, indicating which data they want to read for processing. A +transform, as an intermediate processing module, can use both `result_table_name` and `source_table_name` +configurations at the same time. Note that in the example Config above, not every module is +configured with these two parameters. This is because SeaTunnel has a default convention: if these two +parameters are not configured, then the data generated by the last module of the previous node will be used. +This is much more convenient when there is only one source. ## What's More -If you want to know the details of this format configuration, Please see [HOCON](https://github.com/lightbend/config/blob/main/HOCON.md). +If you want to know the details of this configuration format, please +see [HOCON](https://github.com/lightbend/config/blob/main/HOCON.md). 
From 795485843c4917776d6ad73bb47ddfc9e667b35f Mon Sep 17 00:00:00 2001 From: fanjia <1095948736@qq.com> Date: Wed, 27 Apr 2022 14:11:36 +0800 Subject: [PATCH 3/6] update doc, add a new item `basic concept` --- docs/en/{start => concept}/config.md | 2 +- docs/en/faq.md | 2 +- docs/sidebars.js | 10 ++++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) rename docs/en/{start => concept}/config.md (99%) diff --git a/docs/en/start/config.md b/docs/en/concept/config.md similarity index 99% rename from docs/en/start/config.md rename to docs/en/concept/config.md index a07791a2f43..533c3a5af99 100644 --- a/docs/en/start/config.md +++ b/docs/en/concept/config.md @@ -1,5 +1,5 @@ --- -sidebar_position: 5 +sidebar_position: 2 --- # Intro to config file diff --git a/docs/en/faq.md b/docs/en/faq.md index 95d5a86e1a3..e72d3248cfa 100644 --- a/docs/en/faq.md +++ b/docs/en/faq.md @@ -2,7 +2,7 @@ ## Why I should install computing engine like Spark or Flink -Now SeaTunnel uses computing engines like spark and flink to complete resource scheduling and node communication, so we can focus on the ease of use of data synchronization and the development of high-performance components. But this is only temporary. +Currently, SeaTunnel uses computing engines such as Spark and Flink to handle resource scheduling and node communication, so we can focus on the ease of use of data synchronization and the development of high-performance components. But this is only temporary. 
## I have a question, but I can not solve it by myself diff --git a/docs/sidebars.js b/docs/sidebars.js index 2c49200d96a..4a68369e664 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -54,6 +54,13 @@ const sidebars = { 'intro/history', ], }, + { + type: 'category', + label: 'Basic Concept', + items: [ + 'concept/config', + ], + }, { type: 'category', label: 'Quick Start', @@ -68,8 +75,7 @@ const sidebars = { items: [ 'start/local', 'start/docker', - 'start/kubernetes', - 'start/config' + 'start/kubernetes' ], }, { From 68a6720f4a56bec6a42c4f6b32ff77ff9aafde99 Mon Sep 17 00:00:00 2001 From: Jiajie Zhong Date: Fri, 29 Apr 2022 10:21:18 +0800 Subject: [PATCH 4/6] Update docs/sidebars.js --- docs/sidebars.js | 7 ------- 1 file changed, 7 deletions(-) diff --git a/docs/sidebars.js b/docs/sidebars.js index 4a68369e664..4721ccff325 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -54,13 +54,6 @@ const sidebars = { 'intro/history', ], }, - { - type: 'category', - label: 'Basic Concept', - items: [ - 'concept/config', - ], - }, { type: 'category', label: 'Quick Start', From b2fcfebfb9526f5316e1cb26f3fe1ea9d86c2fd8 Mon Sep 17 00:00:00 2001 From: Jiajie Zhong Date: Fri, 29 Apr 2022 10:21:26 +0800 Subject: [PATCH 5/6] Update docs/sidebars.js --- docs/sidebars.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/sidebars.js b/docs/sidebars.js index 4721ccff325..f1456efc8b2 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -71,6 +71,13 @@ const sidebars = { 'start/kubernetes' ], }, + { + type: 'category', + label: 'Basic Concept', + items: [ + 'concept/config', + ], + }, { type: 'category', label: 'Connector', From 03b43f5dc67dec4c8157faf1c2be9ac572b8f6d0 Mon Sep 17 00:00:00 2001 From: Jiajie Zhong Date: Fri, 29 Apr 2022 10:23:02 +0800 Subject: [PATCH 6/6] Update docs/sidebars.js --- docs/sidebars.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sidebars.js b/docs/sidebars.js index f1456efc8b2..1ce0f90027b 100644 --- 
a/docs/sidebars.js +++ b/docs/sidebars.js @@ -73,7 +73,7 @@ const sidebars = { }, { type: 'category', - label: 'Basic Concept', + label: 'Concept', items: [ 'concept/config', ],