diff --git a/CHANGELOG.md b/CHANGELOG.md
index 001ed29c3..0903964df 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,32 +1,39 @@
 # Changelog
 
-## 3.1.1
+## 3.1.5
 
-### General
+### General Changes
 
-* Transformer add column's basic operation
-* Use prestosql's hadoop and hive jars instead of apache's
-* various misc codes optimize
-
-### dbffilereader
+* Various code cleanups
 
-* remove supported for reading compressed dbf file
-
-### jsonreader
-
-* fixed parse non-string type value
-
-### dbffilewriter
-
-* fixed boolean type writing error
-
-### hdfswriter
+### DBF reader
 
-* Use keyword `parquest` indicates support parquet format, old keyword `par` is not used
+* Reconstruct this plugin on a third-party jar package
+* Add support for the `Date` type
+* Fix an occasional null pointer exception
+
+### DBF writer
+
+* Add support for the `Date` type
+
+## 3.1.4
+
+This is an emergency release that fixes a serious problem introduced in a previous release ([\#62](https://github.com/wgzhao/DataX/issues/62)).
+
+## 3.1.3
+
+### Redis reader
+
+* Delete the temporary local file
+* Only parse the redis `String` data type; other types are ignored
+
+### HDFS reader
+
+* Add support for reading Parquet files (#54)
 
 ## 3.1.2
 
-### General
+### General Changes
 
 * Does not parse the `-m` command line argument, it doesn't really do anything!
@@ -47,19 +54,27 @@
 
 * Add support for `json` data type
 
-## 3.1.3
-
-### Redis reader
-
-* Delete temporary local file
-* Only parse redis `String` data type, other types will be ignore
-
-### HDFS reader
+## 3.1.1
 
-* Add support for reading Parquet file (#54)
+### General Changes
 
-## 3.1.4
+* Transformer: add basic column operations
+* Use prestosql's hadoop and hive jars instead of apache's
+* Various miscellaneous code optimizations
+
+### dbffilereader
 
-This is an emergency fix version to fix a serious problem in a previous release ( [\#62](https://github.com/wgzhao/DataX/issues/62)).
+* Remove support for reading compressed DBF files
+
+### jsonreader
+
+* Fix parsing of non-string type values
+
+### dbffilewriter
+
+* Fix a boolean type writing error
+
+### hdfswriter
+
+* Use the keyword `parquest` to indicate Parquet format support; the old keyword `par` is no longer used
diff --git a/docs/src/main/sphinx/reader/dbffilereader.md b/docs/src/main/sphinx/reader/dbffilereader.md
index db0f89d4c..897f482a0 100644
--- a/docs/src/main/sphinx/reader/dbffilereader.md
+++ b/docs/src/main/sphinx/reader/dbffilereader.md
@@ -8,60 +8,56 @@
 
 ```json
-{
-"job": {
-  "setting": {
-    "speed": {
-      "channel": 2
-    }
-  },
-  "content": [
-    {
-      "reader": {
-        "name": "dbffilereader",
-        "parameter": {
-          "column": [
-            {
-              "index": 0,
-              "type": "string"
-            },
-            {
-              "index": 1,
-              "type": "string"
-            },
-            {
-              "index": 2,
-              "type": "string"
-            },
-            {
-              "index": 3,
-              "type": "string"
-            },
-            {
-              "index": 4,
-              "type": "string"
-            },
-            {
-              "value": "201908",
-              "type": "string"
-            },
-            {
-              "value": "dbf",
-              "type": "string"
-            }
-          ],
-          "path": ["/tmp/test.dbf"],
-          "encoding": "GBK"
-        }
-      },
-      "writer": {
-        "name": "streamwriter",
-        "parameter": {
-          "print": "true"
-        }
-      }
-    }
-  ]
-}
-}
+{
+  "job": {
+    "setting": {
+      "speed": {
+        "channel": 2
+      }
+    },
+    "content": [
+      {
+        "reader": {
+          "name": "dbffilereader",
+          "parameter": {
+            "column": [
+              {
+                "index": 0,
+                "type": "string"
+              },
+              {
+                "index": 1,
+                "type": "long"
+              },
+              {
+                "index": 2,
+                "type": "string"
+              },
+              {
+                "index": 3,
+                "type": "boolean"
+              },
+              {
+                "index": 4,
+                "type": "string"
+              },
+              {
+                "value": "dbf",
+                "type": "string"
+              }
+            ],
+            "path": ["/tmp/out"],
+            "encoding": "GBK"
+          }
+        },
+        "writer": {
+          "name": "streamwriter",
+          "parameter": {
+            "print": "true"
+          }
+        }
+      }
+    ]
+  }
+}
 ```
@@ -69,15 +65,13 @@
 
 `parameter` supports the following options:
 
-| Option     | Required | Default                 | Description |
-| :--------- | :------: | ----------------------- | ----------- |
-| path       | yes      | none                    | DBF file path; multiple paths may be given, see below |
-| column     | yes      | type defaults to String | the set of columns to read, a collection of `{type: value}` or `{type: index}` entries, see below |
-| compress   | no       | none                    | compression type; leaving it unset means no compression. Supported types: zip, gzip, bzip2 |
-| encoding   | no       | UTF-8                   | DBF file encoding, e.g. `GBK`, `UTF-8` |
-| nullFormat | no       | `\N`                    | the string that represents null |
-| dbversion  | no       | none                    | DBF file version; auto-detected when not specified |
-
+| Option     | Required | Default                 | Description |
+| :--------- | :------: | ----------------------- | ----------- |
+| path       | yes      | none                    | DBF file path; multiple paths may be given, see below |
+| column     | yes      | type defaults to String | the set of columns to read, a collection of `{type: value}` or `{type: index}` entries, see below |
+| encoding   | no       | GBK                     | DBF file encoding, e.g. `GBK`, `UTF-8` |
+| nullFormat | no       | `\N`                    | the string that represents null |
+
 ### path
 
 Description: path(s) on the local filesystem; note that multiple paths may be specified.
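Since `path` accepts a list and `column` mixes positional `{index, type}` entries with constant `{value, type}` entries, a minimal reader `parameter` sketch combining both may help. This is a hedged illustration assembled only from options shown above, not an official sample; the paths and the constant `"201908"` are borrowed from the two revisions of the example:

```json
{
  "path": ["/tmp/test.dbf", "/tmp/out"],
  "encoding": "GBK",
  "column": [
    { "index": 0, "type": "string" },
    { "index": 1, "type": "long" },
    { "value": "201908", "type": "string" }
  ]
}
```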
diff --git a/docs/src/main/sphinx/writer/dbffilewriter.md b/docs/src/main/sphinx/writer/dbffilewriter.md
index a9a0fd443..692f97eee 100644
--- a/docs/src/main/sphinx/writer/dbffilewriter.md
+++ b/docs/src/main/sphinx/writer/dbffilewriter.md
@@ -6,90 +6,88 @@
 DbfFileWriter provides the ability to write one or more DBF-format table files to the local filesystem.
 
 The written local file holds a single DBF table, i.e. the contents of a DBF-format file.
 
-## 2 Features and Limitations
+## 2 Features
 
-The plugin converts data from the DataX protocol into a local DBF file. The local file itself is structured data storage, and DbfFileWriter makes the following commitments:
-
-1. It supports writing DBF files, and only DBF files.
-
-2. It supports text compression; the available formats are gzip and bzip2.
-
-3. It supports multi-threaded writing, with each thread writing a different sub-file.
-
-What it cannot do:
-
-1. Concurrent writes to a single file are not supported.
-
-## 3 Usage
-
-### 3.1 Configuration example
+### 2.1 Configuration example
 
 ```json
-{
-"job": {
-    "setting": {
-        "speed": {
-            "batchSize": 20480,
-            "bytes": -1,
-            "channel": 1
-        }
-    },
-    "content": [{
-        "reader": {
-            "name": "streamreader",
-            "parameter": {
-                "column" : [
-                    {
-                        "value": "DataX",
-                        "type": "string"
-                    },
-                    {
-                        "value": 19880808,
-                        "type": "long"
-                    },
-                    {
-                        "value": "1988-08-08 16:00:04",
-                        "type": "date"
-                    },
-                    {
-                        "value": true,
-                        "type": "bool"
-                    }
-                ],
-                "sliceRecordCount": 1000
-            }
-        },
-        "writer": {
-            "name": "dbffilewriter",
-            "parameter": {
-                "column": [
-                    {
-                        "name": "col1",
-                        "type": "char",
-                        "length": 100
-                    },
-                    {
-                        "name":"col2",
-                        "type":"numeric",
-                        "length": 18,
-                        "scale": 0
-                    },
-                    {
-                        "name": "col3",
-                        "type": "date"
-                    },
-                    {
-                        "name":"col4",
-                        "type":"logical"
-                    }
-                ],
-                "fileName": "test.dbf",
-                "path": "/tmp/out",
-                "writeMode": "truncate"
-            }
-        }
-    }]
-}
-}
+{
+  "job": {
+    "setting": {
+      "speed": {
+        "batchSize": 20480,
+        "bytes": -1,
+        "channel": 1
+      }
+    },
+    "content": [
+      {
+        "reader": {
+          "name": "streamreader",
+          "parameter": {
+            "column": [
+              {
+                "value": "DataX",
+                "type": "string"
+              },
+              {
+                "value": 19880808,
+                "type": "long"
+              },
+              {
+                "value": "1989-06-04 00:00:00",
+                "type": "date"
+              },
+              {
+                "value": true,
+                "type": "bool"
+              },
+              {
+                "value": "中文测试",
+                "type": "string"
+              }
+            ],
+            "sliceRecordCount": 10
+          }
+        },
+        "writer": {
+          "name": "dbffilewriter",
+          "parameter": {
+            "column": [
+              {
+                "name": "col1",
+                "type": "char",
+                "length": 100
+              },
+              {
+                "name": "col2",
+                "type": "numeric",
+                "length": 18,
+                "scale": 0
+              },
+              {
+                "name": "col3",
+                "type": "date"
+              },
+              {
+                "name": "col4",
+                "type": "logical"
+              },
+              {
+                "name": "col5",
+                "type": "char",
+                "length": 100
+              }
+            ],
+            "fileName": "test.dbf",
+            "path": "/tmp/out",
+            "writeMode": "truncate",
+            "encoding": "GBK"
+          }
+        }
+      }
+    ]
+  }
+}
 ```
@@ -101,11 +99,9 @@
 | column     | yes | type defaults to String | the set of columns to sync, a collection of `{type: value}` or `{type: index}` entries |
 | fileName   | yes | none  | name of the file DbfFileWriter writes |
 | writeMode  | yes | none  | how existing data is cleaned up before DbfFileWriter writes; supports `truncate`, `append` and `nonConflict`, see below |
-| compress   | no  | none  | compression type; leaving it unset means no compression. Supported types: zip, gzip, bzip2 |
 | encoding   | no  | UTF-8 | DBF file encoding, e.g. `GBK`, `UTF-8` |
 | nullFormat | no  | `\N`  | the string that represents null |
 | dateFormat | no  | none  | format used when serializing date values to the file, e.g. `"dateFormat": "yyyy-MM-dd"` |
-| fileFormat | no  | none  | output file format; currently only DBASE III is supported |
 
 #### writeMode
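The parameter table above names three `writeMode` values (`truncate`, `append`, `nonConflict`), and the diff ends before the section that describes them. As a hedged sketch only, here is the example's writer `parameter` block switched from `truncate` to `append`; the assumption that `append` adds to existing output rather than clearing it follows from the name and should be verified against the plugin:

```json
{
  "path": "/tmp/out",
  "fileName": "test.dbf",
  "writeMode": "append",
  "encoding": "GBK",
  "dateFormat": "yyyy-MM-dd",
  "column": [
    { "name": "col1", "type": "char", "length": 100 },
    { "name": "col3", "type": "date" }
  ]
}
```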