diff --git a/docs/cn/bucketizer.md b/docs/cn/bucketizer.md index 8ea6cec11..9a9df3a9d 100644 --- a/docs/cn/bucketizer.md +++ b/docs/cn/bucketizer.md @@ -1,19 +1,23 @@ ## 功能介绍 给定切分点,将连续变量分桶,可支持单列输入或多列输入,对应需要给出单列切分点或者多列切分点。 -每列切分点需要严格递增,且至少有三个点。 - ## 参数说明 + | 名称 | 中文名称 | 描述 | 类型 | 是否必须? | 默认值 | | --- | --- | --- | --- | --- | --- | -| handleInvalid | 如何处理无效值 | 可以选择skip:跳过,error:报错抛异常。 | String | | "error" | -| selectedCols | 计算列对应的列名列表 | 计算列对应的列名列表 | String[] | | | -| splitsArray | 多列的切分点 | 多列的切分点 | String[] | | | +| selectedCols | 选择的列名 | 计算列对应的列名列表 | String[] | ✓ | | +| reservedCols | 算法保留列名 | 算法保留列 | String[] | | null | | outputCols | 输出结果列列名数组 | 输出结果列列名数组,可选,默认null | String[] | | null | -| reservedCols | 算法保留列名 | 算法保留列 | String[] | | null | +| handleInvalid | 未知Token处理策略 | 未知Token处理策略,"keep", "skip", "error" | String | | "keep" | +| encode | 编码方式 | 编码方式,"INDEX", "VECTOR", "ASSEMBLED_VECTOR" | String | |INDEX | +| dropLast | 是否删除最后一个元素 | 是否删除最后一个元素 | Boolean | | true | +| leftOpen | 是否左开右闭 | 是否左开右闭 | Boolean | | true | +| cutsArray | 多列的切分点 | 多列的切分点 | double[][] | ✓ | | + + ## 脚本示例 #### 脚本代码 @@ -29,7 +33,7 @@ data = np.array([ df = pd.DataFrame({"double": data[:, 0], "bool": data[:, 1], "number": data[:, 2], "str": data[:, 3]}) inOp = BatchOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') -bucketizer = Bucketizer().setSelectedCols(["double"]).setSplitsArray(["-Infinity:2:Infinity"]) +bucketizer = Bucketizer().setSelectedCols(["double"]).setCutsArray([[2]]) bucketizer.transform(inOp).print() ``` #### 脚本运行结果 diff --git a/docs/cn/bucketizerbatchop.md b/docs/cn/bucketizerbatchop.md index ad66ec3e2..81d9249cd 100644 --- a/docs/cn/bucketizerbatchop.md +++ b/docs/cn/bucketizerbatchop.md @@ -1,19 +1,20 @@ ## 功能介绍 给定切分点,将连续变量分桶,可支持单列输入或多列输入,对应需要给出单列切分点或者多列切分点。 -每列切分点需要严格递增,且至少有三个点。 - ## 参数说明 | 名称 | 中文名称 | 描述 | 类型 | 是否必须? 
| 默认值 | | --- | --- | --- | --- | --- | --- | -| handleInvalid | 如何处理无效值 | 可以选择skip:跳过,error:报错抛异常。 | String | | "error" | -| selectedCols | 计算列对应的列名列表 | 计算列对应的列名列表 | String[] | | | -| splitsArray | 多列的切分点 | 多列的切分点 | String[] | | | +| selectedCols | 选择的列名 | 计算列对应的列名列表 | String[] | ✓ | | +| reservedCols | 算法保留列名 | 算法保留列 | String[] | | null | | outputCols | 输出结果列列名数组 | 输出结果列列名数组,可选,默认null | String[] | | null | -| reservedCols | 算法保留列名 | 算法保留列 | String[] | | null | +| handleInvalid | 未知Token处理策略 | 未知Token处理策略,"keep", "skip", "error" | String | | "keep" | +| encode | 编码方式 | 编码方式,"INDEX", "VECTOR", "ASSEMBLED_VECTOR" | String | |INDEX | +| dropLast | 是否删除最后一个元素 | 是否删除最后一个元素 | Boolean | | true | +| leftOpen | 是否左开右闭 | 是否左开右闭 | Boolean | | true | +| cutsArray | 多列的切分点 | 多列的切分点 | double[][] | ✓ | | ## 脚本示例 #### 脚本代码 @@ -31,10 +32,10 @@ df = pd.DataFrame({"double": data[:, 0], "bool": data[:, 1], "number": data[:, 2 inOp1 = BatchOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') inOp2 = StreamOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') -bucketizer = BucketizerBatchOp().setSelectedCols(["double"]).setSplitsArray(["-Infinity:2:Infinity"]) +bucketizer = BucketizerBatchOp().setSelectedCols(["double"]).setCutsArray([[2]]) bucketizer.linkFrom(inOp1).print() -bucketizer = BucketizerStreamOp().setSelectedCols(["double"]).setSplitsArray(["-Infinity:2:Infinity"]) +bucketizer = BucketizerStreamOp().setSelectedCols(["double"]).setCutsArray([[2]]) bucketizer.linkFrom(inOp2).print() StreamOperator.execute() diff --git a/docs/cn/bucketizerstreamop.md b/docs/cn/bucketizerstreamop.md index ab57c3508..81d9249cd 100644 --- a/docs/cn/bucketizerstreamop.md +++ b/docs/cn/bucketizerstreamop.md @@ -1,18 +1,20 @@ ## 功能介绍 给定切分点,将连续变量分桶,可支持单列输入或多列输入,对应需要给出单列切分点或者多列切分点。 -每列切分点需要严格递增,且至少有三个点。 - +## 参数说明 | 名称 | 中文名称 | 描述 | 类型 | 是否必须? 
| 默认值 | | --- | --- | --- | --- | --- | --- | -| handleInvalid | 如何处理无效值 | 可以选择skip:跳过,error:报错抛异常。 | String | | "error" | -| selectedCols | 计算列对应的列名列表 | 计算列对应的列名列表 | String[] | | | -| splitsArray | 多列的切分点 | 多列的切分点 | String[] | | | +| selectedCols | 选择的列名 | 计算列对应的列名列表 | String[] | ✓ | | +| reservedCols | 算法保留列名 | 算法保留列 | String[] | | null | | outputCols | 输出结果列列名数组 | 输出结果列列名数组,可选,默认null | String[] | | null | -| reservedCols | 算法保留列名 | 算法保留列 | String[] | | null | +| handleInvalid | 未知Token处理策略 | 未知Token处理策略,"keep", "skip", "error" | String | | "keep" | +| encode | 编码方式 | 编码方式,"INDEX", "VECTOR", "ASSEMBLED_VECTOR" | String | |INDEX | +| dropLast | 是否删除最后一个元素 | 是否删除最后一个元素 | Boolean | | true | +| leftOpen | 是否左开右闭 | 是否左开右闭 | Boolean | | true | +| cutsArray | 多列的切分点 | 多列的切分点 | double[][] | ✓ | | ## 脚本示例 #### 脚本代码 @@ -30,10 +32,10 @@ df = pd.DataFrame({"double": data[:, 0], "bool": data[:, 1], "number": data[:, 2 inOp1 = BatchOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') inOp2 = StreamOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') -bucketizer = BucketizerBatchOp().setSelectedCols(["double"]).setSplitsArray(["-Infinity:2:Infinity"]) +bucketizer = BucketizerBatchOp().setSelectedCols(["double"]).setCutsArray([[2]]) bucketizer.linkFrom(inOp1).print() -bucketizer = BucketizerStreamOp().setSelectedCols(["double"]).setSplitsArray(["-Infinity:2:Infinity"]) +bucketizer = BucketizerStreamOp().setSelectedCols(["double"]).setCutsArray([[2]]) bucketizer.linkFrom(inOp2).print() StreamOperator.execute() diff --git a/docs/cn/onehotencoder.md b/docs/cn/onehotencoder.md index 66886cb52..1e4726417 100644 --- a/docs/cn/onehotencoder.md +++ b/docs/cn/onehotencoder.md @@ -9,66 +9,48 @@ one-hot编码,也称独热编码,对于每一个特征,如果它有m个可 -| 名称 | 中文名称 | 描述 | 类型 | 是否必须? | 默认值 | + 名称 | 中文名称 | 描述 | 类型 | 是否必须? 
| 默认值 | | --- | --- | --- | --- | --- | --- | -| dropLast | 是否删除最后一个元素 | 删除最后一个元素是为了保证线性无关性。默认true | Boolean | | true | -| ignoreNull | 受否忽略null | 忽略将不对null 编码 | Boolean | | false | +| discreteThresholdsArray | 离散个数阈值 | 离散个数阈值,每一列对应数组中一个元素 | Integer[] | | | +| discreteThresholds | 离散个数阈值 | 离散个数阈值,低于该阈值的离散样本将不会单独成一个组别 | Integer | | Integer.MIN_VALUE | | selectedCols | 选择的列名 | 计算列对应的列名列表 | String[] | ✓ | | + selectedCols | 选择的列名 | 计算列对应的列名列表 | String[] | ✓ | | | reservedCols | 算法保留列名 | 算法保留列 | String[] | | null | -| outputCol | 输出结果列列名 | 输出结果列列名,必选 | String | ✓ | | +| outputCols | 输出结果列列名数组 | 输出结果列列名数组,可选,默认null | String[] | | null | +| handleInvalid | 未知Token处理策略 | 未知Token处理策略,"keep", "skip", "error" | String | | "keep" | +| encode | 编码方式 | 编码方式,"INDEX", "VECTOR", "ASSEMBLED_VECTOR" | String | | "ASSEMBLED_VECTOR" | +| dropLast | 是否删除最后一个元素 | 是否删除最后一个元素 | Boolean | | true | + + ## 脚本示例 #### 运行脚本 ```python +import numpy as np +import pandas as pd data = np.array([ - ["assisbragasm", 1], - ["assiseduc", 1], - ["assist", 1], - ["assiseduc", 1], - ["assistebrasil", 1], - ["assiseduc", 1], - ["assistebrasil", 1], - ["assistencialgsamsung", 1] + [1.1, True, "2", "A"], + [1.1, False, "2", "B"], + [1.1, True, "1", "B"], + [2.2, True, "1", "A"] ]) +df = pd.DataFrame({"double": data[:, 0], "bool": data[:, 1], "number": data[:, 2], "str": data[:, 3]}) -# load data -df = pd.DataFrame({"query": data[:, 0], "weight": data[:, 1]}) - -inOp = dataframeToOperator(df, schemaStr='query string, weight long', op_type='batch') - -# one hot train -one_hot = OneHotEncoder()\ - .setSelectedCols(["query"])\ - .setDropLast(False)\ - .setIgnoreNull(False)\ - .setOutputCol("predicted_r")\ - .setReservedCols(["weight"]) - - -model = one_hot.fit(inOp) -model.transform(inOp).print() +inOp1 = BatchOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') -# stream predict -inOp2 = dataframeToOperator(df, schemaStr='query string, weight long', op_type='stream') 
-model.transform(inOp2).print() - -StreamOperator.execute() +onehot = OneHotEncoder().setSelectedCols(["double", "bool"]).setDiscreteThresholds(2).setEncode("ASSEMBLED_VECTOR").setOutputCols(["pred"]).setDropLast(False) +onehot.fit(inOp1).transform(inOp1).collectToDataframe() ``` #### 运行结果 ```python - weight predicted_r -0 1 $6$4:1.0 -1 1 $6$3:1.0 -2 1 $6$2:1.0 -3 1 $6$3:1.0 -4 1 $6$1:1.0 -5 1 $6$3:1.0 -6 1 $6$1:1.0 -7 1 $6$0:1.0 - + double bool number str pred +0 1.1 True 2 A $6$0:1.0 3:1.0 +1 1.1 False 2 B $6$0:1.0 5:1.0 +2 1.1 True 1 B $6$0:1.0 3:1.0 +3 2.2 True 1 A $6$2:1.0 3:1.0 ``` diff --git a/docs/cn/onehotpredictbatchop.md b/docs/cn/onehotpredictbatchop.md index 5936c4f51..520edc725 100644 --- a/docs/cn/onehotpredictbatchop.md +++ b/docs/cn/onehotpredictbatchop.md @@ -13,55 +13,53 @@ one-hot编码,也称独热编码,对于每一个特征,如果它有m个可 | 名称 | 中文名称 | 描述 | 类型 | 是否必须? | 默认值 | | --- | --- | --- | --- | --- | --- | +| selectedCols | 选择的列名 | 计算列对应的列名列表 | String[] | ✓ | | | reservedCols | 算法保留列名 | 算法保留列 | String[] | | null | -| outputCol | 输出结果列列名 | 输出结果列列名,必选 | String | ✓ | | +| outputCols | 输出结果列列名数组 | 输出结果列列名数组,可选,默认null | String[] | | null | +| handleInvalid | 未知Token处理策略 | 未知Token处理策略,"keep", "skip", "error" | String | | "keep" | +| encode | 编码方式 | 编码方式,"INDEX", "VECTOR", "ASSEMBLED_VECTOR" | String | | "ASSEMBLED_VECTOR" | +| dropLast | 是否删除最后一个元素 | 是否删除最后一个元素 | Boolean | | true | + + + ## 脚本示例 #### 运行脚本 ```python +import numpy as np +import pandas as pd data = np.array([ - ["assisbragasm", 1], - ["assiseduc", 1], - ["assist", 1], - ["assiseduc", 1], - ["assistebrasil", 1], - ["assiseduc", 1], - ["assistebrasil", 1], - ["assistencialgsamsung", 1] + [1.1, True, "2", "A"], + [1.1, False, "2", "B"], + [1.1, True, "1", "B"], + [2.2, True, "1", "A"] ]) - -# load data -df = pd.DataFrame({"query": data[:, 0], "weight": data[:, 1]}) - -inOp = dataframeToOperator(df, schemaStr='query string, weight long', op_type='batch') - -# one hot train -one_hot = 
OneHotTrainBatchOp().setSelectedCols(["query"]).setDropLast(False).setIgnoreNull(False) -model = inOp.link(one_hot) - -# batch predict -predictor = OneHotPredictBatchOp().setOutputCol("predicted_r").setReservedCols(["weight"]) -print(BatchOperator.collectToDataframe(predictor.linkFrom(model, inOp))) - -# stream predict -inOp2 = dataframeToOperator(df, schemaStr='query string, weight long', op_type='stream') -predictor = OneHotPredictStreamOp(model).setOutputCol("predicted_r").setReservedCols(["weight"]) -predictor.linkFrom(inOp2).print() - +df = pd.DataFrame({"double": data[:, 0], "bool": data[:, 1], "number": data[:, 2], "str": data[:, 3]}) + +inOp1 = BatchOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') +inOp2 = StreamOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') + +onehot = OneHotTrainBatchOp().setSelectedCols(["double", "bool", "number", "str"]).setDiscreteThresholds(2) +predictBatch = OneHotPredictBatchOp().setSelectedCols(["double", "bool"]).setEncode("ASSEMBLED_VECTOR").setOutputCols(["pred"]).setDropLast(False) +onehot.linkFrom(inOp1) +predictBatch.linkFrom(onehot, inOp1) +[model,predict] = collectToDataframes(onehot, predictBatch) +print(model) +print(predict) + +predictStream = OneHotPredictStreamOp(onehot).setSelectedCols(["double", "bool"]).setEncode("ASSEMBLED_VECTOR").setOutputCols(["vec"]) +predictStream.linkFrom(inOp2) +predictStream.print(refreshInterval=-1) StreamOperator.execute() ``` #### 运行结果 ```python - weight predicted_r -0 1 $6$4:1.0 -1 1 $6$3:1.0 -2 1 $6$2:1.0 -3 1 $6$3:1.0 -4 1 $6$1:1.0 -5 1 $6$3:1.0 -6 1 $6$1:1.0 -7 1 $6$0:1.0 + double bool number str pred +0 1.1 True 2 A $6$0:1.0 3:1.0 +1 1.1 False 2 B $6$0:1.0 5:1.0 +2 1.1 True 1 B $6$0:1.0 3:1.0 +3 2.2 True 1 A $6$2:1.0 3:1.0 ``` @@ -69,3 +67,7 @@ StreamOperator.execute() + + + + diff --git a/docs/cn/onehotpredictstreamop.md b/docs/cn/onehotpredictstreamop.md index ae38fb0c0..520edc725 100644 --- 
a/docs/cn/onehotpredictstreamop.md +++ b/docs/cn/onehotpredictstreamop.md @@ -1,3 +1,5 @@ + + # one-hot编码组件 ## 算法介绍 @@ -11,56 +13,53 @@ one-hot编码,也称独热编码,对于每一个特征,如果它有m个可 | 名称 | 中文名称 | 描述 | 类型 | 是否必须? | 默认值 | | --- | --- | --- | --- | --- | --- | +| selectedCols | 选择的列名 | 计算列对应的列名列表 | String[] | ✓ | | | reservedCols | 算法保留列名 | 算法保留列 | String[] | | null | -| outputCol | 输出结果列列名 | 输出结果列列名,必选 | String | ✓ | | +| outputCols | 输出结果列列名数组 | 输出结果列列名数组,可选,默认null | String[] | | null | +| handleInvalid | 未知Token处理策略 | 未知Token处理策略,"keep", "skip", "error" | String | | "keep" | +| encode | 编码方式 | 编码方式,"INDEX", "VECTOR", "ASSEMBLED_VECTOR" | String | | "ASSEMBLED_VECTOR" | +| dropLast | 是否删除最后一个元素 | 是否删除最后一个元素 | Boolean | | true | + + ## 脚本示例 #### 运行脚本 ```python +import numpy as np +import pandas as pd data = np.array([ - ["assisbragasm", 1], - ["assiseduc", 1], - ["assist", 1], - ["assiseduc", 1], - ["assistebrasil", 1], - ["assiseduc", 1], - ["assistebrasil", 1], - ["assistencialgsamsung", 1] + [1.1, True, "2", "A"], + [1.1, False, "2", "B"], + [1.1, True, "1", "B"], + [2.2, True, "1", "A"] ]) - -# load data -df = pd.DataFrame({"query": data[:, 0], "weight": data[:, 1]}) - -inOp = dataframeToOperator(df, schemaStr='query string, weight long', op_type='batch') - -# one hot train -one_hot = OneHotTrainBatchOp().setSelectedCols(["query"]).setDropLast(False).setIgnoreNull(False) -model = inOp.link(one_hot) - -# batch predict -predictor = OneHotPredictBatchOp().setOutputCol("predicted_r").setReservedCols(["weight"]) -print(BatchOperator.collectToDataframe(predictor.linkFrom(model, inOp))) - -# stream predict -inOp2 = dataframeToOperator(df, schemaStr='query string, weight long', op_type='stream') -predictor = OneHotPredictStreamOp(model).setOutputCol("predicted_r").setReservedCols(["weight"]) -predictor.linkFrom(inOp2).print() - +df = pd.DataFrame({"double": data[:, 0], "bool": data[:, 1], "number": data[:, 2], "str": data[:, 3]}) + +inOp1 = BatchOperator.fromDataframe(df, 
schemaStr='double double, bool boolean, number int, str string') +inOp2 = StreamOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') + +onehot = OneHotTrainBatchOp().setSelectedCols(["double", "bool", "number", "str"]).setDiscreteThresholds(2) +predictBatch = OneHotPredictBatchOp().setSelectedCols(["double", "bool"]).setEncode("ASSEMBLED_VECTOR").setOutputCols(["pred"]).setDropLast(False) +onehot.linkFrom(inOp1) +predictBatch.linkFrom(onehot, inOp1) +[model,predict] = collectToDataframes(onehot, predictBatch) +print(model) +print(predict) + +predictStream = OneHotPredictStreamOp(onehot).setSelectedCols(["double", "bool"]).setEncode("ASSEMBLED_VECTOR").setOutputCols(["vec"]) +predictStream.linkFrom(inOp2) +predictStream.print(refreshInterval=-1) StreamOperator.execute() ``` #### 运行结果 ```python - weight predicted_r -0 1 $6$4:1.0 -1 1 $6$3:1.0 -2 1 $6$2:1.0 -3 1 $6$3:1.0 -4 1 $6$1:1.0 -5 1 $6$3:1.0 -6 1 $6$1:1.0 -7 1 $6$0:1.0 + double bool number str pred +0 1.1 True 2 A $6$0:1.0 3:1.0 +1 1.1 False 2 B $6$0:1.0 5:1.0 +2 1.1 True 1 B $6$0:1.0 3:1.0 +3 2.2 True 1 A $6$2:1.0 3:1.0 ``` @@ -68,3 +67,7 @@ StreamOperator.execute() + + + + diff --git a/docs/cn/onehottrainbatchop.md b/docs/cn/onehottrainbatchop.md index 25062fdf0..48a49a74a 100644 --- a/docs/cn/onehottrainbatchop.md +++ b/docs/cn/onehottrainbatchop.md @@ -10,57 +10,50 @@ one-hot编码,也称独热编码,对于每一个特征,如果它有m个可 | 名称 | 中文名称 | 描述 | 类型 | 是否必须? 
| 默认值 | | --- | --- | --- | --- | --- | --- | -| dropLast | 是否删除最后一个元素 | 删除最后一个元素是为了保证线性无关性。默认true | Boolean | | true | -| ignoreNull | 受否忽略null | 忽略将不对null 编码 | Boolean | | false | -| selectedCols | 选择的列名 | 计算列对应的列名列表 | String[] | ✓ | | +| discreteThresholdsArray | 离散个数阈值 | 离散个数阈值,每一列对应数组中一个元素 | Integer[] | | | +| discreteThresholds | 离散个数阈值 | 离散个数阈值,低于该阈值的离散样本将不会单独成一个组别 | Integer | | Integer.MIN_VALUE | +| selectedCols | 选择的列名 | 计算列对应的列名列表 | String[] | ✓ | | + + ## 脚本示例 #### 运行脚本 ```python +import numpy as np +import pandas as pd data = np.array([ - ["assisbragasm", 1], - ["assiseduc", 1], - ["assist", 1], - ["assiseduc", 1], - ["assistebrasil", 1], - ["assiseduc", 1], - ["assistebrasil", 1], - ["assistencialgsamsung", 1] + [1.1, True, "2", "A"], + [1.1, False, "2", "B"], + [1.1, True, "1", "B"], + [2.2, True, "1", "A"] ]) - -# load data -df = pd.DataFrame({"query": data[:, 0], "weight": data[:, 1]}) - -inOp = dataframeToOperator(df, schemaStr='query string, weight long', op_type='batch') - -# one hot train -one_hot = OneHotTrainBatchOp().setSelectedCols(["query"]).setDropLast(False).setIgnoreNull(False) -model = inOp.link(one_hot) - -# batch predict -predictor = OneHotPredictBatchOp().setOutputCol("predicted_r").setReservedCols(["weight"]) -print(BatchOperator.collectToDataframe(predictor.linkFrom(model, inOp))) - -# stream predict -inOp2 = dataframeToOperator(df, schemaStr='query string, weight long', op_type='stream') -predictor = OneHotPredictStreamOp(model).setOutputCol("predicted_r").setReservedCols(["weight"]) -predictor.linkFrom(inOp2).print() - +df = pd.DataFrame({"double": data[:, 0], "bool": data[:, 1], "number": data[:, 2], "str": data[:, 3]}) + +inOp1 = BatchOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') +inOp2 = StreamOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') + +onehot = OneHotTrainBatchOp().setSelectedCols(["double", "bool", "number", 
"str"]).setDiscreteThresholds(2) +predictBatch = OneHotPredictBatchOp().setSelectedCols(["double", "bool"]).setEncode("ASSEMBLED_VECTOR").setOutputCols(["pred"]).setDropLast(False) +onehot.linkFrom(inOp1) +predictBatch.linkFrom(onehot, inOp1) +[model,predict] = collectToDataframes(onehot, predictBatch) +print(model) +print(predict) + +predictStream = OneHotPredictStreamOp(onehot).setSelectedCols(["double", "bool"]).setEncode("ASSEMBLED_VECTOR").setOutputCols(["vec"]) +predictStream.linkFrom(inOp2) +predictStream.print(refreshInterval=-1) StreamOperator.execute() ``` #### 运行结果 ```python - weight predicted_r -0 1 $6$4:1.0 -1 1 $6$3:1.0 -2 1 $6$2:1.0 -3 1 $6$3:1.0 -4 1 $6$1:1.0 -5 1 $6$3:1.0 -6 1 $6$1:1.0 -7 1 $6$0:1.0 + double bool number str pred +0 1.1 True 2 A $6$0:1.0 3:1.0 +1 1.1 False 2 B $6$0:1.0 5:1.0 +2 1.1 True 1 B $6$0:1.0 3:1.0 +3 2.2 True 1 A $6$2:1.0 3:1.0 ``` diff --git a/docs/cn/quantilediscretizer.md b/docs/cn/quantilediscretizer.md index ea03b150f..709e723f1 100644 --- a/docs/cn/quantilediscretizer.md +++ b/docs/cn/quantilediscretizer.md @@ -10,14 +10,21 @@ + | 名称 | 中文名称 | 描述 | 类型 | 是否必须? 
| 默认值 | | --- | --- | --- | --- | --- | --- | | selectedCols | 选择的列名 | 计算列对应的列名列表 | String[] | ✓ | | | numBuckets | quantile个数 | quantile个数,对所有列有效。 | Integer | | 2 | | numBucketsArray | quantile个数 | quantile个数,每一列对应数组中一个元素。 | Integer[] | | null | +| leftOpen | 左开右闭 | 左开右闭 | Boolean | | true | | selectedCols | 选择的列名 | 计算列对应的列名列表 | String[] | ✓ | | | reservedCols | 算法保留列名 | 算法保留列 | String[] | | null | -| outputCols | 输出结果列列名数组 | 输出结果列列名数组,可选,默认null | String[] | | null | +| outputCols | 输出结果列列名数组 | 输出结果列列名数组,可选,默认null | String[] | | null | +| handleInvalid | 未知Token处理策略 | 未知Token处理策略,"keep", "skip", "error" | String | | "keep" | +| encode | 编码方式 | 编码方式,"INDEX", "VECTOR", "ASSEMBLED_VECTOR" | String | |INDEX | +| dropLast | 是否删除最后一个元素 | 是否删除最后一个元素 | Boolean | | true | + + ## 脚本示例 diff --git a/docs/cn/quantilediscretizerpredictbatchop.md b/docs/cn/quantilediscretizerpredictbatchop.md index 93d5a4f91..a968051e5 100644 --- a/docs/cn/quantilediscretizerpredictbatchop.md +++ b/docs/cn/quantilediscretizerpredictbatchop.md @@ -6,11 +6,17 @@ ## 参数说明 + | 名称 | 中文名称 | 描述 | 类型 | 是否必须? | 默认值 | | --- | --- | --- | --- | --- | --- | | selectedCols | 选择的列名 | 计算列对应的列名列表 | String[] | ✓ | | | reservedCols | 算法保留列名 | 算法保留列 | String[] | | null | -| outputCols | 输出结果列列名数组 | 输出结果列列名数组,可选,默认null | String[] | | null | +| outputCols | 输出结果列列名数组 | 输出结果列列名数组,可选,默认null | String[] | | null | +| handleInvalid | 未知Token处理策略 | 未知Token处理策略,"keep", "skip", "error" | String | | "keep" | +| encode | 编码方式 | 编码方式,"INDEX", "VECTOR", "ASSEMBLED_VECTOR" | String | |INDEX | +| dropLast | 是否删除最后一个元素 | 是否删除最后一个元素 | Boolean | | true | + + ## 脚本示例 diff --git a/docs/cn/quantilediscretizerpredictstreamop.md b/docs/cn/quantilediscretizerpredictstreamop.md index f2edc6421..25d761f38 100644 --- a/docs/cn/quantilediscretizerpredictstreamop.md +++ b/docs/cn/quantilediscretizerpredictstreamop.md @@ -6,11 +6,17 @@ ## 参数说明 + | 名称 | 中文名称 | 描述 | 类型 | 是否必须? 
| 默认值 | | --- | --- | --- | --- | --- | --- | | selectedCols | 选择的列名 | 计算列对应的列名列表 | String[] | ✓ | | | reservedCols | 算法保留列名 | 算法保留列 | String[] | | null | -| outputCols | 输出结果列列名数组 | 输出结果列列名数组,可选,默认null | String[] | | null | +| outputCols | 输出结果列列名数组 | 输出结果列列名数组,可选,默认null | String[] | | null | +| handleInvalid | 未知Token处理策略 | 未知Token处理策略,"keep", "skip", "error" | String | | "keep" | +| encode | 编码方式 | 编码方式,"INDEX", "VECTOR", "ASSEMBLED_VECTOR" | String | |INDEX | +| dropLast | 是否删除最后一个元素 | 是否删除最后一个元素 | Boolean | | true | + + ## 脚本示例 diff --git a/docs/cn/quantilediscretizertrainbatchop.md b/docs/cn/quantilediscretizertrainbatchop.md index f46b03568..a50f555cd 100644 --- a/docs/cn/quantilediscretizertrainbatchop.md +++ b/docs/cn/quantilediscretizertrainbatchop.md @@ -11,6 +11,7 @@ | 名称 | 中文名称 | 描述 | 类型 | 是否必须? | 默认值 | | --- | --- | --- | --- | --- | --- | | selectedCols | 选择的列名 | 计算列对应的列名列表 | String[] | ✓ | | +| leftOpen | 左开右闭 | 左开右闭 | Boolean | | true | | numBuckets | quantile个数 | quantile个数,对所有列有效。 | Integer | | 2 | | numBucketsArray | quantile个数 | quantile个数,每一列对应数组中一个元素。 | Integer[] | | null | diff --git a/docs/en/bucketizer.md b/docs/en/bucketizer.md index a392d4425..04d8ce908 100644 --- a/docs/en/bucketizer.md +++ b/docs/en/bucketizer.md @@ -6,15 +6,15 @@ Map a continuous variable into several buckets. and splitsArray should be set, and the lengths of them should be equal. In the case of multiple columns, each column used the corresponding splits. - Split array must be strictly increasing and have at least three points. It's a string input with split points - segments with delimiter ",". - ## Parameters | Name | Description | Type | Required? 
| Default Value | | --- | --- | --- | --- | --- | -| handleInvalid | parameter for how to handle invalid data (NULL values) | String | | "error" | +| handleInvalid | Strategy to handle unseen token when doing prediction, one of "keep", "skip" or "error" | String | | "keep" | +| encode | Encode method,"INDEX", "VECTOR", "ASSEMBLED_VECTOR" | String | |INDEX | +| dropLast | drop last | Boolean | | true | +| leftOpen | left open | Boolean | | true | +| cutsArray | Split points array, each of them is used for the corresponding selected column. | double[][] | ✓ | | | selectedCols | Names of the columns used for processing | String[] | | | -| splitsArray | Split points array, each of them is used for the corresponding selected column. | String[] | | | | outputCols | Names of the output columns | String[] | | null | | reservedCols | Names of the columns to be retained in the output table | String[] | | null | @@ -33,7 +33,7 @@ data = np.array([ df = pd.DataFrame({"double": data[:, 0], "bool": data[:, 1], "number": data[:, 2], "str": data[:, 3]}) inOp = BatchOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') -bucketizer = Bucketizer().setSelectedCols(["double"]).setSplitsArray(["-Infinity:2:Infinity"]) +bucketizer = Bucketizer().setSelectedCols(["double"]).setCutsArray([[2]]) bucketizer.transform(inOp).print() ``` #### Results diff --git a/docs/en/bucketizerbatchop.md b/docs/en/bucketizerbatchop.md index 0d5690be3..7b4bb49f2 100644 --- a/docs/en/bucketizerbatchop.md +++ b/docs/en/bucketizerbatchop.md @@ -6,19 +6,20 @@ Map a continuous variable into several buckets. and splitsArray should be set, and the lengths of them should be equal. In the case of multiple columns, each column used the corresponding splits. - Split array must be strictly increasing and have at least three points. It's a string input with split points - segments with delimiter ",". - ## Parameters | Name | Description | Type | Required? 
| Default Value | | --- | --- | --- | --- | --- | -| handleInvalid | parameter for how to handle invalid data (NULL values) | String | | "error" | -| selectedCols | Names of the columns used for processing | String[] | | | -| splitsArray | Split points array, each of them is used for the corresponding selected column. | String[] | | | +| handleInvalid | Strategy to handle unseen token when doing prediction, one of "keep", "skip" or "error" | String | | "keep" | +| encode | Encode method,"INDEX", "VECTOR", "ASSEMBLED_VECTOR" | String | |INDEX | +| dropLast | drop last | Boolean | | true | +| leftOpen | left open | Boolean | | true | +| cutsArray | Split points array, each of them is used for the corresponding selected column. | double[][] | ✓ | | +| selectedCols | Names of the columns used for processing | String[] | ✓ | | | outputCols | Names of the output columns | String[] | | null | | reservedCols | Names of the columns to be retained in the output table | String[] | | null | + ## Script Example #### Code ``` @@ -35,10 +36,10 @@ df = pd.DataFrame({"double": data[:, 0], "bool": data[:, 1], "number": data[:, 2 inOp1 = BatchOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') inOp2 = StreamOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') -bucketizer = BucketizerBatchOp().setSelectedCols(["double"]).setSplitsArray(["-Infinity:2:Infinity"]) +bucketizer = BucketizerBatchOp().setSelectedCols(["double"])..setCutsArray([[2]]) bucketizer.linkFrom(inOp1).print() -bucketizer = BucketizerStreamOp().setSelectedCols(["double"]).setSplitsArray(["-Infinity:2:Infinity"]) +bucketizer = BucketizerStreamOp().setSelectedCols(["double"]).setCutsArray([[2]]) bucketizer.linkFrom(inOp2).print() StreamOperator.execute() diff --git a/docs/en/bucketizerstreamop.md b/docs/en/bucketizerstreamop.md index 6b12abb0c..9cdf0a5d8 100644 --- a/docs/en/bucketizerstreamop.md +++ b/docs/en/bucketizerstreamop.md @@ -8,13 
+8,17 @@ Map a continuous variable into several buckets. ## Parameters | Name | Description | Type | Required? | Default Value | | --- | --- | --- | --- | --- | -| handleInvalid | parameter for how to handle invalid data (NULL values) | String | | "error" | -| selectedCols | Names of the columns used for processing | String[] | | | -| splitsArray | Split points array, each of them is used for the corresponding selected column. | String[] | | | +| handleInvalid | Strategy to handle unseen token when doing prediction, one of "keep", "skip" or "error" | String | | "keep" | +| encode | Encode method,"INDEX", "VECTOR", "ASSEMBLED_VECTOR" | String | |INDEX | +| dropLast | drop last | Boolean | | true | +| leftOpen | left open | Boolean | | true | +| cutsArray | Split points array, each of them is used for the corresponding selected column. | double[][] | ✓ | | +| selectedCols | Names of the columns used for processing | String[] | ✓ | | | outputCols | Names of the output columns | String[] | | null | | reservedCols | Names of the columns to be retained in the output table | String[] | | null | + ## Script Example #### Code ``` diff --git a/docs/en/onehotencoder.md b/docs/en/onehotencoder.md index ecb03b780..f764b7107 100644 --- a/docs/en/onehotencoder.md +++ b/docs/en/onehotencoder.md @@ -4,63 +4,55 @@ One hot pipeline op. ## Parameters | Name | Description | Type | Required? 
| Default Value | | --- | --- | --- | --- | --- | +| discreteThresholdsArray | discrete thresholds array | Integer[] | | | +| discreteThresholds | discrete thresholds array | Integer | | Integer.MIN_VALUE | +| selectedCols | Names of the columns used for processing | String[] | | | +| handleInvalid | Strategy to handle unseen token when doing prediction, one of "keep", "skip" or "error" | String | | "keep" | +| encode | Encode method,"INDEX", "VECTOR", "ASSEMBLED_VECTOR" | String | |INDEX | | dropLast | drop last | Boolean | | true | -| ignoreNull | ignore null | Boolean | | false | -| selectedCols | Names of the columns used for processing | String[] | ✓ | | +| selectedCols | Names of the columns used for processing | String[] | | | +| outputCols | Names of the output columns | String[] | | null | | reservedCols | Names of the columns to be retained in the output table | String[] | | null | -| outputCol | Name of the output column | String | ✓ | | ## Script Example #### Script ```python +import numpy as np +import pandas as pd data = np.array([ - ["assisbragasm", 1], - ["assiseduc", 1], - ["assist", 1], - ["assiseduc", 1], - ["assistebrasil", 1], - ["assiseduc", 1], - ["assistebrasil", 1], - ["assistencialgsamsung", 1] + [1.1, True, "2", "A"], + [1.1, False, "2", "B"], + [1.1, True, "1", "B"], + [2.2, True, "1", "A"] ]) - -# load data -df = pd.DataFrame({"query": data[:, 0], "weight": data[:, 1]}) - -inOp = dataframeToOperator(df, schemaStr='query string, weight long', op_type='batch') - -# one hot train -one_hot = OneHotEncoder()\ - .setSelectedCols(["query"])\ - .setDropLast(False)\ - .setIgnoreNull(False)\ - .setOutputCol("predicted_r")\ - .setReservedCols(["weight"]) - - -model = one_hot.fit(inOp) -model.transform(inOp).print() - -# stream predict -inOp2 = dataframeToOperator(df, schemaStr='query string, weight long', op_type='stream') -model.transform(inOp2).print() - +df = pd.DataFrame({"double": data[:, 0], "bool": data[:, 1], "number": data[:, 2], "str": 
data[:, 3]}) + +inOp1 = BatchOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') +inOp2 = StreamOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') + +onehot = OneHotTrainBatchOp().setSelectedCols(["double", "bool", "number", "str"]).setDiscreteThresholds(2) +predictBatch = OneHotPredictBatchOp().setSelectedCols(["double", "bool"]).setEncode("ASSEMBLED_VECTOR").setOutputCols(["pred"]).setDropLast(False) +onehot.linkFrom(inOp1) +predictBatch.linkFrom(onehot, inOp1) +[model,predict] = collectToDataframes(onehot, predictBatch) +print(model) +print(predict) + +predictStream = OneHotPredictStreamOp(onehot).setSelectedCols(["double", "bool"]).setEncode("ASSEMBLED_VECTOR").setOutputCols(["vec"]) +predictStream.linkFrom(inOp2) +predictStream.print(refreshInterval=-1) StreamOperator.execute() ``` #### Result ```python - weight predicted_r -0 1 $6$4:1.0 -1 1 $6$3:1.0 -2 1 $6$2:1.0 -3 1 $6$3:1.0 -4 1 $6$1:1.0 -5 1 $6$3:1.0 -6 1 $6$1:1.0 -7 1 $6$0:1.0 + double bool number str pred +0 1.1 True 2 A $6$0:1.0 3:1.0 +1 1.1 False 2 B $6$0:1.0 5:1.0 +2 1.1 True 1 B $6$0:1.0 3:1.0 +3 2.2 True 1 A $6$2:1.0 3:1.0 ``` diff --git a/docs/en/onehotpredictbatchop.md b/docs/en/onehotpredictbatchop.md index 9b4177045..9b848fbcd 100644 --- a/docs/en/onehotpredictbatchop.md +++ b/docs/en/onehotpredictbatchop.md @@ -5,61 +5,50 @@ One-hot batch operator maps a serial of columns of category indices to a column ## Parameters | Name | Description | Type | Required? 
| Default Value | | --- | --- | --- | --- | --- | +| handleInvalid | Strategy to handle unseen token when doing prediction, one of "keep", "skip" or "error" | String | | "keep" | +| encode | Encode method,"INDEX", "VECTOR", "ASSEMBLED_VECTOR" | String | |INDEX | +| dropLast | drop last | Boolean | | true | +| selectedCols | Names of the columns used for processing | String[] | | | +| outputCols | Names of the output columns | String[] | | null | | reservedCols | Names of the columns to be retained in the output table | String[] | | null | -| outputCol | Name of the output column | String | ✓ | | ## Script Example #### Script ```python +import numpy as np +import pandas as pd data = np.array([ - ["assisbragasm", 1], - ["assiseduc", 1], - ["assist", 1], - ["assiseduc", 1], - ["assistebrasil", 1], - ["assiseduc", 1], - ["assistebrasil", 1], - ["assistencialgsamsung", 1] + [1.1, True, "2", "A"], + [1.1, False, "2", "B"], + [1.1, True, "1", "B"], + [2.2, True, "1", "A"] ]) - -# load data -df = pd.DataFrame({"query": data[:, 0], "weight": data[:, 1]}) - -inOp = dataframeToOperator(df, schemaStr='query string, weight long', op_type='batch') - -# one hot train -one_hot = OneHotTrainBatchOp().setSelectedCols(["query"]).setDropLast(False).setIgnoreNull(False) -model = inOp.link(one_hot) - -# batch predict -predictor = OneHotPredictBatchOp().setOutputCol("predicted_r").setReservedCols(["weight"]) -print(BatchOperator.collectToDataframe(predictor.linkFrom(model, inOp))) - -# stream predict -inOp2 = dataframeToOperator(df, schemaStr='query string, weight long', op_type='stream') -predictor = OneHotPredictStreamOp(model).setOutputCol("predicted_r").setReservedCols(["weight"]) -predictor.linkFrom(inOp2).print() - +df = pd.DataFrame({"double": data[:, 0], "bool": data[:, 1], "number": data[:, 2], "str": data[:, 3]}) + +inOp1 = BatchOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') +inOp2 = StreamOperator.fromDataframe(df, schemaStr='double 
double, bool boolean, number int, str string') + +onehot = OneHotTrainBatchOp().setSelectedCols(["double", "bool", "number", "str"]).setDiscreteThresholds(2) +predictBatch = OneHotPredictBatchOp().setSelectedCols(["double", "bool"]).setEncode("ASSEMBLED_VECTOR").setOutputCols(["pred"]).setDropLast(False) +onehot.linkFrom(inOp1) +predictBatch.linkFrom(onehot, inOp1) +[model,predict] = collectToDataframes(onehot, predictBatch) +print(model) +print(predict) + +predictStream = OneHotPredictStreamOp(onehot).setSelectedCols(["double", "bool"]).setEncode("ASSEMBLED_VECTOR").setOutputCols(["vec"]) +predictStream.linkFrom(inOp2) +predictStream.print(refreshInterval=-1) StreamOperator.execute() ``` #### Result ```python - weight predicted_r -0 1 $6$4:1.0 -1 1 $6$3:1.0 -2 1 $6$2:1.0 -3 1 $6$3:1.0 -4 1 $6$1:1.0 -5 1 $6$3:1.0 -6 1 $6$1:1.0 -7 1 $6$0:1.0 + double bool number str pred +0 1.1 True 2 A $6$0:1.0 3:1.0 +1 1.1 False 2 B $6$0:1.0 5:1.0 +2 1.1 True 1 B $6$0:1.0 3:1.0 +3 2.2 True 1 A $6$2:1.0 3:1.0 ``` - - - - - - diff --git a/docs/en/onehotpredictstreamop.md b/docs/en/onehotpredictstreamop.md index 948bf1f05..6041127d5 100644 --- a/docs/en/onehotpredictstreamop.md +++ b/docs/en/onehotpredictstreamop.md @@ -6,56 +6,50 @@ ## Parameters | Name | Description | Type | Required? 
| Default Value | | --- | --- | --- | --- | --- | +| handleInvalid | Strategy to handle unseen token when doing prediction, one of "keep", "skip" or "error" | String | | "keep" | +| encode | Encode method,"INDEX", "VECTOR", "ASSEMBLED_VECTOR" | String | |INDEX | +| dropLast | drop last | Boolean | | true | +| selectedCols | Names of the columns used for processing | String[] | | | +| outputCols | Names of the output columns | String[] | | null | | reservedCols | Names of the columns to be retained in the output table | String[] | | null | -| outputCol | Name of the output column | String | ✓ | | - ## Script Example #### Script ```python +import numpy as np +import pandas as pd data = np.array([ - ["assisbragasm", 1], - ["assiseduc", 1], - ["assist", 1], - ["assiseduc", 1], - ["assistebrasil", 1], - ["assiseduc", 1], - ["assistebrasil", 1], - ["assistencialgsamsung", 1] + [1.1, True, "2", "A"], + [1.1, False, "2", "B"], + [1.1, True, "1", "B"], + [2.2, True, "1", "A"] ]) - -# load data -df = pd.DataFrame({"query": data[:, 0], "weight": data[:, 1]}) - -inOp = dataframeToOperator(df, schemaStr='query string, weight long', op_type='batch') - -# one hot train -one_hot = OneHotTrainBatchOp().setSelectedCols(["query"]).setDropLast(False).setIgnoreNull(False) -model = inOp.link(one_hot) - -# batch predict -predictor = OneHotPredictBatchOp().setOutputCol("predicted_r").setReservedCols(["weight"]) -print(BatchOperator.collectToDataframe(predictor.linkFrom(model, inOp))) - -# stream predict -inOp2 = dataframeToOperator(df, schemaStr='query string, weight long', op_type='stream') -predictor = OneHotPredictStreamOp(model).setOutputCol("predicted_r").setReservedCols(["weight"]) -predictor.linkFrom(inOp2).print() - +df = pd.DataFrame({"double": data[:, 0], "bool": data[:, 1], "number": data[:, 2], "str": data[:, 3]}) + +inOp1 = BatchOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') +inOp2 = StreamOperator.fromDataframe(df, 
schemaStr='double double, bool boolean, number int, str string') + +onehot = OneHotTrainBatchOp().setSelectedCols(["double", "bool", "number", "str"]).setDiscreteThresholds(2) +predictBatch = OneHotPredictBatchOp().setSelectedCols(["double", "bool"]).setEncode("ASSEMBLED_VECTOR").setOutputCols(["pred"]).setDropLast(False) +onehot.linkFrom(inOp1) +predictBatch.linkFrom(onehot, inOp1) +[model,predict] = collectToDataframes(onehot, predictBatch) +print(model) +print(predict) + +predictStream = OneHotPredictStreamOp(onehot).setSelectedCols(["double", "bool"]).setEncode("ASSEMBLED_VECTOR").setOutputCols(["vec"]) +predictStream.linkFrom(inOp2) +predictStream.print(refreshInterval=-1) StreamOperator.execute() ``` #### Result ```python - weight predicted_r -0 1 $6$4:1.0 -1 1 $6$3:1.0 -2 1 $6$2:1.0 -3 1 $6$3:1.0 -4 1 $6$1:1.0 -5 1 $6$3:1.0 -6 1 $6$1:1.0 -7 1 $6$0:1.0 + double bool number str pred +0 1.1 True 2 A $6$0:1.0 3:1.0 +1 1.1 False 2 B $6$0:1.0 5:1.0 +2 1.1 True 1 B $6$0:1.0 3:1.0 +3 2.2 True 1 A $6$2:1.0 3:1.0 ``` diff --git a/docs/en/onehottrainbatchop.md b/docs/en/onehottrainbatchop.md index 8216d5a1f..42345e273 100644 --- a/docs/en/onehottrainbatchop.md +++ b/docs/en/onehottrainbatchop.md @@ -6,57 +6,47 @@ One-hot maps a serial of columns of category indices to a column of ## Parameters | Name | Description | Type | Required? 
| Default Value | | --- | --- | --- | --- | --- | -| dropLast | drop last | Boolean | | true | -| ignoreNull | ignore null | Boolean | | false | -| selectedCols | Names of the columns used for processing | String[] | ✓ | | - +| discreteThresholdsArray | discrete thresholds array | Integer[] | | | +| discreteThresholds | discrete thresholds array | Integer | | Integer.MIN_VALUE | +| selectedCols | Names of the columns used for processing | String[] | | | ## Script Example #### Script ```python +import numpy as np +import pandas as pd data = np.array([ - ["assisbragasm", 1], - ["assiseduc", 1], - ["assist", 1], - ["assiseduc", 1], - ["assistebrasil", 1], - ["assiseduc", 1], - ["assistebrasil", 1], - ["assistencialgsamsung", 1] + [1.1, True, "2", "A"], + [1.1, False, "2", "B"], + [1.1, True, "1", "B"], + [2.2, True, "1", "A"] ]) - -# load data -df = pd.DataFrame({"query": data[:, 0], "weight": data[:, 1]}) - -inOp = dataframeToOperator(df, schemaStr='query string, weight long', op_type='batch') - -# one hot train -one_hot = OneHotTrainBatchOp().setSelectedCols(["query"]).setDropLast(False).setIgnoreNull(False) -model = inOp.link(one_hot) - -# batch predict -predictor = OneHotPredictBatchOp().setOutputCol("predicted_r").setReservedCols(["weight"]) -print(BatchOperator.collectToDataframe(predictor.linkFrom(model, inOp))) - -# stream predict -inOp2 = dataframeToOperator(df, schemaStr='query string, weight long', op_type='stream') -predictor = OneHotPredictStreamOp(model).setOutputCol("predicted_r").setReservedCols(["weight"]) -predictor.linkFrom(inOp2).print() - +df = pd.DataFrame({"double": data[:, 0], "bool": data[:, 1], "number": data[:, 2], "str": data[:, 3]}) + +inOp1 = BatchOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') +inOp2 = StreamOperator.fromDataframe(df, schemaStr='double double, bool boolean, number int, str string') + +onehot = OneHotTrainBatchOp().setSelectedCols(["double", "bool", "number", 
"str"]).setDiscreteThresholds(2) +predictBatch = OneHotPredictBatchOp().setSelectedCols(["double", "bool"]).setEncode("ASSEMBLED_VECTOR").setOutputCols(["pred"]).setDropLast(False) +onehot.linkFrom(inOp1) +predictBatch.linkFrom(onehot, inOp1) +[model,predict] = collectToDataframes(onehot, predictBatch) +print(model) +print(predict) + +predictStream = OneHotPredictStreamOp(onehot).setSelectedCols(["double", "bool"]).setEncode("ASSEMBLED_VECTOR").setOutputCols(["vec"]) +predictStream.linkFrom(inOp2) +predictStream.print(refreshInterval=-1) StreamOperator.execute() ``` #### Result ```python - weight predicted_r -0 1 $6$4:1.0 -1 1 $6$3:1.0 -2 1 $6$2:1.0 -3 1 $6$3:1.0 -4 1 $6$1:1.0 -5 1 $6$3:1.0 -6 1 $6$1:1.0 -7 1 $6$0:1.0 + double bool number str pred +0 1.1 True 2 A $6$0:1.0 3:1.0 +1 1.1 False 2 B $6$0:1.0 5:1.0 +2 1.1 True 1 B $6$0:1.0 3:1.0 +3 2.2 True 1 A $6$2:1.0 3:1.0 ``` diff --git a/docs/en/quantilediscretizer.md b/docs/en/quantilediscretizer.md index b972a5d4a..d422e0045 100644 --- a/docs/en/quantilediscretizer.md +++ b/docs/en/quantilediscretizer.md @@ -9,10 +9,13 @@ Quantile discretizer calculate the q-quantile as the interval, output the interv | selectedCols | Names of the columns used for processing | String[] | ✓ | | | numBuckets | number of buckets | Integer | | 2 | | numBucketsArray | Array of num bucket | Integer[] | | null | +| leftOpen | left open | Boolean | | true | | selectedCols | Names of the columns used for processing | String[] | ✓ | | | reservedCols | Names of the columns to be retained in the output table | String[] | | null | | outputCols | Names of the output columns | String[] | | null | - +| handleInvalid | Strategy to handle unseen token when doing prediction, one of "keep", "skip" or "error" | String | | "keep" | +| encode | Encode method,"INDEX", "VECTOR", "ASSEMBLED_VECTOR" | String | |INDEX | +| dropLast | drop last | Boolean | | true | ## Script Example diff --git a/docs/en/quantilediscretizerpredictbatchop.md 
b/docs/en/quantilediscretizerpredictbatchop.md index 21db72570..0999d200d 100644 --- a/docs/en/quantilediscretizerpredictbatchop.md +++ b/docs/en/quantilediscretizerpredictbatchop.md @@ -7,6 +7,9 @@ The batch operator that predict the data using the quantile discretizer model. | selectedCols | Names of the columns used for processing | String[] | ✓ | | | reservedCols | Names of the columns to be retained in the output table | String[] | | null | | outputCols | Names of the output columns | String[] | | null | +| handleInvalid | Strategy to handle unseen token when doing prediction, one of "keep", "skip" or "error" | String | | "keep" | +| encode | Encode method,"INDEX", "VECTOR", "ASSEMBLED_VECTOR" | String | |INDEX | +| dropLast | drop last | Boolean | | true | ## Script Example diff --git a/docs/en/quantilediscretizerpredictstreamop.md b/docs/en/quantilediscretizerpredictstreamop.md index d0f75aff6..13efed22a 100644 --- a/docs/en/quantilediscretizerpredictstreamop.md +++ b/docs/en/quantilediscretizerpredictstreamop.md @@ -7,7 +7,9 @@ The stream operator that predict the data using the quantile discretizer model. | selectedCols | Names of the columns used for processing | String[] | ✓ | | | reservedCols | Names of the columns to be retained in the output table | String[] | | null | | outputCols | Names of the output columns | String[] | | null | - +| handleInvalid | Strategy to handle unseen token when doing prediction, one of "keep", "skip" or "error" | String | | "keep" | +| encode | Encode method,"INDEX", "VECTOR", "ASSEMBLED_VECTOR" | String | |INDEX | +| dropLast | drop last | Boolean | | true | ## Script Example diff --git a/docs/en/quantilediscretizertrainbatchop.md b/docs/en/quantilediscretizertrainbatchop.md index 31b07edfe..1b5399b98 100644 --- a/docs/en/quantilediscretizertrainbatchop.md +++ b/docs/en/quantilediscretizertrainbatchop.md @@ -7,6 +7,7 @@ Fit a quantile discretizer model. 
| selectedCols | Names of the columns used for processing | String[] | ✓ | | | numBuckets | number of buckets | Integer | | 2 | | numBucketsArray | Array of num bucket | Integer[] | | null | +| leftOpen | left open | Boolean | | true | ## Script Example diff --git a/pyalink/adult.ipynb b/pyalink/adult.ipynb index a70a6e786..6736fa151 100644 --- a/pyalink/adult.ipynb +++ b/pyalink/adult.ipynb @@ -12,20 +12,18 @@ "output_type": "stream", "text": [ "\n", - "Use one of the following command to start using pyalink:\n", - "使用以下一条命令来开始使用 pyalink:\n", + "Use one of the following commands to start using PyAlink:\n", " - useLocalEnv(parallelism, flinkHome=None, config=None)\n", " - useRemoteEnv(host, port, parallelism, flinkHome=None, localIp=\"localhost\", config=None)\n", "Call resetEnv() to reset environment and switch to another.\n", - "使用 resetEnv() 来重置运行环境,并切换到另一个。\n", "\n", - "JVM listening on 127.0.0.1:57247\n" + "JVM listening on 127.0.0.1:64158\n" ] }, { "data": { "text/plain": [ - "JavaObject id=o6" + "MLEnv(benv=, btenv=, senv=, stenv=)" ] }, "execution_count": 1, @@ -84,7 +82,7 @@ "numerialColNames = [\"age\", \"fnlwgt\", \"education_num\", \"capital_gain\",\n", " \"capital_loss\", \"hours_per_week\"]\n", "onehot = OneHotEncoder().setSelectedCols(categoricalColNames) \\\n", - " .setOutputCol(\"output\").setReservedCols(numerialColNames + [\"label\"])\n", + " .setOutputCols([\"output\"]).setReservedCols(numerialColNames + [\"label\"])\n", "assembler = VectorAssembler().setSelectedCols([\"output\"] + numerialColNames) \\\n", " .setOutputCol(\"vec\").setReservedCols([\"label\"])\n", "pipeline = Pipeline().add(onehot).add(assembler)" @@ -129,20 +127,20 @@ "name": "stdout", "output_type": "stream", "text": [ - "AUC: 0.9071346253140332\n", - "KS: 0.6508855101121852\n", - "PRC: 0.7654668375809972\n", - "Precision: 0.7311696264543784\n", - "Recall: 0.609105981379926\n", - "F1: 0.6645794197453558\n", - "ConfusionMatrix: [[4776, 1756], [3065, 22964]]\n", + "AUC: 
0.9066240193960077\n", + "KS: 0.6495268264606959\n", + "PRC: 0.7662328278289783\n", + "Precision: 0.733230531996916\n", + "Recall: 0.6064277515623008\n", + "F1: 0.6638280050258272\n", + "ConfusionMatrix: [[4755, 1730], [3086, 22990]]\n", "LabelArray: ['>50K', '<=50K']\n", - "LogLoss: 0.31880016560096547\n", + "LogLoss: 0.3192012545654014\n", "TotalSamples: 32561\n", "ActualLabelProportion: [0.2408095574460244, 0.7591904425539756]\n", "ActualLabelFrequency: [7841, 24720]\n", - "Accuracy: 0.8519394367494856\n", - "Kappa: 0.5705912048680206\n" + "Accuracy: 0.8520929946868954\n", + "Kappa: 0.5701036372627706\n" ] } ], @@ -180,7 +178,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.7.0" } }, "nbformat": 4,