Skip to content

Commit

Permalink
Merge pull request #583 from bluesjjw/Auto-ML-jeremy
Browse files Browse the repository at this point in the history
 wrapper of spark.ml transformer 0a38e82
  • Loading branch information
paynie authored Dec 14, 2018
2 parents fa00dab + d194a94 commit 256b413
Show file tree
Hide file tree
Showing 5 changed files with 214 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Tencent is pleased to support the open source community by making Angel available.
*
* Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
*
* https://opensource.org/licenses/Apache-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*
*/


package com.tencent.angel.spark.ml.automl.feature

import org.apache.spark.ml.PipelineStage
import org.apache.spark.ml.Transformer

/**
 * Namespace for pipeline-construction helpers.
 *
 * At present it exposes a single method whose body is empty.
 */
object PipelineBuilder {

  /**
   * Placeholder with no implementation yet: calling it does nothing and returns unit.
   *
   * NOTE(review): judging by the name, this is presumably meant to declare the
   * input/output fields of each stage -- confirm the intended behavior before relying on it.
   *
   * @param pipeline the pipeline stages; currently unused
   */
  def declareFields(pipeline: Array[PipelineStage]): Unit = ()

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* Tencent is pleased to support the open source community by making Angel available.
*
* Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
*
* https://opensource.org/licenses/Apache-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*
*/


package com.tencent.angel.spark.ml.automl.feature

import org.apache.spark.ml.Transformer

import scala.collection.mutable.ArrayBuffer

/**
 * Base class pairing a Spark ML [[org.apache.spark.ml.Transformer]] with column bookkeeping.
 *
 * Concrete subclasses supply the wrapped transformer, the column-name arrays and the
 * mutable input/output column buffers. This class only provides accessors and
 * append helpers on top of those members.
 */
abstract class TransformerWrapper {

  /** The wrapped transformer instance. */
  val transformer: Transformer

  /** Transformer registered as this wrapper's parent; assigned through [[setParent]]. */
  var parentTransformer: Transformer

  // NOTE(review): presumably the column names the wrapped transformer expects to
  // read / produce -- nothing in this class consumes them, so confirm against callers.
  val requiredInputCols: Array[String]
  val requiredOutputCols: Array[String]

  /** Mutable buffer of input column names, appended to by [[addInputCol]]. */
  val inputCols: ArrayBuffer[String]

  /** Mutable buffer of output column names, appended to by [[addOutputCol]]. */
  val outputCols: ArrayBuffer[String]

  /** Column names associated with the parent; read via [[getParentCols]]. */
  var parentCols: Array[String]

  /** @return the wrapped transformer */
  def getTransformer: Transformer = transformer

  /**
   * Records `parent` as this wrapper's parent transformer.
   *
   * @param parent the transformer to store in `parentTransformer`
   */
  def setParent(parent: Transformer): Unit = parentTransformer = parent

  /** Whether this wrapper has an input column; defined by each subclass. */
  def hasInputCol: Boolean

  /** Whether this wrapper has an output column; defined by each subclass. */
  def hasOutputCol: Boolean

  /** @return a fresh array snapshot of the current input column buffer */
  def getInputCols: Array[String] = inputCols.toArray

  /** @return a fresh array snapshot of the current output column buffer */
  def getOutputCols: Array[String] = outputCols.toArray

  /**
   * Appends a column name to the input column buffer.
   *
   * @param col the column name to append
   */
  def addInputCol(col: String): Unit = inputCols += col

  /**
   * Appends a column name to the output column buffer.
   *
   * @param col the column name to append
   */
  def addOutputCol(col: String): Unit = outputCols += col

  /** @return the current value of `parentCols` (may be null if never assigned) */
  def getParentCols: Array[String] = parentCols

  /**
   * Misnamed accessor kept for backward compatibility: despite the `set` prefix it
   * takes no argument and only returns `parentCols`. Prefer [[getParentCols]].
   *
   * @return the current value of `parentCols`
   */
  def setParentCols: Array[String] = getParentCols

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Tencent is pleased to support the open source community by making Angel available.
*
* Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
*
* https://opensource.org/licenses/Apache-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*
*/


package com.tencent.angel.spark.ml.automl.feature.preprocess

import com.tencent.angel.spark.ml.automl.feature.TransformerWrapper
import org.apache.spark.ml.Transformer

import scala.collection.mutable.ArrayBuffer

/**
 * [[TransformerWrapper]] around a `Sampler` built with the given fraction.
 *
 * NOTE(review): `Sampler` is not imported here, so it presumably lives in this
 * package; its sampling semantics are not visible from this file.
 *
 * @param fraction value forwarded unchanged to the `Sampler` constructor
 */
class SamplerWrapper(fraction: Double) extends TransformerWrapper {

  override val transformer: Transformer = new Sampler(fraction)
  override var parentTransformer: Transformer = _

  // No required column names are declared for the sampler. Empty arrays are used
  // instead of null so that callers can iterate or take the length without a
  // NullPointerException.
  override val requiredInputCols: Array[String] = Array.empty[String]
  override val requiredOutputCols: Array[String] = Array.empty[String]

  override val inputCols: ArrayBuffer[String] = ArrayBuffer.empty[String]
  override val outputCols: ArrayBuffer[String] = ArrayBuffer.empty[String]

  // Left at the default (null) until a caller assigns it.
  override var parentCols: Array[String] = _

  /** Always true for this wrapper. */
  override def hasInputCol: Boolean = true

  /** Always false for this wrapper. */
  override def hasOutputCol: Boolean = false

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Tencent is pleased to support the open source community by making Angel available.
*
* Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
*
* https://opensource.org/licenses/Apache-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*
*/

package com.tencent.angel.spark.ml.automl.feature.preprocess

import com.tencent.angel.spark.ml.automl.feature.TransformerWrapper
import org.apache.spark.ml.Transformer

import scala.collection.mutable.ArrayBuffer
import org.apache.spark.ml.feature.StopWordsRemover

/**
 * [[TransformerWrapper]] around a default-constructed Spark ML `StopWordsRemover`.
 *
 * Declares "words" as its required input column name and "filteredwords" as its
 * required output column name.
 */
class StopWordsRemoverWrapper extends TransformerWrapper {

  override val transformer: Transformer = new StopWordsRemover()
  override var parentTransformer: Transformer = _

  override val requiredInputCols: Array[String] = Array("words")
  override val requiredOutputCols: Array[String] = Array("filteredwords")

  // The `= _` default initializer is only legal on `var`; on a `val` it does not
  // compile, and a null buffer would make addInputCol/getInputCols throw. Start
  // with empty buffers instead.
  override val inputCols: ArrayBuffer[String] = ArrayBuffer.empty[String]
  override val outputCols: ArrayBuffer[String] = ArrayBuffer.empty[String]

  // Left at the default (null) until a caller assigns it.
  override var parentCols: Array[String] = _

  /** Always true for this wrapper. */
  override def hasInputCol: Boolean = true

  /** Always true for this wrapper. */
  override def hasOutputCol: Boolean = true
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Tencent is pleased to support the open source community by making Angel available.
*
* Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
*
* https://opensource.org/licenses/Apache-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*
*/


package com.tencent.angel.spark.ml.automl.feature.preprocess

import com.tencent.angel.spark.ml.automl.feature.TransformerWrapper
import org.apache.spark.ml.Transformer

import scala.collection.mutable.ArrayBuffer
import org.apache.spark.ml.feature.Tokenizer

/**
 * [[TransformerWrapper]] around a default-constructed Spark ML `Tokenizer`.
 *
 * Declares "sentence" as its required input column name and "words" as its
 * required output column name.
 */
class TokenizerWrapper extends TransformerWrapper {

  override val transformer: Transformer = new Tokenizer()
  override var parentTransformer: Transformer = _

  override val requiredInputCols: Array[String] = Array("sentence")
  override val requiredOutputCols: Array[String] = Array("words")

  // The `= _` default initializer is only legal on `var`; on a `val` it does not
  // compile, and a null buffer would make addInputCol/getInputCols throw. Start
  // with empty buffers instead.
  override val inputCols: ArrayBuffer[String] = ArrayBuffer.empty[String]
  override val outputCols: ArrayBuffer[String] = ArrayBuffer.empty[String]

  // Left at the default (null) until a caller assigns it.
  override var parentCols: Array[String] = _

  /** Always true for this wrapper. */
  override def hasInputCol: Boolean = true

  /** Always true for this wrapper. */
  override def hasOutputCol: Boolean = true
}

0 comments on commit 256b413

Please sign in to comment.