Skip to content

Commit

Permalink
Refine pca, feature selector model info. see #117
Browse files Browse the repository at this point in the history
  • Loading branch information
cainingnk committed Jul 23, 2020
1 parent 16bf20c commit 70b30af
Show file tree
Hide file tree
Showing 27 changed files with 1,169 additions and 539 deletions.
Original file line number Diff line number Diff line change
@@ -1,15 +1,26 @@
package com.alibaba.alink.operator.batch.feature;

import com.alibaba.alink.common.lazy.WithModelInfoBatchOp;
import com.alibaba.alink.common.utils.DataSetConversionUtil;
import com.alibaba.alink.operator.batch.BatchOperator;
import com.alibaba.alink.operator.common.feature.ChiSqSelectorModelDataConverter;
import com.alibaba.alink.operator.common.feature.ChisqSelectorModelInfo;
import com.alibaba.alink.operator.common.feature.ChisqSelectorModelInfoBatchOp;
import com.alibaba.alink.operator.common.feature.ChisqSelectorUtil;
import com.alibaba.alink.operator.common.statistics.ChiSquareTestUtil;
import com.alibaba.alink.operator.batch.BatchOperator;
import org.apache.flink.ml.api.misc.param.Params;
import com.alibaba.alink.params.feature.ChiSqSelectorParams;
import org.apache.flink.util.Preconditions;

import org.apache.flink.api.java.DataSet;
import org.apache.flink.ml.api.misc.param.Params;
import org.apache.flink.types.Row;

/**
* Chi-square feature selector batch operator.
*/
public final class ChiSqSelectorBatchOp extends BatchOperator<ChiSqSelectorBatchOp>
implements ChiSqSelectorParams<ChiSqSelectorBatchOp> {
implements ChiSqSelectorParams<ChiSqSelectorBatchOp>,
WithModelInfoBatchOp<ChisqSelectorModelInfo, ChiSqSelectorBatchOp, ChisqSelectorModelInfoBatchOp> {

private static final long serialVersionUID = 942267749590810559L;

public ChiSqSelectorBatchOp() {
super(null);
Expand All @@ -32,23 +43,23 @@ public ChiSqSelectorBatchOp linkFrom(BatchOperator<?>... inputs) {
double fdr = getParams().get(FDR);
double fwe = getParams().get(FWE);

setOutputTable(ChiSquareTestUtil.selector(in, selectedColNames, labelColName,
selectorType, numTopFeatures, percentile, fpr, fdr, fwe));
DataSet<Row> chiSquareTest =
ChiSquareTestUtil.test(in, selectedColNames, labelColName);

return this;
}
DataSet<Row> model = chiSquareTest.mapPartition(
new ChisqSelectorUtil.ChiSquareSelector(selectedColNames, selectorType, numTopFeatures, percentile, fpr, fdr, fwe))
.name("FilterFeature")
.setParallelism(1);

setOutputTable(DataSetConversionUtil.toTable(in.getMLEnvironmentId(), model, new ChiSqSelectorModelDataConverter().getModelSchema()));

public String[] collectResult() {
Preconditions.checkArgument(null != this.getOutputTable(), "Please link from or link to.");
return this;
}

int[] indices = new ChiSqSelectorModelDataConverter().load(this.collect());

String[] selectedColNames = new String[indices.length];
for (int i = 0; i < indices.length; i++) {
selectedColNames[i] = this.getSelectedCols()[i];
}
return selectedColNames;
/**
 * Creates the model-info operator for this selector.
 *
 * <p>A new {@link ChisqSelectorModelInfoBatchOp} is constructed from this operator's
 * params and then linked from this operator.
 *
 * @return the result of linking the new model-info operator from this operator
 */
@Override
public ChisqSelectorModelInfoBatchOp getModelInfoBatchOp() {
    ChisqSelectorModelInfoBatchOp modelInfoOp = new ChisqSelectorModelInfoBatchOp(getParams());
    return modelInfoOp.linkFrom(this);
}

}
Loading

0 comments on commit 70b30af

Please sign in to comment.