From 45d5a84959294cd4cbaae674f6e0d7e3c758cebd Mon Sep 17 00:00:00 2001 From: Helw150 Date: Mon, 30 Jul 2018 16:02:47 -0400 Subject: [PATCH 01/14] Multiple Eval Functions --- .../dmlc/xgboost4j/java/BoosterResults.java | 43 ++++++++++ .../java/ml/dmlc/xgboost4j/java/XGBoost.java | 65 ++++++++++++-- .../ml/dmlc/xgboost4j/scala/XGBoost.scala | 84 ++++++++++++++++--- 3 files changed, 170 insertions(+), 22 deletions(-) create mode 100644 jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/BoosterResults.java diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/BoosterResults.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/BoosterResults.java new file mode 100644 index 000000000000..893ca6c7d54f --- /dev/null +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/BoosterResults.java @@ -0,0 +1,43 @@ +package ml.dmlc.xgboost4j.java; + +import java.io.*; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.KryoSerializable; +import com.esotericsoftware.kryo.io.Input; +import com.esotericsoftware.kryo.io.Output; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +public class BoosterResults implements Serializable, KryoSerializable { + private Booster booster; + private String[] logInfos; + + public BoosterResults(Booster booster, String[] logInfos) { + this.booster = booster; + this.logInfos = logInfos; + } + + public Booster getBooster() { + return this.booster; + } + + public String[] getLogInfos() { + return this.logInfos; + } + + @Override + public void write(Kryo kryo, Output output) { + kryo.writeObject(output, booster); + kryo.writeObject(output, logInfos); + } + + @Override + public void read(Kryo kryo, Input input) { + booster = kryo.readObject(input, Booster.class); + logInfos = kryo.readObject(input, String[].class); + } +} diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java index df030105d4d1..0e1fa6d08561 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java @@ -76,7 +76,7 @@ public static Booster train( Map watches, IObjective obj, IEvaluation eval) throws XGBoostError { - return train(dtrain, params, round, watches, null, obj, eval, 0); + return train(dtrain, params, round, watches, null, obj, new IEvaluation[]{eval}, 0); } /** @@ -105,7 +105,7 @@ public static Booster train( IObjective obj, IEvaluation eval, int earlyStoppingRound) throws XGBoostError { - return train(dtrain, params, round, watches, metrics, obj, eval, earlyStoppingRound, null); + return train(dtrain, params, round, watches, metrics, obj, new IEvaluation[]{eval}, earlyStoppingRound, null); } /** @@ -134,14 +134,28 @@ public static Booster train( float[][] metrics, IObjective obj, IEvaluation eval, - int earlyStoppingRound, - Booster booster) throws XGBoostError { + int earlyStoppingRound, + Booster booster) throws XGBoostError { + return train(dtrain, params, round, watches, metrics, obj, new IEvaluation[]{eval}, earlyStoppingRound, booster); + } + + public static BoosterResults trainWithResults( + DMatrix dtrain, + Map params, + int round, + Map watches, + float[][] metrics, + IObjective obj, + IEvaluation[] evals, + int earlyStoppingRound, + Booster booster) throws XGBoostError { //collect eval matrixs String[] evalNames; DMatrix[] evalMats; List names = new ArrayList(); List mats = new ArrayList(); + List logLines = new ArrayList(); for (Map.Entry evalEntry : watches.entrySet()) { names.add(evalEntry.getKey()); @@ -187,11 +201,16 @@ public static Booster train( //evaluation if (evalMats.length > 0) { float[] metricsOut = new float[evalMats.length]; - String evalInfo; - if (eval != null) { - evalInfo = booster.evalSet(evalMats, evalNames, eval, metricsOut); + String evalInfo = ""; + if (evals != null && evals.length > 1) { + for (int i = 0; i < evals.length; i++) { + String evalLine = booster.evalSet(evalMats, evalNames, evals[i]); + evalInfo = evalInfo + " " + evalLine; + } + logLines.add(evalInfo); } else { evalInfo = booster.evalSet(evalMats, evalNames, iter, metricsOut); + logLines.add(evalInfo); } for (int i = 0; i < metricsOut.length; i++) { metrics[i][iter] = metricsOut[i]; @@ -207,14 +226,42 @@ public static Booster train( Rabit.trackerPrint(String.format( "early stopping after %d decreasing rounds", earlyStoppingRound)); break; - } + } + if (Rabit.getRank() == 0) { Rabit.trackerPrint(evalInfo + '\n'); } } booster.saveRabitCheckpoint(); } - return booster; + BoosterResults results = new BoosterResults(booster, + logLines.toArray(new String[logLines.size()])); + return results; + } + + /** + * Train a booster with given parameters. + * + * @param dtrain Data to be trained. + * @param params Booster params. + * @param round Number of boosting iterations. + * @param watches a group of items to be evaluated during training, this allows user to watch + * performance on the validation set. + * @param obj customized objective (set to null if not used) + * @param eval customized evaluation (set to null if not used) + * @return trained booster + * @throws XGBoostError native error + */ + public static Booster train( + DMatrix dtrain, + Map params, + int round, + Map watches, + IObjective obj, + IEvaluation[] evals) throws XGBoostError { + + BoosterResults results = trainWithResults(dtrain, params, round, watches, obj, evals); + return results.getBooster(); } /** diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala index 609d7b2cde8c..401baf8f81fb 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala @@ -18,7 +18,7 @@ package ml.dmlc.xgboost4j.scala import java.io.InputStream -import ml.dmlc.xgboost4j.java.{Booster => JBooster, XGBoost => JXGBoost, XGBoostError} +import ml.dmlc.xgboost4j.java.{Booster => JBooster, XGBoost => JXGBoost, XGBoostError, , BoosterResults, IEvaluation} import scala.collection.JavaConverters._ /** @@ -26,6 +26,32 @@ import scala.collection.JavaConverters._ */ object XGBoost { + @throws(classOf[XGBoostError]) + def trainWithResults( + dtrain: DMatrix, + params: Map[String, Any], + round: Int, + watches: Map[String, DMatrix] = Map[String, DMatrix](), + obj: ObjectiveTrait = null, + evals: Array[IEvaluation] = null, + earlyStoppingRound: Int = 0, + booster: Booster = null): BoosterResults = { + + + val jWatches = watches.map{case (name, matrix) => (name, matrix.jDMatrix)} + val jBooster = if (booster == null) { + null + } else { + booster.booster + } + val xgboostResults = JXGBoost.trainWithResults( + dtrain.jDMatrix, + // we have to filter null value for customized obj and eval + params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).asJava, + round, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster) + xgboostResults + } + /** * Train a booster given parameters. * @@ -40,7 +66,7 @@ object XGBoost { * after a specified number of consecutive * increases in any evaluation metric. * @param obj customized objective - * @param eval customized evaluation + * @param evals customized evaluation * @param booster train from scratch if set to null; train from an existing booster if not null. * @return The trained booster. */ @@ -52,22 +78,54 @@ object XGBoost { watches: Map[String, DMatrix] = Map(), metrics: Array[Array[Float]] = null, obj: ObjectiveTrait = null, - eval: EvalTrait = null, + evals: Array[IEvaluation] = null earlyStoppingRound: Int = 0, booster: Booster = null): Booster = { - val jWatches = watches.mapValues(_.jDMatrix).asJava - val jBooster = if (booster == null) { - null +): Booster = { + + val xgboostResults = trainWithResults(dtrain, params, round, watches, obj, evals, earlyStoppingRound, booster) + if (booster == null) { + new Booster(xgboostResults.getBooster()) } else { - booster.booster + // Avoid creating a new SBooster with the same JBooster + booster } - val xgboostInJava = JXGBoost.train( - dtrain.jDMatrix, - // we have to filter null value for customized obj and eval - params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).asJava, - round, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster) + } + + /** + * Train a booster given parameters. + * + * @param dtrain Data to be trained. + * @param params Parameters. + * @param round Number of boosting iterations. + * @param watches a group of items to be evaluated during training, this allows user to watch + * performance on the validation set. + * @param metrics array containing the evaluation metrics for each matrix in watches for each + * iteration + * @param earlyStoppingRound if non-zero, training would be stopped + * after a specified number of consecutive + * increases in any evaluation metric. + * @param obj customized objective + * @param eval customized evaluation + * @param booster train from scratch if set to null; train from an existing booster if not null. + * @return The trained booster. + */ + @throws(classOf[XGBoostError]) + def train( + dtrain: DMatrix, + params: Map[String, Any], + round: Int, + watches: Map[String, DMatrix] = Map(), + metrics: Array[Array[Float]] = null, + obj: ObjectiveTrait = null, + eval: IEvaluation = null + earlyStoppingRound: Int = 0, + booster: Booster = null): Booster = { +): Booster = { + + val xgboostResults = trainWithResults(dtrain, params, round, watches, obj, Array(eval), earlyStoppingRound, booster) if (booster == null) { - new Booster(xgboostInJava) + new Booster(xgboostResults.getBooster()) } else { // Avoid creating a new SBooster with the same JBooster booster From 170a6c4e888e15c0270a629274c554f43dd84911 Mon Sep 17 00:00:00 2001 From: Helw150 Date: Mon, 30 Jul 2018 16:15:12 -0400 Subject: [PATCH 02/14] Forgot Metrics Out --- .../xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java index 0e1fa6d08561..2fb9a1112563 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java @@ -204,7 +204,7 @@ public static BoosterResults trainWithResults( String evalInfo = ""; if (evals != null && evals.length > 1) { for (int i = 0; i < evals.length; i++) { - String evalLine = booster.evalSet(evalMats, evalNames, evals[i]); + String evalLine = booster.evalSet(evalMats, evalNames, evals[i], metricsOut); evalInfo = evalInfo + " " + evalLine; } logLines.add(evalInfo); From 40fc3d1d1a66b03b45d2b056793fcf6f09575dad Mon Sep 17 00:00:00 2001 From: Helw150 Date: Tue, 31 Jul 2018 10:46:24 -0400 Subject: [PATCH 03/14] Stupid Typos --- .../src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala index 401baf8f81fb..6a974859a139 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala @@ -18,7 +18,7 @@ package ml.dmlc.xgboost4j.scala import java.io.InputStream -import ml.dmlc.xgboost4j.java.{Booster => JBooster, XGBoost => JXGBoost, XGBoostError, , BoosterResults, IEvaluation} +import ml.dmlc.xgboost4j.java.{Booster => JBooster, XGBoost => JXGBoost, XGBoostError, BoosterResults, IEvaluation} import scala.collection.JavaConverters._ /** @@ -121,8 +121,6 @@ object XGBoost { eval: IEvaluation = null earlyStoppingRound: Int = 0, booster: Booster = null): Booster = { -): Booster = { - val xgboostResults = trainWithResults(dtrain, params, round, watches, obj, Array(eval), earlyStoppingRound, booster) if (booster == null) { new Booster(xgboostResults.getBooster()) From 257ba632c75245e83fb7c92b65f5b84360e4fb81 Mon Sep 17 00:00:00 2001 From: Helw150 Date: Tue, 31 Jul 2018 14:44:37 -0400 Subject: [PATCH 04/14] More Typos --- .../src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala index 6a974859a139..afcaf2a67f8a 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala @@ -78,10 +78,9 @@ object XGBoost { watches: Map[String, DMatrix] = Map(), metrics: Array[Array[Float]] = null, obj: ObjectiveTrait = null, - evals: Array[IEvaluation] = null + evals: Array[IEvaluation] = null, earlyStoppingRound: Int = 0, booster: Booster = null): Booster = { -): Booster = { val xgboostResults = trainWithResults(dtrain, params, round, watches, obj, evals, earlyStoppingRound, booster) if (booster == null) { From 3b3c2b1b99a3db9c91c20992bf321a876b774ce4 Mon Sep 17 00:00:00 2001 From: Helw150 Date: Wed, 1 Aug 2018 12:41:06 -0400 Subject: [PATCH 05/14] Format with ScalaFmt and google-java-format --- .../java/ml/dmlc/xgboost4j/java/XGBoost.java | 215 ++++++++++-------- .../ml/dmlc/xgboost4j/scala/XGBoost.scala | 81 +++++-- 2 files changed, 181 insertions(+), 115 deletions(-) diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java index 2fb9a1112563..f2196cb349e0 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java @@ -36,107 +36,123 @@ public class XGBoost { * @param modelPath booster modelPath (model generated by booster.saveModel) * @throws XGBoostError native error */ - public static Booster loadModel(String modelPath) - throws XGBoostError { + public static Booster loadModel(String modelPath) throws XGBoostError { return Booster.loadModel(modelPath); } /** - * Load a new Booster model from a file opened as input stream. - * The assumption is the input stream only contains one XGBoost Model. - * This can be used to load existing booster models saved by other xgboost bindings. + * Load a new Booster model from a file opened as input stream. The assumption is the input stream + * only contains one XGBoost Model. This can be used to load existing booster models saved by + * other xgboost bindings. * - * @param in The input stream of the file, - * will be closed after this function call. + * @param in The input stream of the file, will be closed after this function call. * @return The create boosted * @throws XGBoostError * @throws IOException */ - public static Booster loadModel(InputStream in) - throws XGBoostError, IOException { + public static Booster loadModel(InputStream in) throws XGBoostError, IOException { return Booster.loadModel(in); } /** * Train a booster given parameters. * - * @param dtrain Data to be trained. - * @param params Parameters. - * @param round Number of boosting iterations. + * @param dtrain Data to be trained. + * @param params Parameters. + * @param round Number of boosting iterations. * @param watches a group of items to be evaluated during training, this allows user to watch - * performance on the validation set. - * @param obj customized objective - * @param eval customized evaluation + * performance on the validation set. + * @param obj customized objective + * @param eval customized evaluation * @return The trained booster. */ public static Booster train( - DMatrix dtrain, - Map params, - int round, - Map watches, - IObjective obj, - IEvaluation eval) throws XGBoostError { - return train(dtrain, params, round, watches, null, obj, new IEvaluation[]{eval}, 0); + DMatrix dtrain, + Map params, + int round, + Map watches, + IObjective obj, + IEvaluation eval) + throws XGBoostError { + return train(dtrain, params, round, watches, null, obj, new IEvaluation[] {eval}, 0); } /** * Train a booster given parameters. * - * @param dtrain Data to be trained. - * @param params Parameters. - * @param round Number of boosting iterations. + * @param dtrain Data to be trained. + * @param params Parameters. + * @param round Number of boosting iterations. * @param watches a group of items to be evaluated during training, this allows user to watch - * performance on the validation set. + * performance on the validation set. * @param metrics array containing the evaluation metrics for each matrix in watches for each - * iteration - * @param earlyStoppingRound if non-zero, training would be stopped - * after a specified number of consecutive - * increases in any evaluation metric. - * @param obj customized objective - * @param eval customized evaluation + * iteration + * @param earlyStoppingRound if non-zero, training would be stopped after a specified number of + * consecutive increases in any evaluation metric. + * @param obj customized objective + * @param eval customized evaluation * @return The trained booster. */ public static Booster train( - DMatrix dtrain, - Map params, - int round, - Map watches, - float[][] metrics, - IObjective obj, - IEvaluation eval, - int earlyStoppingRound) throws XGBoostError { - return train(dtrain, params, round, watches, metrics, obj, new IEvaluation[]{eval}, earlyStoppingRound, null); + DMatrix dtrain, + Map params, + int round, + Map watches, + float[][] metrics, + IObjective obj, + IEvaluation eval, + int earlyStoppingRound) + throws XGBoostError { + return train( + dtrain, + params, + round, + watches, + metrics, + obj, + new IEvaluation[] {eval}, + earlyStoppingRound, + null); } /** * Train a booster given parameters. * - * @param dtrain Data to be trained. - * @param params Parameters. - * @param round Number of boosting iterations. + * @param dtrain Data to be trained. + * @param params Parameters. + * @param round Number of boosting iterations. * @param watches a group of items to be evaluated during training, this allows user to watch - * performance on the validation set. + * performance on the validation set. * @param metrics array containing the evaluation metrics for each matrix in watches for each - * iteration - * @param earlyStoppingRound if non-zero, training would be stopped - * after a specified number of consecutive - * increases in any evaluation metric. - * @param obj customized objective - * @param eval customized evaluation + * iteration + * @param earlyStoppingRound if non-zero, training would be stopped after a specified number of + * consecutive increases in any evaluation metric. + * @param obj customized objective + * @param eval customized evaluation * @param booster train from scratch if set to null; train from an existing booster if not null. * @return The trained booster. */ public static Booster train( - DMatrix dtrain, - Map params, - int round, - Map watches, - float[][] metrics, - IObjective obj, - IEvaluation eval, - int earlyStoppingRound, - Booster booster) throws XGBoostError { - return train(dtrain, params, round, watches, metrics, obj, new IEvaluation[]{eval}, earlyStoppingRound, booster); + DMatrix dtrain, + Map params, + int round, + Map watches, + float[][] metrics, + IObjective obj, + IEvaluation eval, + int earlyStoppingRound, + Booster booster) + throws XGBoostError { + return train( + dtrain, + params, + round, + watches, + metrics, + obj, + new IEvaluation[] {eval}, + earlyStoppingRound, + booster); } public static BoosterResults trainWithResults( @@ -148,9 +164,10 @@ public static BoosterResults trainWithResults( IObjective obj, IEvaluation[] evals, int earlyStoppingRound, - Booster booster) throws XGBoostError { + Booster booster) + throws XGBoostError { - //collect eval matrixs + // collect eval matrixs String[] evalNames; DMatrix[] evalMats; List names = new ArrayList(); @@ -166,7 +183,7 @@ public static BoosterResults trainWithResults( evalMats = mats.toArray(new DMatrix[mats.size()]); metrics = metrics == null ? new float[evalNames.length][round] : metrics; - //collect all data matrixs + // collect all data matrixs DMatrix[] allMats; if (evalMats.length > 0) { allMats = new DMatrix[evalMats.length + 1]; @@ -177,7 +194,7 @@ public static BoosterResults trainWithResults( allMats[0] = dtrain; } - //initialize booster + // initialize booster if (booster == null) { // Start training on a new booster booster = new Booster(params, allMats); @@ -187,7 +204,7 @@ public static BoosterResults trainWithResults( booster.setParams(params); } - //begin to train + // begin to train for (int iter = booster.getVersion() / 2; iter < round; iter++) { if (booster.getVersion() % 2 == 0) { if (obj != null) { @@ -198,13 +215,13 @@ public static BoosterResults trainWithResults( booster.saveRabitCheckpoint(); } - //evaluation + // evaluation if (evalMats.length > 0) { float[] metricsOut = new float[evalMats.length]; String evalInfo = ""; if (evals != null && evals.length > 1) { for (int i = 0; i < evals.length; i++) { - String evalLine = booster.evalSet(evalMats, evalNames, evals[i], metricsOut); + String evalLine = booster.evalSet(evalMats, evalNames, evals[i], metricsOut); evalInfo = evalInfo + " " + evalLine; } logLines.add(evalInfo); @@ -223,19 +240,19 @@ public static BoosterResults trainWithResults( } if (!decreasing) { - Rabit.trackerPrint(String.format( - "early stopping after %d decreasing rounds", earlyStoppingRound)); + Rabit.trackerPrint( + String.format("early stopping after %d decreasing rounds", earlyStoppingRound)); break; - } - + } + if (Rabit.getRank() == 0) { Rabit.trackerPrint(evalInfo + '\n'); } } booster.saveRabitCheckpoint(); } - BoosterResults results = new BoosterResults(booster, - logLines.toArray(new String[logLines.size()])); + BoosterResults results = + new BoosterResults(booster, logLines.toArray(new String[logLines.size()])); return results; } @@ -244,11 +261,11 @@ public static BoosterResults trainWithResults( * * @param dtrain Data to be trained. * @param params Booster params. - * @param round Number of boosting iterations. + * @param round Number of boosting iterations. * @param watches a group of items to be evaluated during training, this allows user to watch - * performance on the validation set. - * @param obj customized objective (set to null if not used) - * @param eval customized evaluation (set to null if not used) + * performance on the validation set. + * @param obj customized objective (set to null if not used) + * @param eval customized evaluation (set to null if not used) * @return trained booster * @throws XGBoostError native error */ @@ -258,7 +275,8 @@ public static Booster train( int round, Map watches, IObjective obj, - IEvaluation[] evals) throws XGBoostError { + IEvaluation[] evals) + throws XGBoostError { BoosterResults results = trainWithResults(dtrain, params, round, watches, obj, evals); return results.getBooster(); @@ -267,13 +285,13 @@ public static Booster train( /** * Cross-validation with given parameters. * - * @param data Data to be trained. - * @param params Booster params. - * @param round Number of boosting iterations. - * @param nfold Number of folds in CV. + * @param data Data to be trained. + * @param params Booster params. + * @param round Number of boosting iterations. + * @param nfold Number of folds in CV. * @param metrics Evaluation metrics to be watched in CV. - * @param obj customized objective (set to null if not used) - * @param eval customized evaluation (set to null if not used) + * @param obj customized objective (set to null if not used) + * @param eval customized evaluation (set to null if not used) * @return evaluation history * @throws XGBoostError native error */ @@ -284,7 +302,8 @@ public static String[] crossValidation( int nfold, String[] metrics, IObjective obj, - IEvaluation eval) throws XGBoostError { + IEvaluation eval) + throws XGBoostError { CVPack[] cvPacks = makeNFold(data, nfold, params, metrics); String[] evalHist = new String[round]; String[] results = new String[cvPacks.length]; @@ -314,15 +333,16 @@ public static String[] crossValidation( /** * make an n-fold array of CVPack from random indices * - * @param data original data - * @param nfold num of folds - * @param params booster parameters + * @param data original data + * @param nfold num of folds + * @param params booster parameters * @param evalMetrics Evaluation metrics * @return CV package array * @throws XGBoostError native error */ - private static CVPack[] makeNFold(DMatrix data, int nfold, Map params, - String[] evalMetrics) throws XGBoostError { + private static CVPack[] makeNFold( + DMatrix data, int nfold, Map params, String[] evalMetrics) + throws XGBoostError { List samples = genRandPermutationNums(0, (int) data.rowNum()); int step = samples.size() / nfold; int[] testSlice = new int[step]; @@ -350,7 +370,7 @@ private static CVPack[] makeNFold(DMatrix data, int nfold, Map p DMatrix dtrain = data.slice(trainSlice); DMatrix dtest = data.slice(testSlice); CVPack cvPack = new CVPack(dtrain, dtest, params); - //set eval types + // set eval types if (evalMetrics != null) { for (String type : evalMetrics) { cvPack.booster.setParam("eval_metric", type); @@ -421,15 +441,14 @@ private static class CVPack { * create an cross validation package * * @param dtrain train data - * @param dtest test data + * @param dtest test data * @param params parameters * @throws XGBoostError native error */ - public CVPack(DMatrix dtrain, DMatrix dtest, Map params) - throws XGBoostError { - dmats = new DMatrix[]{dtrain, dtest}; + public CVPack(DMatrix dtrain, DMatrix dtest, Map params) throws XGBoostError { + dmats = new DMatrix[] {dtrain, dtest}; booster = new Booster(params, dmats); - names = new String[]{"train", "test"}; + names = new String[] {"train", "test"}; this.dtrain = dtrain; this.dtest = dtest; } @@ -447,7 +466,7 @@ public void update(int iter) throws XGBoostError { /** * update one iteration * - * @param obj customized objective + * @param obj customized objective * @throws XGBoostError native error */ public void update(IObjective obj) throws XGBoostError { diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala index afcaf2a67f8a..96bf15e0e4d3 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala @@ -18,7 +18,13 @@ package ml.dmlc.xgboost4j.scala import java.io.InputStream -import ml.dmlc.xgboost4j.java.{Booster => JBooster, XGBoost => JXGBoost, XGBoostError, BoosterResults, IEvaluation} +import ml.dmlc.xgboost4j.java.{ + Booster => JBooster, + XGBoost => JXGBoost, + XGBoostError, + BoosterResults, + IEvaluation +} import scala.collection.JavaConverters._ /** @@ -35,10 +41,12 @@ object XGBoost { obj: ObjectiveTrait = null, evals: Array[IEvaluation] = null, earlyStoppingRound: Int = 0, - booster: Booster = null): BoosterResults = { - + booster: Booster = null + ): BoosterResults = { - val jWatches = watches.map{case (name, matrix) => (name, matrix.jDMatrix)} + val jWatches = watches.map { + case (name, matrix) => (name, matrix.jDMatrix) + } val jBooster = if (booster == null) { null } else { @@ -47,8 +55,18 @@ object XGBoost { val xgboostResults = JXGBoost.trainWithResults( dtrain.jDMatrix, // we have to filter null value for customized obj and eval - params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).asJava, - round, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster) + params + .filter(_._2 != null) + .mapValues(_.toString.asInstanceOf[AnyRef]) + .asJava, + round, + jWatches, + metrics, + obj, + eval, + earlyStoppingRound, + jBooster + ) xgboostResults } @@ -80,9 +98,19 @@ object XGBoost { obj: ObjectiveTrait = null, evals: Array[IEvaluation] = null, earlyStoppingRound: Int = 0, - booster: Booster = null): Booster = { - - val xgboostResults = trainWithResults(dtrain, params, round, watches, obj, evals, earlyStoppingRound, booster) + booster: Booster = null + ): Booster = { + + val xgboostResults = trainWithResults( + dtrain, + params, + round, + watches, + obj, + evals, + earlyStoppingRound, + booster + ) if (booster == null) { new Booster(xgboostResults.getBooster()) } else { @@ -91,7 +119,7 @@ object XGBoost { } } - /** + /** * Train a booster given parameters. * * @param dtrain Data to be trained. @@ -117,10 +145,20 @@ object XGBoost { watches: Map[String, DMatrix] = Map(), metrics: Array[Array[Float]] = null, obj: ObjectiveTrait = null, - eval: IEvaluation = null + eval: IEvaluation = null, earlyStoppingRound: Int = 0, - booster: Booster = null): Booster = { - val xgboostResults = trainWithResults(dtrain, params, round, watches, obj, Array(eval), earlyStoppingRound, booster) + booster: Booster = null + ): Booster = { + val xgboostResults = trainWithResults( + dtrain, + params, + round, + watches, + obj, + Array(eval), + earlyStoppingRound, + booster + ) if (booster == null) { new Booster(xgboostResults.getBooster()) } else { @@ -149,11 +187,20 @@ object XGBoost { nfold: Int = 5, metrics: Array[String] = null, obj: ObjectiveTrait = null, - eval: EvalTrait = null): Array[String] = { + eval: EvalTrait = null + ): Array[String] = { JXGBoost.crossValidation( - data.jDMatrix, params.map{ case (key: String, value) => (key, value.toString)}. - toMap[String, AnyRef].asJava, - round, nfold, metrics, obj, eval) + data.jDMatrix, + params + .map { case (key: String, value) => (key, value.toString) } + .toMap[String, AnyRef] + .asJava, + round, + nfold, + metrics, + obj, + eval + ) } /** From 51071c2057fa89abbbc5dc78bab37a6bb28fb7bf Mon Sep 17 00:00:00 2001 From: Helw150 Date: Wed, 1 Aug 2018 17:44:48 -0400 Subject: [PATCH 06/14] CI Fixes --- .../ml/dmlc/xgboost4j/scala/XGBoost.scala | 67 ++++--------------- 1 file changed, 13 insertions(+), 54 deletions(-) diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala index 96bf15e0e4d3..2ff8fddaa296 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala @@ -38,15 +38,14 @@ object XGBoost { params: Map[String, Any], round: Int, watches: Map[String, DMatrix] = Map[String, DMatrix](), + metrics: Array[Array[Float]] = null, obj: ObjectiveTrait = null, evals: Array[IEvaluation] = null, earlyStoppingRound: Int = 0, booster: Booster = null ): BoosterResults = { - val jWatches = watches.map { - case (name, matrix) => (name, matrix.jDMatrix) - } + val jWatches = watches.mapValues(_.jDMatrix).asJava val jBooster = if (booster == null) { null } else { @@ -63,7 +62,7 @@ object XGBoost { jWatches, metrics, obj, - eval, + evals, earlyStoppingRound, jBooster ) @@ -84,7 +83,7 @@ object XGBoost { * after a specified number of consecutive * increases in any evaluation metric. * @param obj customized objective - * @param evals customized evaluation + * @param eval customized evaluation or evaluations * @param booster train from scratch if set to null; train from an existing booster if not null. * @return The trained booster. */ @@ -96,66 +95,26 @@ object XGBoost { watches: Map[String, DMatrix] = Map(), metrics: Array[Array[Float]] = null, obj: ObjectiveTrait = null, - evals: Array[IEvaluation] = null, + eval: Either[IEvaluation, Array[IEvaluation]] = null, earlyStoppingRound: Int = 0, booster: Booster = null ): Booster = { - - val xgboostResults = trainWithResults( - dtrain, - params, - round, - watches, - obj, - evals, - earlyStoppingRound, - booster - ) - if (booster == null) { - new Booster(xgboostResults.getBooster()) - } else { - // Avoid creating a new SBooster with the same JBooster - booster + val evals: Array[IEvaluation] = { + eval match { + case single: IEvaluation => Array(eval) + case multiple: Array[IEvaluation] => eval + case _ => null + } } - } - /** - * Train a booster given parameters. - * - * @param dtrain Data to be trained. - * @param params Parameters. - * @param round Number of boosting iterations. - * @param watches a group of items to be evaluated during training, this allows user to watch - * performance on the validation set. - * @param metrics array containing the evaluation metrics for each matrix in watches for each - * iteration - * @param earlyStoppingRound if non-zero, training would be stopped - * after a specified number of consecutive - * increases in any evaluation metric. - * @param obj customized objective - * @param eval customized evaluation - * @param booster train from scratch if set to null; train from an existing booster if not null. - * @return The trained booster. - */ - @throws(classOf[XGBoostError]) - def train( - dtrain: DMatrix, - params: Map[String, Any], - round: Int, - watches: Map[String, DMatrix] = Map(), - metrics: Array[Array[Float]] = null, - obj: ObjectiveTrait = null, - eval: IEvaluation = null, - earlyStoppingRound: Int = 0, - booster: Booster = null - ): Booster = { val xgboostResults = trainWithResults( dtrain, params, round, watches, + metrics, obj, - Array(eval), + evals, earlyStoppingRound, booster ) From 3a722036dd48c45c4d2bcca409623c5ed6dbcb97 Mon Sep 17 00:00:00 2001 From: Helw150 Date: Wed, 1 Aug 2018 18:34:50 -0400 Subject: [PATCH 07/14] Unformat Java --- .../java/ml/dmlc/xgboost4j/java/XGBoost.java | 211 ++++++++---------- 1 file changed, 97 insertions(+), 114 deletions(-) diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java index f2196cb349e0..7da9e2b4b416 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java @@ -36,123 +36,109 @@ public class XGBoost { * @param modelPath booster modelPath (model generated by booster.saveModel) * @throws XGBoostError native error */ - public static Booster loadModel(String modelPath) throws XGBoostError { + public static Booster loadModel(String modelPath) + throws XGBoostError { return Booster.loadModel(modelPath); } /** - * Load a new Booster model from a file opened as input stream. The assumption is the input stream - * only contains one XGBoost Model. This can be used to load existing booster models saved by - * other xgboost bindings. + * Load a new Booster model from a file opened as input stream. + * The assumption is the input stream only contains one XGBoost Model. + * This can be used to load existing booster models saved by other xgboost bindings. * - * @param in The input stream of the file, will be closed after this function call. + * @param in The input stream of the file, + * will be closed after this function call. * @return The create boosted * @throws XGBoostError * @throws IOException */ - public static Booster loadModel(InputStream in) throws XGBoostError, IOException { + public static Booster loadModel(InputStream in) + throws XGBoostError, IOException { return Booster.loadModel(in); } /** * Train a booster given parameters. * - * @param dtrain Data to be trained. - * @param params Parameters. - * @param round Number of boosting iterations. + * @param dtrain Data to be trained. + * @param params Parameters. + * @param round Number of boosting iterations. * @param watches a group of items to be evaluated during training, this allows user to watch - * performance on the validation set. - * @param obj customized objective - * @param eval customized evaluation + * performance on the validation set. + * @param obj customized objective + * @param eval customized evaluation * @return The trained booster. */ public static Booster train( - DMatrix dtrain, - Map params, - int round, - Map watches, - IObjective obj, - IEvaluation eval) - throws XGBoostError { - return train(dtrain, params, round, watches, null, obj, new IEvaluation[] {eval}, 0); + DMatrix dtrain, + Map params, + int round, + Map watches, + IObjective obj, + IEvaluation eval) throws XGBoostError { + return train(dtrain, params, round, watches, null, obj, new IEvaluation[]{eval}, 0); } /** * Train a booster given parameters. * - * @param dtrain Data to be trained. - * @param params Parameters. - * @param round Number of boosting iterations. + * @param dtrain Data to be trained. + * @param params Parameters. + * @param round Number of boosting iterations. * @param watches a group of items to be evaluated during training, this allows user to watch - * performance on the validation set. + * performance on the validation set. * @param metrics array containing the evaluation metrics for each matrix in watches for each - * iteration - * @param earlyStoppingRound if non-zero, training would be stopped after a specified number of - * consecutive increases in any evaluation metric. - * @param obj customized objective - * @param eval customized evaluation + * iteration + * @param earlyStoppingRound if non-zero, training would be stopped + * after a specified number of consecutive + * increases in any evaluation metric. + * @param obj customized objective + * @param eval customized evaluation * @return The trained booster. */ public static Booster train( - DMatrix dtrain, - Map params, - int round, - Map watches, - float[][] metrics, - IObjective obj, - IEvaluation eval, - int earlyStoppingRound) - throws XGBoostError { - return train( - dtrain, - params, - round, - watches, - metrics, - obj, - new IEvaluation[] {eval}, - earlyStoppingRound, - null); + DMatrix dtrain, + Map params, + int round, + Map watches, + float[][] metrics, + IObjective obj, + IEvaluation eval, + int earlyStoppingRound) throws XGBoostError { + return train(dtrain, params, round, watches, metrics, obj, + new IEvaluation[]{eval}, earlyStoppingRound, null); } /** * Train a booster given parameters. * - * @param dtrain Data to be trained. - * @param params Parameters. - * @param round Number of boosting iterations. + * @param dtrain Data to be trained. + * @param params Parameters. + * @param round Number of boosting iterations. * @param watches a group of items to be evaluated during training, this allows user to watch - * performance on the validation set. + * performance on the validation set. * @param metrics array containing the evaluation metrics for each matrix in watches for each - * iteration - * @param earlyStoppingRound if non-zero, training would be stopped after a specified number of - * consecutive increases in any evaluation metric. - * @param obj customized objective - * @param eval customized evaluation + * iteration + * @param earlyStoppingRound if non-zero, training would be stopped + * after a specified number of consecutive + * increases in any evaluation metric. + * @param obj customized objective + * @param eval customized evaluation * @param booster train from scratch if set to null; train from an existing booster if not null. * @return The trained booster. */ public static Booster train( - DMatrix dtrain, - Map params, - int round, - Map watches, - float[][] metrics, - IObjective obj, - IEvaluation eval, - int earlyStoppingRound, - Booster booster) - throws XGBoostError { - return train( - dtrain, - params, - round, - watches, - metrics, - obj, - new IEvaluation[] {eval}, - earlyStoppingRound, - booster); + DMatrix dtrain, + Map params, + int round, + Map watches, + float[][] metrics, + IObjective obj, + IEvaluation eval, + int earlyStoppingRound, + Booster booster) throws XGBoostError { + return train(dtrain, params, round, watches, metrics, obj, + new IEvaluation[]{eval}, earlyStoppingRound, booster); } public static BoosterResults trainWithResults( @@ -164,10 +150,9 @@ public static BoosterResults trainWithResults( IObjective obj, IEvaluation[] evals, int earlyStoppingRound, - Booster booster) - throws XGBoostError { + Booster booster) throws XGBoostError { - // collect eval matrixs + //collect eval matrixs String[] evalNames; DMatrix[] evalMats; List names = new ArrayList(); @@ -183,7 +168,7 @@ public static BoosterResults trainWithResults( evalMats = mats.toArray(new DMatrix[mats.size()]); metrics = metrics == null ? new float[evalNames.length][round] : metrics; - // collect all data matrixs + //collect all data matrixs DMatrix[] allMats; if (evalMats.length > 0) { allMats = new DMatrix[evalMats.length + 1]; @@ -194,7 +179,7 @@ public static BoosterResults trainWithResults( allMats[0] = dtrain; } - // initialize booster + //initialize booster if (booster == null) { // Start training on a new booster booster = new Booster(params, allMats); @@ -204,7 +189,7 @@ public static BoosterResults trainWithResults( booster.setParams(params); } - // begin to train + //begin to train for (int iter = booster.getVersion() / 2; iter < round; iter++) { if (booster.getVersion() % 2 == 0) { if (obj != null) { @@ -215,7 +200,7 @@ public static BoosterResults trainWithResults( booster.saveRabitCheckpoint(); } - // evaluation + //evaluation if (evalMats.length > 0) { float[] metricsOut = new float[evalMats.length]; String evalInfo = ""; @@ -240,8 +225,8 @@ public static BoosterResults trainWithResults( } if (!decreasing) { - Rabit.trackerPrint( - String.format("early stopping after %d decreasing rounds", earlyStoppingRound)); + Rabit.trackerPrint(String.format( + "early stopping after %d decreasing rounds", earlyStoppingRound)); break; } @@ -251,8 +236,8 @@ public static BoosterResults trainWithResults( } booster.saveRabitCheckpoint(); } - BoosterResults results = - new BoosterResults(booster, logLines.toArray(new String[logLines.size()])); + BoosterResults results = new BoosterResults(booster, + logLines.toArray(new String[logLines.size()])); return results; } @@ -261,11 +246,11 @@ public static BoosterResults trainWithResults( * * @param dtrain Data to be trained. * @param params Booster params. - * @param round Number of boosting iterations. + * @param round Number of boosting iterations. * @param watches a group of items to be evaluated during training, this allows user to watch - * performance on the validation set. - * @param obj customized objective (set to null if not used) - * @param eval customized evaluation (set to null if not used) + * performance on the validation set. + * @param obj customized objective (set to null if not used) + * @param eval customized evaluation (set to null if not used) * @return trained booster * @throws XGBoostError native error */ @@ -275,8 +260,7 @@ public static Booster train( int round, Map watches, IObjective obj, - IEvaluation[] evals) - throws XGBoostError { + IEvaluation[] evals) throws XGBoostError { BoosterResults results = trainWithResults(dtrain, params, round, watches, obj, evals); return results.getBooster(); @@ -285,13 +269,13 @@ public static Booster train( /** * Cross-validation with given parameters. * - * @param data Data to be trained. - * @param params Booster params. - * @param round Number of boosting iterations. - * @param nfold Number of folds in CV. + * @param data Data to be trained. + * @param params Booster params. + * @param round Number of boosting iterations. + * @param nfold Number of folds in CV. * @param metrics Evaluation metrics to be watched in CV. - * @param obj customized objective (set to null if not used) - * @param eval customized evaluation (set to null if not used) + * @param obj customized objective (set to null if not used) + * @param eval customized evaluation (set to null if not used) * @return evaluation history * @throws XGBoostError native error */ @@ -302,8 +286,7 @@ public static String[] crossValidation( int nfold, String[] metrics, IObjective obj, - IEvaluation eval) - throws XGBoostError { + IEvaluation eval) throws XGBoostError { CVPack[] cvPacks = makeNFold(data, nfold, params, metrics); String[] evalHist = new String[round]; String[] results = new String[cvPacks.length]; @@ -333,16 +316,15 @@ public static String[] crossValidation( /** * make an n-fold array of CVPack from random indices * - * @param data original data - * @param nfold num of folds - * @param params booster parameters + * @param data original data + * @param nfold num of folds + * @param params booster parameters * @param evalMetrics Evaluation metrics * @return CV package array * @throws XGBoostError native error */ - private static CVPack[] makeNFold( - DMatrix data, int nfold, Map params, String[] evalMetrics) - throws XGBoostError { + private static CVPack[] makeNFold(DMatrix data, int nfold, Map params, + String[] evalMetrics) throws XGBoostError { List samples = genRandPermutationNums(0, (int) data.rowNum()); int step = samples.size() / nfold; int[] testSlice = new int[step]; @@ -370,7 +352,7 @@ private static CVPack[] makeNFold( DMatrix dtrain = data.slice(trainSlice); DMatrix dtest = data.slice(testSlice); CVPack cvPack = new CVPack(dtrain, dtest, params); - // set eval types + //set eval types if (evalMetrics != null) { for (String type : evalMetrics) { cvPack.booster.setParam("eval_metric", type); @@ -441,14 +423,15 @@ private static class CVPack { * create an cross validation package * * @param dtrain train data - * @param dtest test data + * @param dtest test data * @param params parameters * @throws XGBoostError native error */ - public CVPack(DMatrix dtrain, DMatrix dtest, Map params) throws XGBoostError { - dmats = new DMatrix[] {dtrain, dtest}; + public CVPack(DMatrix dtrain, DMatrix dtest, Map params) + throws XGBoostError { + dmats = new DMatrix[]{dtrain, dtest}; booster = new Booster(params, dmats); - names = new String[] {"train", "test"}; + names = new String[]{"train", "test"}; this.dtrain = dtrain; this.dtest = dtest; } @@ -466,7 +449,7 @@ public void update(int iter) throws XGBoostError { /** * update one iteration * - * @param obj customized objective + * @param obj customized objective * @throws XGBoostError native error */ public void update(IObjective obj) throws XGBoostError { From bc2620b176382e7673ef208b349d5478f4138d62 Mon Sep 17 00:00:00 2001 From: Helw150 Date: Wed, 8 Aug 2018 11:09:32 -0400 Subject: [PATCH 08/14] Many CI Fixes --- .../java/ml/dmlc/xgboost4j/java/XGBoost.java | 28 ++++++--- .../ml/dmlc/xgboost4j/scala/XGBoost.scala | 61 +++++++++++++++++-- 2 files changed, 75 insertions(+), 14 deletions(-) diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java index 7da9e2b4b416..f7f9c3eaee34 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java @@ -76,7 +76,7 @@ public static Booster train( Map watches, IObjective obj, IEvaluation eval) throws XGBoostError { - return train(dtrain, params, round, watches, null, obj, new IEvaluation[]{eval}, 0); + return train(dtrain, params, round, watches, null, obj, eval, 0); } /** @@ -106,7 +106,7 @@ public static Booster train( IEvaluation eval, int earlyStoppingRound) throws XGBoostError { return train(dtrain, params, round, watches, metrics, obj, - new IEvaluation[]{eval}, earlyStoppingRound, null); + eval, earlyStoppingRound, null); } /** @@ -137,8 +137,13 @@ public static Booster train( IEvaluation eval, int earlyStoppingRound, Booster booster) throws XGBoostError { - return train(dtrain, params, round, watches, metrics, obj, - new IEvaluation[]{eval}, earlyStoppingRound, booster); + if(eval != null) { + return trainWithMultipleEvals(dtrain, params, round, watches, metrics, obj, + new IEvaluation[]{eval}, earlyStoppingRound, booster); + } else { + return trainWithMultipleEvals(dtrain, params, round, watches, metrics, obj, + null, earlyStoppingRound, booster); + } } public static BoosterResults trainWithResults( @@ -204,7 +209,7 @@ public static BoosterResults trainWithResults( if (evalMats.length > 0) { float[] metricsOut = new float[evalMats.length]; String evalInfo = ""; - if (evals != null && evals.length > 1) { + if (evals != null && evals.length > 0) { for (int i = 0; i < evals.length; i++) { String evalLine = booster.evalSet(evalMats, evalNames, evals[i], metricsOut); evalInfo = evalInfo + " " + evalLine; @@ -251,18 +256,25 @@ public static BoosterResults trainWithResults( * performance on the validation set. * @param obj customized objective (set to null if not used) * @param eval customized evaluation (set to null if not used) + * @param earlyStoppingRound if non-zero, training would be stopped + * after a specified number of consecutive + * increases in any evaluation metric. * @return trained booster * @throws XGBoostError native error */ - public static Booster train( + public static Booster trainWithMultipleEvals( DMatrix dtrain, Map params, int round, Map watches, + float[][] metrics, IObjective obj, - IEvaluation[] evals) throws XGBoostError { + IEvaluation[] evals, + int earlyStoppingRound, + Booster booster) throws XGBoostError { - BoosterResults results = trainWithResults(dtrain, params, round, watches, obj, evals); + BoosterResults results = trainWithResults(dtrain, params, round, watches, metrics, + obj, evals, earlyStoppingRound, booster); return results.getBooster(); } diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala index 2ff8fddaa296..191ffed6258f 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala @@ -83,7 +83,7 @@ object XGBoost { * after a specified number of consecutive * increases in any evaluation metric. * @param obj customized objective - * @param eval customized evaluation or evaluations + * @param eval customized evaluation * @param booster train from scratch if set to null; train from an existing booster if not null. * @return The trained booster. */ @@ -95,15 +95,15 @@ object XGBoost { watches: Map[String, DMatrix] = Map(), metrics: Array[Array[Float]] = null, obj: ObjectiveTrait = null, - eval: Either[IEvaluation, Array[IEvaluation]] = null, + eval: IEvaluation = null, earlyStoppingRound: Int = 0, booster: Booster = null ): Booster = { val evals: Array[IEvaluation] = { - eval match { - case single: IEvaluation => Array(eval) - case multiple: Array[IEvaluation] => eval - case _ => null + if (eval != null) { + Array(eval) + } else { + null } } @@ -126,6 +126,55 @@ object XGBoost { } } + /** + * Train a booster given parameters. + * + * @param dtrain Data to be trained. + * @param params Parameters. + * @param round Number of boosting iterations. + * @param watches a group of items to be evaluated during training, this allows user to watch + * performance on the validation set. + * @param metrics array containing the evaluation metrics for each matrix in watches for each + * iteration + * @param earlyStoppingRound if non-zero, training would be stopped + * after a specified number of consecutive + * increases in any evaluation metric. + * @param obj customized objective + * @param evals customized evaluations + * @param booster train from scratch if set to null; train from an existing booster if not null. + * @return The trained booster. + */ + @throws(classOf[XGBoostError]) + def trainWithMultipleEvals( + dtrain: DMatrix, + params: Map[String, Any], + round: Int, + watches: Map[String, DMatrix] = Map[String, DMatrix](), + metrics: Array[Array[Float]] = null, + obj: ObjectiveTrait = null, + evals: Array[IEvaluation] = null, + earlyStoppingRound: Int = 0, + booster: Booster = null + ): Booster = { + val xgboostResults = trainWithResults( + dtrain, + params, + round, + watches, + metrics, + obj, + evals, + earlyStoppingRound, + booster + ) + if (booster == null) { + new Booster(xgboostResults.getBooster()) + } else { + // Avoid creating a new SBooster with the same JBooster + booster + } + } + /** * Cross-validation with given parameters. * From 484d6db0423606a22b36746b97b82086b4b3707c Mon Sep 17 00:00:00 2001 From: Helw150 Date: Thu, 27 Sep 2018 16:11:32 +0400 Subject: [PATCH 09/14] Java Comment Changes --- .../src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java index f7f9c3eaee34..ff95dd6959b7 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java @@ -137,7 +137,7 @@ public static Booster train( IEvaluation eval, int earlyStoppingRound, Booster booster) throws XGBoostError { - if(eval != null) { + if (eval != null) { return trainWithMultipleEvals(dtrain, params, round, watches, metrics, obj, new IEvaluation[]{eval}, earlyStoppingRound, booster); } else { @@ -164,6 +164,13 @@ public static BoosterResults trainWithResults( List mats = new ArrayList(); List logLines = new ArrayList(); + if (evals != null && evals.length == 0) { + throw new XGBoostError("Evaluation function array is empty, but not null."); + } else if (earlyStoppingRound != 0 && evals != null && evals.length > 1) { + Rabit.trackerPrint("Multiple evaluation functions provided, disabling early stopping."); + earlyStoppingRound = 0; + } + for (Map.Entry evalEntry : watches.entrySet()) { names.add(evalEntry.getKey()); mats.add(evalEntry.getValue()); @@ -209,7 +216,7 @@ public static BoosterResults trainWithResults( if (evalMats.length > 0) { float[] metricsOut = new float[evalMats.length]; String evalInfo = ""; - if (evals != null && evals.length > 0) { + if (evals != null) { for (int i = 0; i < evals.length; i++) { String evalLine = booster.evalSet(evalMats, evalNames, evals[i], metricsOut); evalInfo = evalInfo + " " + evalLine; From 3eaf4404d7d9fd4b5b3a8d0a7a0cbfc787838c01 Mon Sep 17 00:00:00 2001 From: Helw150 Date: Tue, 2 Oct 2018 14:51:08 +0400 Subject: [PATCH 10/14] Scala Comment Changes --- .../ml/dmlc/xgboost4j/scala/XGBoost.scala | 58 ++++--------------- 1 file changed, 11 insertions(+), 47 deletions(-) diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala index 191ffed6258f..f3e8f784527f 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala @@ -18,13 +18,7 @@ package ml.dmlc.xgboost4j.scala import java.io.InputStream -import ml.dmlc.xgboost4j.java.{ - Booster => JBooster, - XGBoost => JXGBoost, - XGBoostError, - BoosterResults, - IEvaluation -} +import ml.dmlc.xgboost4j.java.{Booster => JBooster, XGBoost => JXGBoost, XGBoostError, BoosterResults, IEvaluation} import scala.collection.JavaConverters._ /** @@ -42,8 +36,7 @@ object XGBoost { obj: ObjectiveTrait = null, evals: Array[IEvaluation] = null, earlyStoppingRound: Int = 0, - booster: Booster = null - ): BoosterResults = { + booster: Booster = null): BoosterResults = { val jWatches = watches.mapValues(_.jDMatrix).asJava val jBooster = if (booster == null) { @@ -51,21 +44,14 @@ object XGBoost { } else { booster.booster } - val xgboostResults = JXGBoost.trainWithResults( - dtrain.jDMatrix, - // we have to filter null value for customized obj and eval - params + + // we have to filter null value for customized obj and eval + val jFilteredParams = params .filter(_._2 != null) .mapValues(_.toString.asInstanceOf[AnyRef]) .asJava, - round, - jWatches, - metrics, - obj, - evals, - earlyStoppingRound, - jBooster - ) + + val xgboostResults = JXGBoost.trainWithResults(dtrain.jDMatrix, jFilteredParams, round, jWatches, metrics, obj, evals, earlyStoppingRound, jBooster) xgboostResults } @@ -107,17 +93,7 @@ object XGBoost { } } - val xgboostResults = trainWithResults( - dtrain, - params, - round, - watches, - metrics, - obj, - evals, - earlyStoppingRound, - booster - ) + val xgboostResults = trainWithResults(dtrain, params, round, watches, metrics, obj, evals, earlyStoppingRound, booster) if (booster == null) { new Booster(xgboostResults.getBooster()) } else { @@ -154,19 +130,8 @@ object XGBoost { obj: ObjectiveTrait = null, evals: Array[IEvaluation] = null, earlyStoppingRound: Int = 0, - booster: Booster = null - ): Booster = { - val xgboostResults = trainWithResults( - dtrain, - params, - round, - watches, - metrics, - obj, - evals, - earlyStoppingRound, - booster - ) + booster: Booster = null): Booster = { + val xgboostResults = trainWithResults(dtrain, params, round, watches, metrics, obj, evals, earlyStoppingRound, booster) if (booster == null) { new Booster(xgboostResults.getBooster()) } else { @@ -195,8 +160,7 @@ object XGBoost { nfold: Int = 5, metrics: Array[String] = null, obj: ObjectiveTrait = null, - eval: EvalTrait = null - ): Array[String] = { + eval: EvalTrait = null): Array[String] = { JXGBoost.crossValidation( data.jDMatrix, params From d67e8cf7e6c82b893322ce40a6fc201fee492341 Mon Sep 17 00:00:00 2001 From: Helw150 Date: Tue, 2 Oct 2018 15:08:33 +0400 Subject: [PATCH 11/14] IEvaluation to EvalTrait --- .../main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala index f3e8f784527f..8df1533a09b1 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala @@ -18,7 +18,7 @@ package ml.dmlc.xgboost4j.scala import java.io.InputStream -import ml.dmlc.xgboost4j.java.{Booster => JBooster, XGBoost => JXGBoost, XGBoostError, BoosterResults, IEvaluation} +import ml.dmlc.xgboost4j.java.{Booster => JBooster, XGBoost => JXGBoost, XGBoostError, BoosterResults} import scala.collection.JavaConverters._ /** @@ -34,7 +34,7 @@ object XGBoost { watches: Map[String, DMatrix] = Map[String, DMatrix](), metrics: Array[Array[Float]] = null, obj: ObjectiveTrait = null, - evals: Array[IEvaluation] = null, + evals: Array[EvalTrait] = null, earlyStoppingRound: Int = 0, booster: Booster = null): BoosterResults = { @@ -81,11 +81,11 @@ object XGBoost { watches: Map[String, DMatrix] = Map(), metrics: Array[Array[Float]] = null, obj: ObjectiveTrait = null, - eval: IEvaluation = null, + eval: EvalTrait = null, earlyStoppingRound: Int = 0, booster: Booster = null ): Booster = { - val evals: Array[IEvaluation] = { + val evals: Array[EvalTrait] = { if (eval != null) { Array(eval) } else { @@ -128,7 +128,7 @@ object XGBoost { watches: Map[String, DMatrix] = Map[String, DMatrix](), metrics: Array[Array[Float]] = null, obj: ObjectiveTrait = null, - evals: Array[IEvaluation] = null, + evals: Array[EvalTrait] = null, earlyStoppingRound: Int = 0, booster: Booster = null): Booster = { val xgboostResults = trainWithResults(dtrain, params, round, watches, metrics, obj, evals, earlyStoppingRound, booster) From 2c07ec014b6b24d7e8215bb74b257bc598ca5d15 Mon Sep 17 00:00:00 2001 From: Helw150 Date: Tue, 2 Oct 2018 15:27:29 +0400 Subject: [PATCH 12/14] Residual Comma --- .../src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala index 8df1533a09b1..cc6573b068e2 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala @@ -49,7 +49,7 @@ object XGBoost { val jFilteredParams = params .filter(_._2 != null) .mapValues(_.toString.asInstanceOf[AnyRef]) - .asJava, + .asJava val xgboostResults = JXGBoost.trainWithResults(dtrain.jDMatrix, jFilteredParams, round, jWatches, metrics, obj, evals, earlyStoppingRound, jBooster) xgboostResults From 5daced54c07cf9e4d9f0b832acbaaf5ef303ea64 Mon Sep 17 00:00:00 2001 From: Helw150 Date: Tue, 2 Oct 2018 21:02:53 +0400 Subject: [PATCH 13/14] Line length fixes --- .../ml/dmlc/xgboost4j/scala/XGBoost.scala | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala index cc6573b068e2..fac380801f45 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala @@ -51,7 +51,17 @@ object XGBoost { .mapValues(_.toString.asInstanceOf[AnyRef]) .asJava - val xgboostResults = JXGBoost.trainWithResults(dtrain.jDMatrix, jFilteredParams, round, jWatches, metrics, obj, evals, earlyStoppingRound, jBooster) + val xgboostResults = JXGBoost.trainWithResults( + dtrain.jDMatrix, + jFilteredParams, + round, + jWatches, + metrics, + obj, + evals, + earlyStoppingRound, + jBooster) + xgboostResults } @@ -93,7 +103,8 @@ object XGBoost { } } - val xgboostResults = trainWithResults(dtrain, params, round, watches, metrics, obj, evals, earlyStoppingRound, booster) + val xgboostResults = trainWithResults(dtrain, params, round, watches, metrics, + obj, evals, earlyStoppingRound, booster) if (booster == null) { new Booster(xgboostResults.getBooster()) } else { @@ -131,7 +142,8 @@ object XGBoost { evals: Array[EvalTrait] = null, earlyStoppingRound: Int = 0, booster: Booster = null): Booster = { - val xgboostResults = trainWithResults(dtrain, params, round, watches, metrics, obj, evals, earlyStoppingRound, booster) + val xgboostResults = trainWithResults(dtrain, params, round, watches, metrics, + obj, evals, earlyStoppingRound, booster) if (booster == null) { new Booster(xgboostResults.getBooster()) } else { From 4762c3f7c257654b5135b1ac14f8a46afde3a11b Mon Sep 17 00:00:00 2001 From: Helw150 Date: Tue, 2 Oct 2018 21:35:37 +0400 Subject: [PATCH 14/14] Inheritence fix --- .../src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala index fac380801f45..fed449f3cd18 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala @@ -58,7 +58,7 @@ object XGBoost { jWatches, metrics, obj, - evals, + evals.toArray, earlyStoppingRound, jBooster)