Skip to content

Commit

Permalink
[BLOCKING] [jvm-packages] add gpu_hist and enable gpu scheduling (#5171)
Browse files Browse the repository at this point in the history
* [jvm-packages] add gpu_hist tree method

* change updater hist to grow_quantile_histmaker

* add gpu scheduling

* pass correct parameters to xgboost library

* remove debug info

* add use.cuda for pom

* add CI for gpu_hist for jvm

* add gpu unit tests

* use gpu node to build jvm

* use nvidia-docker

* Add CLI interface to create_jni.py using argparse

Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
  • Loading branch information
wbo4958 and hcho3 authored Jul 27, 2020
1 parent 6347fa1 commit 8943eb4
Show file tree
Hide file tree
Showing 18 changed files with 542 additions and 122 deletions.
42 changes: 42 additions & 0 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ pipeline {
'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') },
'build-gpu-cuda10.2': { BuildCUDA(cuda_version: '10.2') },
'build-gpu-cuda11.0': { BuildCUDA(cuda_version: '11.0') },
'build-jvm-packages-gpu-cuda10.0': { BuildJVMPackagesWithCUDA(spark_version: '3.0.0', cuda_version: '10.0') },
'build-jvm-packages': { BuildJVMPackages(spark_version: '3.0.0') },
'build-jvm-doc': { BuildJVMDoc() }
])
Expand All @@ -94,6 +95,7 @@ pipeline {
'test-python-mgpu-cuda10.2': { TestPythonGPU(host_cuda_version: '10.2', multi_gpu: true) },
'test-cpp-gpu-cuda10.2': { TestCppGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2') },
'test-cpp-gpu-cuda11.0': { TestCppGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
'test-jvm-jdk8-cuda10.0': { CrossTestJVMwithJDKGPU(artifact_cuda_version: '10.0', host_cuda_version: '10.0') },
'test-jvm-jdk8': { CrossTestJVMwithJDK(jdk_version: '8', spark_version: '3.0.0') },
'test-jvm-jdk11': { CrossTestJVMwithJDK(jdk_version: '11') },
'test-jvm-jdk12': { CrossTestJVMwithJDK(jdk_version: '12') },
Expand Down Expand Up @@ -282,6 +284,28 @@ def BuildCUDA(args) {
}
}

// Build the XGBoost4J JVM packages with CUDA enabled inside the "jvm_gpu_build"
// container and stash the resulting JARs under the name 'xgboost4j_jar_gpu'.
//
// args.spark_version: Spark version handed to tests/ci_build/build_jvm_packages.sh
// args.cuda_version:  CUDA version passed to the container build as CUDA_VERSION
def BuildJVMPackagesWithCUDA(args) {
  node('linux && gpu') {
    unstash name: 'srcs'
    echo "Build XGBoost4J-Spark with Spark ${args.spark_version}, CUDA ${args.cuda_version}"

    def containerType = "jvm_gpu_build"
    def dockerBinary = "nvidia-docker"
    def dockerArgs = "--build-arg CUDA_VERSION=${args.cuda_version}"

    // Only 'master' and 'release*' branches build without an explicit GPU_COMPUTE_VER;
    // every other branch pins it to 75.
    // NOTE(review): presumably this limits the targeted GPU architectures to shorten
    // non-release builds -- confirm against the CMake configuration.
    def isMainlineBranch = env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release')
    def archFlag = isMainlineBranch ? "" : "-DGPU_COMPUTE_VER=75"

    // Use only 4 CPU cores
    def cpuLimit = "CI_DOCKER_EXTRA_PARAMS_INIT='--cpuset-cpus 0-3'"

    def buildCmd = "${cpuLimit} ${dockerRun} ${containerType} ${dockerBinary} ${dockerArgs} " +
                   "tests/ci_build/build_jvm_packages.sh ${args.spark_version} -Duse.cuda=ON ${archFlag}"
    sh "\n${buildCmd}\n"

    echo "Stashing XGBoost4J JAR with CUDA ${args.cuda_version} ..."
    stash name: 'xgboost4j_jar_gpu', includes: "jvm-packages/xgboost4j/target/*.jar,jvm-packages/xgboost4j-spark/target/*.jar,jvm-packages/xgboost4j-example/target/*.jar"
    deleteDir()
  }
}

def BuildJVMPackages(args) {
node('linux && cpu') {
unstash name: 'srcs'
Expand Down Expand Up @@ -386,6 +410,24 @@ def TestCppGPU(args) {
}
}

// Run the JVM GPU cross tests inside the "gpu_jvm" container on a multi-GPU node,
// using the JARs stashed as 'xgboost4j_jar_gpu' together with the source tree.
//
// args.host_cuda_version: CUDA version passed to the container build as CUDA_VERSION
// args.jdk_version:       optional; only used in the log message
// args.spark_version:     optional; only used in the log message
def CrossTestJVMwithJDKGPU(args) {
  def nodeReq = 'linux && mgpu'
  node(nodeReq) {
    unstash name: "xgboost4j_jar_gpu"
    unstash name: 'srcs'

    // Describe only the components that were actually supplied. The pipeline stage
    // that calls this function passes no jdk_version, which previously produced
    // "JDK null" in the build log.
    def environment = []
    if (args.jdk_version != null) {
      environment << "JDK ${args.jdk_version}"
    }
    if (args.spark_version != null) {
      environment << "Spark ${args.spark_version}"
    }
    environment << "CUDA ${args.host_cuda_version}"
    echo "Test XGBoost4J on a machine with ${environment.join(', ')}"

    def container_type = "gpu_jvm"
    def docker_binary = "nvidia-docker"
    def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
    sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/test_jvm_gpu_cross.sh"
    deleteDir()
  }
}

def CrossTestJVMwithJDK(args) {
node('linux && cpu') {
unstash name: 'xgboost4j_jar'
Expand Down
8 changes: 8 additions & 0 deletions doc/jvm/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,14 @@ If you are on Mac OS and using a compiler that supports OpenMP, you need to go t
in order to get the benefit of multi-threading.

Building with GPU support
-------------------------
If you want to build XGBoost4J that supports distributed GPU training, run

.. code-block:: bash

  mvn -Duse.cuda=ON install

********
Contents
********
Expand Down
16 changes: 15 additions & 1 deletion jvm-packages/create_jni.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#!/usr/bin/env python
import errno
import argparse
import glob
import os
import shutil
import subprocess
import sys
from contextlib import contextmanager


# Monkey-patch the API inconsistency between Python2.X and 3.X.
if sys.platform.startswith("linux"):
sys.platform = "linux"
Expand All @@ -20,6 +20,7 @@
"USE_S3": "OFF",

"USE_CUDA": "OFF",
"USE_NCCL": "OFF",
"JVM_BINDINGS": "ON"
}

Expand Down Expand Up @@ -68,6 +69,10 @@ def normpath(path):


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--use-cuda', type=str, choices=['ON', 'OFF'], default='OFF')
cli_args = parser.parse_args()

if sys.platform == "darwin":
# Enable if your compiler supports OpenMP.
CONFIG["USE_OPENMP"] = "OFF"
Expand All @@ -88,12 +93,21 @@ def normpath(path):
else:
maybe_parallel_build = ""

if cli_args.use_cuda == 'ON':
CONFIG['USE_CUDA'] = 'ON'
CONFIG['USE_NCCL'] = 'ON'

args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()]

# if the environment sets RABIT_MOCK
if os.getenv("RABIT_MOCK", None) is not None:
args.append("-DRABIT_MOCK:BOOL=ON")

# if the environment sets GPU_ARCH_FLAG
gpu_arch_flag = os.getenv("GPU_ARCH_FLAG", None)
if gpu_arch_flag is not None:
args.append("%s" % gpu_arch_flag)

run("cmake .. " + " ".join(args) + maybe_generator)
run("cmake --build . --config Release" + maybe_parallel_build)

Expand Down
87 changes: 78 additions & 9 deletions jvm-packages/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
<scala.version>2.12.8</scala.version>
<scala.binary.version>2.12</scala.binary.version>
<hadoop.version>2.7.3</hadoop.version>
<use.cuda>OFF</use.cuda>
</properties>
<repositories>
<repository>
Expand All @@ -52,7 +53,65 @@
<module>xgboost4j-spark</module>
<module>xgboost4j-flink</module>
</modules>

<profiles>
<profile>
<!-- Default profile: active by default (see activeByDefault below). It configures
scalatest-maven-plugin to exclude tests tagged ml.dmlc.xgboost4j.java.GpuTestSuite,
so the gpu related test suites are not run. -->
<id>default</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<configuration>
<tagsToExclude>ml.dmlc.xgboost4j.java.GpuTestSuite</tagsToExclude>
</configuration>
</plugin>
</plugins>
</build>
</profile>

<!-- gpu profile: activated when the use.cuda property has the value ON. It declares
scalatest-maven-plugin without any tag filter, so both cpu and gpu test suites run. -->
<profile>
<id>gpu</id>
<activation>
<property>
<name>use.cuda</name>
<value>ON</value>
</property>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</profile>

<!-- gpu-with-gpu-tests profile: declares no activation element of its own. It sets
the use.cuda property to ON and restricts scalatest-maven-plugin to tests tagged
ml.dmlc.xgboost4j.java.GpuTestSuite, so only gpu test suites run. -->
<profile>
<id>gpu-with-gpu-tests</id>
<properties>
<use.cuda>ON</use.cuda>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<configuration>
<tagsToInclude>ml.dmlc.xgboost4j.java.GpuTestSuite</tagsToInclude>
</configuration>
</plugin>
</plugins>
</build>
</profile>

<profile>
<id>release</id>
<build>
Expand Down Expand Up @@ -242,6 +301,25 @@
<filtering>true</filtering>
</resource>
</resources>

<pluginManagement>
<plugins>
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<version>1.0</version>
<executions>
<execution>
<id>test</id>
<goals>
<goal>test</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</pluginManagement>

<plugins>
<plugin>
<groupId>org.scalastyle</groupId>
Expand Down Expand Up @@ -336,15 +414,6 @@
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<version>1.0</version>
<executions>
<execution>
<id>test</id>
<goals>
<goal>test</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
<extensions>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,20 @@ object SparkMLlibPipeline {

def main(args: Array[String]): Unit = {

if (args.length != 3) {
println("Usage: SparkMLlibPipeline input_path native_model_path pipeline_model_path")
if (args.length != 3 && args.length != 4) {
println("Usage: SparkMLlibPipeline input_path native_model_path pipeline_model_path " +
"[cpu|gpu]")
sys.exit(1)
}

val inputPath = args(0)
val nativeModelPath = args(1)
val pipelineModelPath = args(2)

val (treeMethod, numWorkers) = if (args.length == 4 && args(3) == "gpu") {
("gpu_hist", 1)
} else ("auto", 2)

val spark = SparkSession
.builder()
.appName("XGBoost4J-Spark Pipeline Example")
Expand Down Expand Up @@ -76,7 +81,8 @@ object SparkMLlibPipeline {
"objective" -> "multi:softprob",
"num_class" -> 3,
"num_round" -> 100,
"num_workers" -> 2
"num_workers" -> numWorkers,
"tree_method" -> treeMethod
)
)
booster.setFeaturesCol("features")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,14 @@ object SparkTraining {
def main(args: Array[String]): Unit = {
if (args.length < 1) {
// scalastyle:off
println("Usage: program input_path")
println("Usage: program input_path [cpu|gpu]")
sys.exit(1)
}

val (treeMethod, numWorkers) = if (args.length == 2 && args(1) == "gpu") {
("gpu_hist", 1)
} else ("auto", 2)

val spark = SparkSession.builder().getOrCreate()
val inputPath = args(0)
val schema = new StructType(Array(
Expand Down Expand Up @@ -68,7 +73,8 @@ object SparkTraining {
"objective" -> "multi:softprob",
"num_class" -> 3,
"num_round" -> 100,
"num_workers" -> 2,
"num_workers" -> numWorkers,
"tree_method" -> treeMethod,
"eval_sets" -> Map("eval1" -> eval1, "eval2" -> eval2))
val xgbClassifier = new XGBoostClassifier(xgbParam).
setFeaturesCol("features").
Expand Down
Loading

0 comments on commit 8943eb4

Please sign in to comment.