diff --git a/Jenkinsfile b/Jenkinsfile index b30cd6ac9c09..e40e24d06824 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -75,6 +75,7 @@ pipeline { 'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') }, 'build-gpu-cuda10.2': { BuildCUDA(cuda_version: '10.2') }, 'build-gpu-cuda11.0': { BuildCUDA(cuda_version: '11.0') }, + 'build-jvm-packages-gpu-cuda10.0': { BuildJVMPackagesWithCUDA(spark_version: '3.0.0', cuda_version: '10.0') }, 'build-jvm-packages': { BuildJVMPackages(spark_version: '3.0.0') }, 'build-jvm-doc': { BuildJVMDoc() } ]) @@ -94,6 +95,7 @@ pipeline { 'test-python-mgpu-cuda10.2': { TestPythonGPU(host_cuda_version: '10.2', multi_gpu: true) }, 'test-cpp-gpu-cuda10.2': { TestCppGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2') }, 'test-cpp-gpu-cuda11.0': { TestCppGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') }, + 'test-jvm-jdk8-cuda10.0': { CrossTestJVMwithJDKGPU(artifact_cuda_version: '10.0', host_cuda_version: '10.0') }, 'test-jvm-jdk8': { CrossTestJVMwithJDK(jdk_version: '8', spark_version: '3.0.0') }, 'test-jvm-jdk11': { CrossTestJVMwithJDK(jdk_version: '11') }, 'test-jvm-jdk12': { CrossTestJVMwithJDK(jdk_version: '12') }, @@ -282,6 +284,28 @@ def BuildCUDA(args) { } } +def BuildJVMPackagesWithCUDA(args) { + node('linux && cpu_build') { + unstash name: 'srcs' + echo "Build XGBoost4J-Spark with Spark ${args.spark_version}, CUDA ${args.cuda_version}" + def container_type = "jvm_gpu_build" + def docker_binary = "docker" + def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}" + def arch_flag = "" + if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) { + arch_flag = "-DGPU_COMPUTE_VER=75" + } + // Use only 4 CPU cores + def docker_extra_params = "CI_DOCKER_EXTRA_PARAMS_INIT='--cpuset-cpus 0-3'" + sh """ + ${docker_extra_params} ${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/build_jvm_packages.sh ${args.spark_version} -Duse.cuda=ON + """ + echo "Stashing XGBoost4J 
JAR with CUDA ${args.cuda_version} ..." + stash name: 'xgboost4j_jar_gpu', includes: "jvm-packages/xgboost4j/target/*.jar,jvm-packages/xgboost4j-spark/target/*.jar,jvm-packages/xgboost4j-example/target/*.jar" + deleteDir() + } +} + def BuildJVMPackages(args) { node('linux && cpu') { unstash name: 'srcs' @@ -386,6 +410,24 @@ def TestCppGPU(args) { } } +def CrossTestJVMwithJDKGPU(args) { + def nodeReq = 'linux && mgpu' + node(nodeReq) { + unstash name: "xgboost4j_jar_gpu" + unstash name: 'srcs' + if (args.spark_version != null) { + echo "Test XGBoost4J on a machine with JDK ${args.jdk_version ?: '8'}, Spark ${args.spark_version}, CUDA ${args.host_cuda_version}" + } else { + echo "Test XGBoost4J on a machine with JDK ${args.jdk_version ?: '8'}, CUDA ${args.host_cuda_version}" + } + def container_type = "gpu_jvm" + def docker_binary = "nvidia-docker" + def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}" + sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/test_jvm_gpu_cross.sh" + deleteDir() + } +} + def CrossTestJVMwithJDK(args) { node('linux && cpu') { unstash name: 'xgboost4j_jar' diff --git a/tests/ci_build/Dockerfile.gpu_jvm b/tests/ci_build/Dockerfile.gpu_jvm new file mode 100644 index 000000000000..acd7b9b86188 --- /dev/null +++ b/tests/ci_build/Dockerfile.gpu_jvm @@ -0,0 +1,51 @@ +ARG CUDA_VERSION +FROM nvidia/cuda:$CUDA_VERSION-runtime-ubuntu16.04 +ARG JDK_VERSION=8 +ARG SPARK_VERSION=3.0.0 + +# Environment +ENV DEBIAN_FRONTEND noninteractive + +# Install all basic requirements +RUN \ + apt-get update && \ + apt-get install -y software-properties-common && \ + add-apt-repository ppa:openjdk-r/ppa && \ + apt-get update && \ + apt-get install -y tar unzip wget openjdk-$JDK_VERSION-jdk libgomp1 && \ + # Python + wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ + bash Miniconda3.sh -b -p /opt/python && \ + /opt/python/bin/pip install awscli && \ + # Maven + wget 
https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \ + tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \ + ln -s /opt/apache-maven-3.6.1/ /opt/maven && \ + # Spark + wget https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz && \ + tar xvf spark-$SPARK_VERSION-bin-hadoop2.7.tgz -C /opt && \ + ln -s /opt/spark-$SPARK_VERSION-bin-hadoop2.7 /opt/spark + +ENV PATH=/opt/python/bin:/opt/spark/bin:/opt/maven/bin:$PATH + +# Install Python packages +RUN \ + pip install numpy scipy pandas scikit-learn + +ENV GOSU_VERSION 1.10 + +# Install lightweight sudo (not bound to TTY) +RUN set -ex; \ + wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \ + chmod +x /usr/local/bin/gosu && \ + gosu nobody true + +# Set default JDK version +RUN update-java-alternatives -v -s java-1.$JDK_VERSION.0-openjdk-amd64 + +# Default entry-point to use if running locally +# It will preserve attributes of created files +COPY entrypoint.sh /scripts/ + +WORKDIR /workspace +ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/Dockerfile.jvm_gpu_build b/tests/ci_build/Dockerfile.jvm_gpu_build new file mode 100644 index 000000000000..ed6c3d689a90 --- /dev/null +++ b/tests/ci_build/Dockerfile.jvm_gpu_build @@ -0,0 +1,63 @@ +ARG CUDA_VERSION +FROM nvidia/cuda:$CUDA_VERSION-devel-centos6 +ARG CUDA_VERSION + +# Environment +ENV DEBIAN_FRONTEND noninteractive +ENV DEVTOOLSET_URL_ROOT http://vault.centos.org/6.9/sclo/x86_64/rh/devtoolset-4/ + +# Install all basic requirements +RUN \ + yum -y update && \ + yum install -y tar unzip wget xz git centos-release-scl yum-utils java-1.8.0-openjdk-devel && \ + yum-config-manager --enable centos-sclo-rh-testing && \ + yum -y update && \ + yum install -y $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-5.3.1-6.1.el6.x86_64.rpm \ + $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-c++-5.3.1-6.1.el6.x86_64.rpm \ + 
$DEVTOOLSET_URL_ROOT/devtoolset-4-binutils-2.25.1-8.el6.x86_64.rpm \ + $DEVTOOLSET_URL_ROOT/devtoolset-4-runtime-4.1-3.sc1.el6.x86_64.rpm \ + $DEVTOOLSET_URL_ROOT/devtoolset-4-libstdc++-devel-5.3.1-6.1.el6.x86_64.rpm && \ + # Python + wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ + bash Miniconda3.sh -b -p /opt/python && \ + # CMake + wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \ + bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ + # Maven + wget https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \ + tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \ + ln -s /opt/apache-maven-3.6.1/ /opt/maven + +# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) +RUN \ + export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \ + export NCCL_VERSION=2.4.8-1 && \ + wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ + rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ + yum -y update && \ + yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} && \ + rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm; + +ENV PATH=/opt/python/bin:/opt/maven/bin:$PATH +ENV CC=/opt/rh/devtoolset-4/root/usr/bin/gcc +ENV CXX=/opt/rh/devtoolset-4/root/usr/bin/c++ +ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp + +# Install Python packages +RUN \ + pip install numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.22 awscli + +ENV GOSU_VERSION 1.10 + +# Install lightweight sudo (not bound to TTY) +RUN set -ex; \ + wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \ + chmod +x /usr/local/bin/gosu && \ + gosu nobody true + +# 
Default entry-point to use if running locally +# It will preserve attributes of created files +COPY entrypoint.sh /scripts/ + +WORKDIR /workspace +ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/build_jvm_packages.sh b/tests/ci_build/build_jvm_packages.sh index 8190aa1e10ea..f1128ccbb3bd 100755 --- a/tests/ci_build/build_jvm_packages.sh +++ b/tests/ci_build/build_jvm_packages.sh @@ -3,12 +3,14 @@ set -e set -x -if [ $# -ne 1 ]; then - echo "Usage: $0 [spark version]" - exit 1 -fi - spark_version=$1 +use_cuda=$2 + +gpu_options="" +if [ "x$use_cuda" == "x-Duse.cuda=ON" ]; then + # Since building jvm for CPU will do unit tests, let's bypass it when building for GPU + gpu_options=" -Dmaven.test.skip=true -DskipTests $use_cuda " +fi # Initialize local Maven repository ./tests/ci_build/initialize_maven.sh @@ -16,7 +18,7 @@ spark_version=$1 rm -rf build/ cd jvm-packages export RABIT_MOCK=ON -mvn --no-transfer-progress package -Dspark.version=${spark_version} +mvn --no-transfer-progress package -Dspark.version=${spark_version} $gpu_options set +x set +e diff --git a/tests/ci_build/test_jvm_gpu_cross.sh b/tests/ci_build/test_jvm_gpu_cross.sh new file mode 100755 index 000000000000..51ccfa32be51 --- /dev/null +++ b/tests/ci_build/test_jvm_gpu_cross.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +set -e +set -x + + +nvidia-smi + +ls /usr/local/ + +# Initialize local Maven repository +./tests/ci_build/initialize_maven.sh + +# Get version number of XGBoost4J and other auxiliary information +cd jvm-packages +xgboost4j_version=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) +scala_binary_version=$(mvn help:evaluate -Dexpression=scala.binary.version -q -DforceStdout) + +python3 xgboost4j-tester/get_iris.py +xgb_jars="./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar,./xgboost4j-spark/target/xgboost4j-spark_${scala_binary_version}-${xgboost4j_version}.jar" 
+example_jar="./xgboost4j-example/target/xgboost4j-example_${scala_binary_version}-${xgboost4j_version}.jar" + +echo "Run SparkTraining locally ... " +spark-submit \ + --master 'local[1]' \ + --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining \ + --jars $xgb_jars \ + $example_jar \ + ${PWD}/iris.csv gpu + +echo "Run SparkMLlibPipeline locally ... " +spark-submit \ + --master 'local[1]' \ + --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline \ + --jars $xgb_jars \ + $example_jar \ + ${PWD}/iris.csv ${PWD}/native_model ${PWD}/pipeline_model gpu + +set +x +set +e