diff --git a/Jenkinsfile b/Jenkinsfile
index a827b1a61290..b30cd6ac9c09 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -6,6 +6,9 @@
 // Command to run command inside a docker container
 dockerRun = 'tests/ci_build/ci_build.sh'
 
+// Which CUDA version to use when building reference distribution wheel
+ref_cuda_ver = '10.0'
+
 import groovy.transform.Field
 
 @Field
@@ -65,8 +68,13 @@ pipeline {
            'build-cpu': { BuildCPU() },
            'build-cpu-rabit-mock': { BuildCPUMock() },
            'build-cpu-non-omp': { BuildCPUNonOmp() },
+           // Build reference, distribution-ready Python wheel with CUDA 10.0
+           // using CentOS 6 image
            'build-gpu-cuda10.0': { BuildCUDA(cuda_version: '10.0') },
+           // The build-gpu-* builds below use Ubuntu image
            'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') },
+           'build-gpu-cuda10.2': { BuildCUDA(cuda_version: '10.2') },
+           'build-gpu-cuda11.0': { BuildCUDA(cuda_version: '11.0') },
            'build-jvm-packages': { BuildJVMPackages(spark_version: '3.0.0') },
            'build-jvm-doc': { BuildJVMDoc() }
           ])
@@ -80,11 +88,12 @@
         script {
           parallel ([
            'test-python-cpu': { TestPythonCPU() },
-           'test-python-gpu-cuda10.0': { TestPythonGPU(cuda_version: '10.0') },
-           'test-python-gpu-cuda10.1': { TestPythonGPU(cuda_version: '10.1') },
-           'test-python-mgpu-cuda10.1': { TestPythonGPU(cuda_version: '10.1', multi_gpu: true) },
-           'test-cpp-gpu': { TestCppGPU(cuda_version: '10.1') },
-           'test-cpp-mgpu': { TestCppGPU(cuda_version: '10.1', multi_gpu: true) },
+           'test-python-gpu-cuda10.0': { TestPythonGPU(host_cuda_version: '10.0') },
+           'test-python-gpu-cuda10.2': { TestPythonGPU(host_cuda_version: '10.2') },
+           'test-python-gpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
+           'test-python-mgpu-cuda10.2': { TestPythonGPU(host_cuda_version: '10.2', multi_gpu: true) },
+           'test-cpp-gpu-cuda10.2': { TestCppGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2') },
+           'test-cpp-gpu-cuda11.0': { TestCppGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
            'test-jvm-jdk8': { CrossTestJVMwithJDK(jdk_version: '8', spark_version: '3.0.0') },
            'test-jvm-jdk11': { CrossTestJVMwithJDK(jdk_version: '11') },
            'test-jvm-jdk12': { CrossTestJVMwithJDK(jdk_version: '12') },
@@ -123,6 +132,10 @@ def checkoutSrcs() {
   }
 }
 
+def GetCUDABuildContainerType(cuda_version) {
+  return (cuda_version == ref_cuda_ver) ? 'gpu_build_centos6' : 'gpu_build'
+}
+
 def ClangTidy() {
   node('linux && cpu_build') {
     unstash name: 'srcs'
@@ -244,7 +257,7 @@ def BuildCUDA(args) {
   node('linux && cpu_build') {
     unstash name: 'srcs'
     echo "Build with CUDA ${args.cuda_version}"
-    def container_type = "gpu_build"
+    def container_type = GetCUDABuildContainerType(args.cuda_version)
     def docker_binary = "docker"
     def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
     def arch_flag = ""
@@ -254,20 +267,17 @@ def BuildCUDA(args) {
     sh """
     ${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/build_via_cmake.sh -DUSE_CUDA=ON -DUSE_NCCL=ON -DOPEN_MP:BOOL=ON -DHIDE_CXX_SYMBOLS=ON ${arch_flag}
     ${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
-    ${dockerRun} ${container_type} ${docker_binary} ${docker_args} python3 tests/ci_build/rename_whl.py python-package/dist/*.whl ${commit_id} manylinux2010_x86_64
+    ${dockerRun} ${container_type} ${docker_binary} ${docker_args} python tests/ci_build/rename_whl.py python-package/dist/*.whl ${commit_id} manylinux2010_x86_64
     """
-    // Stash wheel for CUDA 10.0 target
-    if (args.cuda_version == '10.0') {
-      echo 'Stashing Python wheel...'
-      stash name: 'xgboost_whl_cuda10', includes: 'python-package/dist/*.whl'
-      if (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release')) {
-        echo 'Uploading Python wheel...'
-        path = ("${BRANCH_NAME}" == 'master') ? '' : "${BRANCH_NAME}/"
-        s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', workingDir: 'python-package/dist', includePathPattern:'**/*.whl'
-      }
-      echo 'Stashing C++ test executable (testxgboost)...'
-      stash name: 'xgboost_cpp_tests', includes: 'build/testxgboost'
+    echo 'Stashing Python wheel...'
+    stash name: "xgboost_whl_cuda${args.cuda_version}", includes: 'python-package/dist/*.whl'
+    if (args.cuda_version == ref_cuda_ver && (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release'))) {
+      echo 'Uploading Python wheel...'
+      path = ("${BRANCH_NAME}" == 'master') ? '' : "${BRANCH_NAME}/"
+      s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', workingDir: 'python-package/dist', includePathPattern:'**/*.whl'
     }
+    echo 'Stashing C++ test executable (testxgboost)...'
+    stash name: "xgboost_cpp_tests_cuda${args.cuda_version}", includes: 'build/testxgboost'
     deleteDir()
   }
 }
@@ -308,7 +318,7 @@ def BuildJVMDoc() {
 
 def TestPythonCPU() {
   node('linux && cpu') {
-    unstash name: 'xgboost_whl_cuda10'
+    unstash name: "xgboost_whl_cuda${ref_cuda_ver}"
     unstash name: 'srcs'
     unstash name: 'xgboost_cli'
     echo "Test Python CPU"
@@ -322,15 +332,16 @@
 }
 
 def TestPythonGPU(args) {
-  nodeReq = (args.multi_gpu) ? 'linux && mgpu' : 'linux && gpu'
+  def nodeReq = (args.multi_gpu) ? 'linux && mgpu' : 'linux && gpu'
+  def artifact_cuda_version = (args.artifact_cuda_version) ?: ref_cuda_ver
   node(nodeReq) {
-    unstash name: 'xgboost_whl_cuda10'
-    unstash name: 'xgboost_cpp_tests'
+    unstash name: "xgboost_whl_cuda${artifact_cuda_version}"
+    unstash name: "xgboost_cpp_tests_cuda${artifact_cuda_version}"
     unstash name: 'srcs'
-    echo "Test Python GPU: CUDA ${args.cuda_version}"
+    echo "Test Python GPU: CUDA ${args.host_cuda_version}"
     def container_type = "gpu"
     def docker_binary = "nvidia-docker"
-    def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
+    def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
     if (args.multi_gpu) {
       echo "Using multiple GPUs"
       sh """
@@ -361,21 +372,16 @@ def TestCppRabit() {
 }
 
 def TestCppGPU(args) {
-  nodeReq = (args.multi_gpu) ? 'linux && mgpu' : 'linux && gpu'
+  def nodeReq = 'linux && mgpu'
+  def artifact_cuda_version = (args.artifact_cuda_version) ?: ref_cuda_ver
   node(nodeReq) {
-    unstash name: 'xgboost_cpp_tests'
+    unstash name: "xgboost_cpp_tests_cuda${artifact_cuda_version}"
     unstash name: 'srcs'
-    echo "Test C++, CUDA ${args.cuda_version}"
+    echo "Test C++, CUDA ${args.host_cuda_version}"
     def container_type = "gpu"
     def docker_binary = "nvidia-docker"
-    def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
-    if (args.multi_gpu) {
-      echo "Using multiple GPUs"
-      sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} build/testxgboost --gtest_filter=*.MGPU_*"
-    } else {
-      echo "Using a single GPU"
-      sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} build/testxgboost --gtest_filter=-*.MGPU_*"
-    }
+    def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
+    sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} build/testxgboost"
     deleteDir()
   }
 }
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 21c802bc1a7e..840a7f713156 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -9,7 +9,9 @@ if (USE_CUDA)
   file(GLOB_RECURSE CUDA_SOURCES *.cu *.cuh)
   target_sources(objxgboost PRIVATE ${CUDA_SOURCES})
   target_compile_definitions(objxgboost PRIVATE -DXGBOOST_USE_CUDA=1)
-  target_include_directories(objxgboost PRIVATE ${xgboost_SOURCE_DIR}/cub/)
+  if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.0)
+    target_include_directories(objxgboost PRIVATE ${xgboost_SOURCE_DIR}/cub/)
+  endif (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.0)
   target_compile_options(objxgboost PRIVATE
     $<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>
     $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
diff --git a/tests/ci_build/Dockerfile.gpu_build b/tests/ci_build/Dockerfile.gpu_build
index 8a614db81ceb..8a741fc8775f 100644
--- a/tests/ci_build/Dockerfile.gpu_build
+++ b/tests/ci_build/Dockerfile.gpu_build
@@ -1,53 +1,30 @@
 ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-devel-centos6
+FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu16.04
+ARG CUDA_VERSION
 
 # Environment
 ENV DEBIAN_FRONTEND noninteractive
-ENV DEVTOOLSET_URL_ROOT http://vault.centos.org/6.9/sclo/x86_64/rh/devtoolset-4/
+SHELL ["/bin/bash", "-c"]   # Use Bash as shell
 
 # Install all basic requirements
 RUN \
-    yum -y update && \
-    yum install -y tar unzip wget xz git centos-release-scl yum-utils && \
-    yum-config-manager --enable centos-sclo-rh-testing && \
-    yum -y update && \
-    yum install -y $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-5.3.1-6.1.el6.x86_64.rpm \
-                   $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-c++-5.3.1-6.1.el6.x86_64.rpm \
-                   $DEVTOOLSET_URL_ROOT/devtoolset-4-binutils-2.25.1-8.el6.x86_64.rpm \
-                   $DEVTOOLSET_URL_ROOT/devtoolset-4-runtime-4.1-3.sc1.el6.x86_64.rpm \
-                   $DEVTOOLSET_URL_ROOT/devtoolset-4-libstdc++-devel-5.3.1-6.1.el6.x86_64.rpm && \
-    # Python
-    wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
-    bash Miniconda3.sh -b -p /opt/python && \
+    apt-get update && \
+    apt-get install -y tar unzip wget bzip2 libgomp1 git build-essential doxygen graphviz llvm libasan2 libidn11 liblz4-dev ninja-build && \
     # CMake
     wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
     bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
-    # Ninja
-    mkdir -p /usr/local && \
-    cd /usr/local/ && \
-    wget -nv -nc https://github.com/ninja-build/ninja/archive/v1.10.0.tar.gz --no-check-certificate && \
-    tar xf v1.10.0.tar.gz && mv ninja-1.10.0 ninja && rm -v v1.10.0.tar.gz && \
-    cd ninja && \
-    python ./configure.py --bootstrap
+    # Python
+    wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
+    bash Miniconda3.sh -b -p /opt/python
 
 # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
 RUN \
     export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \
-    export NCCL_VERSION=2.4.8-1 && \
-    wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
-    rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
-    yum -y update && \
-    yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} && \
-    rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm;
-
-ENV PATH=/opt/python/bin:/usr/local/ninja:$PATH
-ENV CC=/opt/rh/devtoolset-4/root/usr/bin/gcc
-ENV CXX=/opt/rh/devtoolset-4/root/usr/bin/c++
-ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp
+    export NCCL_VERSION=2.7.5-1 && \
+    apt-get update && \
+    apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}
 
-# Install Python packages
-RUN \
-    pip install numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.22
+ENV PATH=/opt/python/bin:$PATH
 
 ENV GOSU_VERSION 1.10
diff --git a/tests/ci_build/Dockerfile.gpu_build_centos6 b/tests/ci_build/Dockerfile.gpu_build_centos6
new file mode 100644
index 000000000000..e755ce12afec
--- /dev/null
+++ b/tests/ci_build/Dockerfile.gpu_build_centos6
@@ -0,0 +1,62 @@
+ARG CUDA_VERSION
+FROM nvidia/cuda:$CUDA_VERSION-devel-centos6
+ARG CUDA_VERSION
+
+# Environment
+ENV DEBIAN_FRONTEND noninteractive
+ENV DEVTOOLSET_URL_ROOT http://vault.centos.org/6.9/sclo/x86_64/rh/devtoolset-4/
+
+# Install all basic requirements
+RUN \
+    yum -y update && \
+    yum install -y tar unzip wget xz git centos-release-scl yum-utils && \
+    yum-config-manager --enable centos-sclo-rh-testing && \
+    yum -y update && \
+    yum install -y $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-5.3.1-6.1.el6.x86_64.rpm \
+                   $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-c++-5.3.1-6.1.el6.x86_64.rpm \
+                   $DEVTOOLSET_URL_ROOT/devtoolset-4-binutils-2.25.1-8.el6.x86_64.rpm \
+                   $DEVTOOLSET_URL_ROOT/devtoolset-4-runtime-4.1-3.sc1.el6.x86_64.rpm \
+                   $DEVTOOLSET_URL_ROOT/devtoolset-4-libstdc++-devel-5.3.1-6.1.el6.x86_64.rpm && \
+    # Python
+    wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
+    bash Miniconda3.sh -b -p /opt/python && \
+    # CMake
+    wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
+    bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
+    # Ninja
+    mkdir -p /usr/local && \
+    cd /usr/local/ && \
+    wget -nv -nc https://github.com/ninja-build/ninja/archive/v1.10.0.tar.gz --no-check-certificate && \
+    tar xf v1.10.0.tar.gz && mv ninja-1.10.0 ninja && rm -v v1.10.0.tar.gz && \
+    cd ninja && \
+    python ./configure.py --bootstrap
+
+# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
+RUN \
+    export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \
+    export NCCL_VERSION=2.4.8-1 && \
+    wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
+    rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
+    yum -y update && \
+    yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} && \
+    rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm;
+
+ENV PATH=/opt/python/bin:/usr/local/ninja:$PATH
+ENV CC=/opt/rh/devtoolset-4/root/usr/bin/gcc
+ENV CXX=/opt/rh/devtoolset-4/root/usr/bin/c++
+ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp
+
+ENV GOSU_VERSION 1.10
+
+# Install lightweight sudo (not bound to TTY)
+RUN set -ex; \
+    wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
+    chmod +x /usr/local/bin/gosu && \
+    gosu nobody true
+
+# Default entry-point to use if running locally
+# It will preserve attributes of created files
+COPY entrypoint.sh /scripts/
+
+WORKDIR /workspace
+ENTRYPOINT ["/scripts/entrypoint.sh"]
diff --git a/tests/ci_build/ci_build.sh b/tests/ci_build/ci_build.sh
index 5f0ed112039f..cc2a23091aa9 100755
--- a/tests/ci_build/ci_build.sh
+++ b/tests/ci_build/ci_build.sh
@@ -187,6 +187,10 @@ then
     # that is associated with the particular branch or pull request
     echo "docker tag ${DOCKER_IMG_NAME} ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
     docker tag "${DOCKER_IMG_NAME}" "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
+
+    echo "python3 -m awscli ecr create-repository --repository-name ${DOCKER_IMG_NAME} --region ${DOCKER_CACHE_ECR_REGION} || true"
+    python3 -m awscli ecr create-repository --repository-name ${DOCKER_IMG_NAME} --region ${DOCKER_CACHE_ECR_REGION} || true
+
     echo "docker push ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
     docker push "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
     if [[ $? != "0" ]]; then
diff --git a/tests/cpp/CMakeLists.txt b/tests/cpp/CMakeLists.txt
index 75948e5251f7..f092dc4020b8 100644
--- a/tests/cpp/CMakeLists.txt
+++ b/tests/cpp/CMakeLists.txt
@@ -19,8 +19,10 @@ target_link_libraries(testxgboost PRIVATE objxgboost)
 if (USE_CUDA)
   # OpenMP is mandatory for CUDA
   find_package(OpenMP REQUIRED)
-  target_include_directories(testxgboost PRIVATE
-    ${xgboost_SOURCE_DIR}/cub/)
+  if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.0)
+    target_include_directories(testxgboost PRIVATE
+      ${xgboost_SOURCE_DIR}/cub/)
+  endif (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.0)
   target_compile_options(testxgboost PRIVATE
     $<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>
     $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
diff --git a/tests/cpp/test_serialization.cc b/tests/cpp/test_serialization.cc
index e6bf1fef854a..9ab4c54166e8 100644
--- a/tests/cpp/test_serialization.cc
+++ b/tests/cpp/test_serialization.cc
@@ -148,8 +148,8 @@ void TestLearnerSerialization(Args args, FeatureMap const& fmap, std::shared_ptr<DMatrix> p_dmat) {
 // Binary is not tested, as it is NOT reproducible.
 class SerializationTest : public ::testing::Test {
  protected:
-  size_t constexpr static kRows = 10;
-  size_t constexpr static kCols = 10;
+  size_t constexpr static kRows = 15;
+  size_t constexpr static kCols = 15;
   std::shared_ptr<DMatrix> p_dmat_;
   FeatureMap fmap_;