Merge remote-tracking branch 'upstream/branch-0.11' into fea-ext-arima
rietmann-nv committed Oct 18, 2019
2 parents 6846e71 + 2776767 commit 864d557
Showing 245 changed files with 4,146 additions and 2,193 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -18,6 +18,7 @@ build_prims/
cuml.egg-info/
dist/
python/cuml/**/*.cpp
python/external_repositories
log
.ipynb_checkpoints
.DS_Store
35 changes: 33 additions & 2 deletions CHANGELOG.md
@@ -1,15 +1,26 @@
# cuML 0.11.0 (Date TBD)

## New Features
- PR #1129: C++: Separate include folder for C++ API distribution

- PR #1194: Initial ARIMA time-series modeling support.

## Improvements
- PR #1170: Use git to clone subprojects instead of git submodules
- PR #1225: setup.py clone dependencies like cmake and correct include paths
- PR #1224: Refactored FIL to prepare for sparse trees
- PR #1249: Include libcuml.so C API in installed targets
- PR #1259: Conda dev environment updates and use libcumlprims current version in CI
- PR #1264: Add -s flag to GPU CI pytest for better error printing
- PR #1271: Updated the Ridge regression documentation

## Bug Fixes
- PR #1212: Fix cmake git cloning always running configure in subprojects
- PR #1261: Fix comms build errors due to cuml++ include folder changes
- PR #1267: Update build.sh for recent change of building comms in main CMakeLists
- PR #1278: Removed incorrect overloaded instance of eigJacobi


-# cuML 0.10.0 (Date TBD)
+# cuML 0.10.0 (16 Oct 2019)

## New Features
- PR #1148: C++ benchmark tool for c++/CUDA code inside cuML
@@ -22,6 +33,7 @@
- PR #1149 Add YYMMDD to version tag for nightly conda packages
- PR #892: General Gram matrices prim
- PR #912: Support Vector Machine
- PR #1274: Updated the RF score function to use GPU predict

## Improvements
- PR #961: High Performance RF; HIST algo
@@ -33,15 +45,23 @@
- PR #1086: Ensure RegressorMixin scorer uses device arrays
- PR #1108: input_to_host_array function in input_utils for input processing to host arrays
- PR #1114: K-means: Exposing useful params, removing unused params, proxying params in Dask
- PR #1138: Implementing ANY_RANK semantics on irecv
- PR #1142: prims: expose separate InType and OutType for unaryOp and binaryOp
- PR #1115: Moving dask_make_blobs to cuml.dask.datasets. Adding conversion to dask.DataFrame
- PR #1136: CUDA 10.1 CI updates
- PR #1135: K-means: add boundary cases for kmeans||, support finer control with convergence
- PR #1163: Some more correctness improvements. Better verbose printing
- PR #1165: Adding except + in all remaining cython
- PR #1186: Using LocalCUDACluster Pytest fixture
- PR #1173: Docs: Barnes Hut TSNE documentation
- PR #1176: Use new RMM API based on Cython
- PR #1247: Improved MNMG RF error checking

## Bug Fixes

- PR #1231: RF respect number of cuda streams from cuml handle
- PR #1230: Rf bugfix memleak in regression
- PR #1208: compile dbscan bug
- PR #1016: Use correct libcumlprims version in GPU CI
- PR #1040: Update version of numba in development conda yaml files
- PR #1043: Updates to accommodate cuDF python code reorganization
@@ -59,7 +79,16 @@
- PR #1132: DBSCAN Batching Bug Fix
- PR #1162: DASK RF random seed bug fix
- PR #1164: Fix check_dtype arg handling for input_to_dev_array
- PR #1171: SVM prediction bug fix
- PR #1177: Update dask and distributed to 2.5
- PR #1204: Fix SVM crash on Turing
- PR #1199: Replaced sprintf() with snprintf() in THROW()
- PR #1205: Update dask-cuda in yml envs
- PR #1211: Fixing Dask k-means transform bug and adding test
- PR #1236: Improve fix for SMO solvers potential crash on Turing
- PR #1251: Disable compiler optimization for CUDA 10.1 for distance prims
- PR #1260: Small bugfix for major conversion in input_utils
- PR #1276: Fix float64 prediction crash in test_random_forest

# cuML 0.9.0 (21 Aug 2019)

@@ -94,6 +123,7 @@
- PR #882: TSNE - T-Distributed Stochastic Neighbourhood Embedding
- PR #624: Internals API & Graph Based Dimensionality Reductions Callback
- PR #926: Wrapper for FIL
- PR #994: Adding MPI comm impl for testing / benchmarking MNMG CUDA
- PR #960: Enable using libcumlprims for MG algorithms/prims

## Improvements
@@ -146,6 +176,7 @@
- PR #1018: Hint location of nccl in build.sh for CI
- PR #1022: Using random_state to make K-Means MNMG tests deterministic
- PR #1034: Fix typos and formatting issues in RF docs
- PR #1052: Fix the rows_sample dtype to float

# cuML 0.8.0 (27 June 2019)

58 changes: 23 additions & 35 deletions build.sh
@@ -18,11 +18,10 @@ ARGS=$*
# script, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)

-VALIDARGS="clean deep-clean libcuml cuml prims bench -v -g -n --allgpuarch --multigpu -h --help"
+VALIDARGS="clean libcuml cuml prims bench -v -g -n --allgpuarch --singlegpu -h --help"
HELP="$0 [<target> ...] [<flag> ...]
where <target> is:
clean - remove all existing build artifacts and configuration (start over)
-   deep-clean   - same as 'clean' option, but also cleans up the faiss build
libcuml - build the cuml C++ code only. Also builds the C-wrapper library
around the C++ code.
cuml - build the cuml Python package
@@ -33,7 +32,7 @@ HELP="$0 [<target> ...] [<flag> ...]
-g - build for debug
-n - no install step
--allgpuarch - build for all supported GPU architectures
-   --multigpu   - Build cuml with multigpu support (requires libcumlMG and CUDA >=10.0)
+   --singlegpu  - Build cuml without multigpu support (multigpu requires libcumlprims)
-h - print this text
default action (no args) is to build and install 'libcuml', 'cuml', and 'prims' targets only for the detected GPU arch
@@ -42,14 +41,15 @@ LIBCUML_BUILD_DIR=${REPODIR}/cpp/build
CUML_COMMS_BUILD_DIR=${REPODIR}/cpp/comms/std/build
CUML_BUILD_DIR=${REPODIR}/python/build
FAISS_DIR=${REPODIR}/thirdparty/faiss
-BUILD_DIRS="${LIBCUML_BUILD_DIR} ${CUML_BUILD_DIR}"
+PYTHON_DEPS_CLONE=${REPODIR}/python/external_repositories
+BUILD_DIRS="${LIBCUML_BUILD_DIR} ${CUML_BUILD_DIR} ${PYTHON_DEPS_CLONE}"

# Set defaults for vars modified by flags to this script
VERBOSE=""
BUILD_TYPE=Release
INSTALL_TARGET=install
BUILD_ALL_GPU_ARCH=0
-MULTIGPU=""
+SINGLEGPU=""
CLEAN=0

# Set defaults for vars that may not have been defined externally
@@ -91,10 +91,10 @@ fi
if hasArg --allgpuarch; then
BUILD_ALL_GPU_ARCH=1
fi
-if hasArg --multigpu; then
-    MULTIGPU=--multigpu
+if hasArg --singlegpu; then
+    SINGLEGPU="--singlegpu"
 fi
-if hasArg deep-clean || hasArg clean; then
+if hasArg clean; then
CLEAN=1
fi

@@ -112,14 +112,6 @@ if (( ${CLEAN} == 1 )); then
done
fi

-# clean the faiss build also, if asked
-if hasArg deep-clean; then
-    cd ${FAISS_DIR}
-    make clean
-    cd gpu
-    make clean
-fi

################################################################################
# Configure for building all C++ targets
if (( ${NUMARGS} == 0 )) || hasArg libcuml || hasArg prims || hasArg bench; then
@@ -139,47 +131,43 @@ if (( ${NUMARGS} == 0 )) || hasArg libcuml || hasArg prims || hasArg bench; then
-DBLAS_LIBRARIES=${INSTALL_PREFIX}/lib/libopenblas.so.0 \
${GPU_ARCH} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-      -DPARALLEL_LEVEL=${PARALLEL_LEVEL} ..
+      -DBUILD_CUML_C_LIBRARY=ON \
+      -DPARALLEL_LEVEL=${PARALLEL_LEVEL} \
+      -DNCCL_PATH=${INSTALL_PREFIX} ..

fi

# Run all make targets at once

MAKE_TARGETS=
-if (( ${NUMARGS} == 0 )) || hasArg libcuml; then
+if hasArg libcuml; then
MAKE_TARGETS="${MAKE_TARGETS}cuml++ cuml ml ml_mg"
fi
-if (( ${NUMARGS} == 0 )) || hasArg prims; then
+if hasArg prims; then
MAKE_TARGETS="${MAKE_TARGETS} prims"
fi
-if (( ${NUMARGS} == 0 )) || hasArg bench; then
+if hasArg bench; then
MAKE_TARGETS="${MAKE_TARGETS} sg_benchmark"
fi

# build cumlcomms library
-if [ "${MAKE_TARGETS}" != "" ]; then
+# If `./build.sh cuml` is called, don't build C/C++ components
+if (( ${NUMARGS} == 0 )) || hasArg libcuml || hasArg prims || hasArg bench; then
# If there are no targets specified when calling build.sh, it will
# just call `make -j`. This avoids a lot of extra printing
cd ${LIBCUML_BUILD_DIR}
make -j${PARALLEL_LEVEL} ${MAKE_TARGETS} VERBOSE=${VERBOSE} ${INSTALL_TARGET}

mkdir -p ${CUML_COMMS_BUILD_DIR}
cd ${CUML_COMMS_BUILD_DIR}

cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
-DWITH_UCX=OFF \
-DCUML_INSTALL_DIR=${INSTALL_PREFIX}/lib .. \
-DNCCL_PATH=${INSTALL_PREFIX} ..

cd ${CUML_COMMS_BUILD_DIR}
make -j${PARALLEL_LEVEL} VERBOSE=${VERBOSE} ${INSTALL_TARGET}
fi


# Build and (optionally) install the cuml Python package
if (( ${NUMARGS} == 0 )) || hasArg cuml; then

cd ${REPODIR}/python
if [[ ${INSTALL_TARGET} != "" ]]; then
-        python setup.py build_ext --inplace ${MULTIGPU}
-        python setup.py install --single-version-externally-managed --record=record.txt ${MULTIGPU}
+        python setup.py build_ext --inplace ${SINGLEGPU}
+        python setup.py install --single-version-externally-managed --record=record.txt ${SINGLEGPU}
else
-        python setup.py build_ext --inplace --library-dir=${LIBCUML_BUILD_DIR} ${MULTIGPU}
+        python setup.py build_ext --inplace --library-dir=${LIBCUML_BUILD_DIR} ${SINGLEGPU}
fi
fi
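build.sh branches on its targets and flags through a `hasArg` helper that the hunks above call but never show. A minimal sketch of how such a helper can work — the function body here is an assumption; only the call sites match the script:

```shell
# Sketch of the hasArg dispatch pattern build.sh relies on. The helper's body
# is not part of this diff; this reconstruction word-matches against $ARGS.
ARGS="clean cuml -v"          # stand-in for "$*" as captured at script start
NUMARGS=3

hasArg () {
    # Succeed when the literal word $1 appears among the script arguments.
    [ "$NUMARGS" -ne 0 ] && echo " ${ARGS} " | grep -q " $1 "
}

SINGLEGPU=""
CLEAN=0
if hasArg --singlegpu; then
    SINGLEGPU="--singlegpu"
fi
if hasArg clean; then
    CLEAN=1
fi
echo "CLEAN=${CLEAN} SINGLEGPU='${SINGLEGPU}'"
```

With the sample arguments above, `clean` matches and `--singlegpu` does not, which is exactly how the flag blocks in the diff toggle `CLEAN` and `SINGLEGPU`.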
24 changes: 24 additions & 0 deletions ci/checks/style.sh
@@ -40,4 +40,28 @@ else
echo -e "\n\n>>>> PASSED: copyright check\n\n"
fi

# Check for a consistent #include syntax
# TODO: keep adding more dirs as and when we update the syntax
HASH_INCLUDE=`python cpp/scripts/include_checker.py \
cpp/comms/mpi/include \
cpp/comms/mpi/src \
cpp/comms/std/include \
cpp/comms/std/src \
cpp/include \
cpp/examples \
2>&1`
HASH_RETVAL=$?
if [ "$RETVAL" = "0" ]; then
RETVAL=$HASH_RETVAL
fi

# Output results if failure otherwise show pass
if [ "$HASH_RETVAL" != "0" ]; then
echo -e "\n\n>>>> FAILED: #include check; begin output\n\n"
echo -e "$HASH_INCLUDE"
echo -e "\n\n>>>> FAILED: #include check; end output\n\n"
else
echo -e "\n\n>>>> PASSED: #include check\n\n"
fi

exit $RETVAL
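The new hunk folds the include checker's exit code into the script's overall return value without letting a later passing check mask an earlier failure. The same pattern, generalized (the `run_check` helper name is illustrative, not from the script):

```shell
# "Keep the first failure" pattern from style.sh: RETVAL is only overwritten
# while it is still zero, so the earliest nonzero exit status wins.
RETVAL=0

run_check () {
    "$@"
    rv=$?
    if [ "$RETVAL" = "0" ]; then
        RETVAL=$rv
    fi
}

run_check true    # passes: RETVAL stays 0
run_check false   # fails:  RETVAL becomes 1
run_check true    # a later pass does not mask the earlier failure
echo "RETVAL=$RETVAL"
```

This is why the script exits with the copyright check's status when it failed first, and with the include check's status otherwise.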
4 changes: 2 additions & 2 deletions ci/gpu/build.sh
@@ -82,7 +82,7 @@ export LD_LIBRARY_PATH_CACHED=$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH

logger "Build libcuml..."
-$WORKSPACE/build.sh clean libcuml cuml prims bench --multigpu -v
+$WORKSPACE/build.sh clean libcuml cuml prims bench -v

logger "Resetting LD_LIBRARY_PATH..."

@@ -107,7 +107,7 @@ GTEST_OUTPUT="xml:${WORKSPACE}/test-results/libcuml_cpp/" ./test/ml

logger "Python pytest for cuml..."
cd $WORKSPACE/python
-pytest --cache-clear --junitxml=${WORKSPACE}/junit-cuml.xml -v --ignore=cuml/test/test_trustworthiness.py
+pytest --cache-clear --junitxml=${WORKSPACE}/junit-cuml.xml -v -s

################################################################################
# TEST - Run GoogleTest for ml-prims
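The unchanged context around this hunk shows the CI script caching `LD_LIBRARY_PATH` before pointing it at the conda environment's libraries, then resetting it after the build. The save/restore shape in isolation (the conda path is a placeholder for illustration):

```shell
# Save/restore of LD_LIBRARY_PATH as done around the build in ci/gpu/build.sh.
CONDA_PREFIX=${CONDA_PREFIX:-/opt/conda}   # placeholder path for illustration
ORIGINAL=$LD_LIBRARY_PATH

export LD_LIBRARY_PATH_CACHED=$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH

# ... build and test steps run with the conda env's libs resolved first ...

export LD_LIBRARY_PATH=$LD_LIBRARY_PATH_CACHED   # "Resetting LD_LIBRARY_PATH"
```

Prepending (rather than replacing) keeps system libraries reachable during the build, and restoring from the cached copy leaves later CI steps unaffected.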
10 changes: 5 additions & 5 deletions conda/environments/cuml_dev_cuda10.0.yml
@@ -10,8 +10,8 @@ dependencies:
- cmake=3.14.5
- numba=0.45*
- rapidsai/label/cuda10.0::cupy>=6*
-  - cudf=0.10*
-  - rmm=0.10*
+  - cudf=0.11*
+  - rmm=0.11*
- cython>=0.29,<0.30
- pytest>=4.6
- scikit-learn>=0.21
@@ -20,8 +20,8 @@ dependencies:
- dask=2.5.0
- distributed=2.5.1
- dask-ml
-  - dask-cuda=0.9*
-  - dask-cudf=0.10*
+  - dask-cuda=0.11*
+  - dask-cudf=0.11*
- nccl>=2.4
-  - libcumlprims=0.10*
+  - libcumlprims=0.11*
- statsmodels
10 changes: 5 additions & 5 deletions conda/environments/cuml_dev_cuda10.1.yml
@@ -10,8 +10,8 @@ dependencies:
- cmake=3.14.5
- numba=0.45*
- rapidsai/label/cuda10.1::cupy>=6*
-  - cudf=0.10*
-  - rmm=0.10*
+  - cudf=0.11*
+  - rmm=0.11*
- cython>=0.29,<0.30
- pytest>=4.6
- scikit-learn>=0.21
@@ -20,8 +20,8 @@ dependencies:
- dask=2.5.0
- distributed=2.5.1
- dask-ml
-  - dask-cuda=0.9*
-  - dask-cudf=0.10*
+  - dask-cuda=0.11*
+  - dask-cudf=0.11*
- nccl>=2.4
-  - libcumlprims=0.10*
+  - libcumlprims=0.11*
- statsmodels
10 changes: 5 additions & 5 deletions conda/environments/cuml_dev_cuda9.2.yml
@@ -10,8 +10,8 @@ dependencies:
- cmake=3.14.5
- numba=0.45*
- rapidsai/label/cuda9.2::cupy>=6*
-  - cudf=0.10*
-  - rmm=0.10*
+  - cudf=0.11*
+  - rmm=0.11*
- cython>=0.29,<0.30
- pytest>=4.6
- scikit-learn>=0.21
@@ -20,8 +20,8 @@ dependencies:
- dask=2.5.0
- distributed=2.5.1
- dask-ml
-  - dask-cuda=0.9*
-  - dask-cudf=0.10*
+  - dask-cuda=0.11*
+  - dask-cudf=0.11*
- nccl>=2.4
-  - libcumlprims=0.10*
+  - libcumlprims=0.11*
- statsmodels
2 changes: 1 addition & 1 deletion conda/recipes/cuml/build.sh
@@ -1,4 +1,4 @@
#!/usr/bin/env bash

# This assumes the script is executed from the root of the repo directory
-./build.sh cuml --multigpu
+./build.sh cuml
4 changes: 2 additions & 2 deletions conda/recipes/cuml/meta.yaml
@@ -32,13 +32,13 @@ requirements:
- cmake>=3.14
- cudf {{ minor_version }}
- libcuml={{ version }}
-    - libcumlprims={{ minor_version }}
+    - libcumlprims {{ minor_version }}
- cudatoolkit {{ cuda_version }}.*
run:
- python x.x
- cudf {{ minor_version }}
- libcuml={{ version }}
-    - libcumlprims={{ minor_version }}
+    - libcumlprims {{ minor_version }}
- nccl 2.4.*
- {{ pin_compatible('cudatoolkit', max_pin='x.x') }}

2 changes: 2 additions & 0 deletions conda/recipes/libcuml/meta.yaml
@@ -34,8 +34,10 @@ requirements:
- nccl 2.4.*
- cudf {{ minor_version }}
- cudatoolkit {{ cuda_version }}.*
- libcumlprims {{ minor_version }}
- lapack
run:
- libcumlprims {{ minor_version }}
- cudf {{ minor_version }}
- nccl 2.4.*
- {{ pin_compatible('cudatoolkit', max_pin='x.x') }}
