Merge pull request #4 from rapidsai/branch-0.10
Merge 0.10
vishalmehta1991 authored Oct 7, 2019
2 parents 38aa355 + 39aed94 commit 9a5f47c
Showing 27 changed files with 699 additions and 383 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -27,11 +27,13 @@
- PR #1136: CUDA 10.1 CI updates
- PR #1163: Some more correctness improvements. Better verbose printing
- PR #1165: Adding except + in all remaining cython
- PR #1186: Using LocalCUDACluster Pytest fixture
- PR #1173: Docs: Barnes Hut TSNE documentation
- PR #1176: Use new RMM API based on Cython

## Bug Fixes

- PR #1208: compile dbscan bug
- PR #1016: Use correct libcumlprims version in GPU CI
- PR #1040: Update version of numba in development conda yaml files
- PR #1043: Updates to accomodate cuDF python code reorganization
@@ -49,7 +51,12 @@
- PR #1132: DBSCAN Batching Bug Fix
- PR #1162: DASK RF random seed bug fix
- PR #1164: Fix check_dtype arg handling for input_to_dev_array
- PR #1171: SVM prediction bug fix
- PR #1177: Update dask and distributed to 2.5
- PR #1204: Fix SVM crash on Turing
- PR #1199: Replaced sprintf() with snprintf() in THROW()
- PR #1205: Update dask-cuda in yml envs
- PR #1211: Fixing Dask k-means transform bug and adding test

# cuML 0.9.0 (21 Aug 2019)

2 changes: 1 addition & 1 deletion conda/environments/cuml_dev_cuda10.0.yml
@@ -20,7 +20,7 @@ dependencies:
- dask=2.5.0
- distributed=2.5.1
- dask-ml
- dask-cuda=0.9*
- dask-cuda=0.10*
- dask-cudf=0.10*
- nccl>=2.4
- libcumlprims=0.10*
2 changes: 1 addition & 1 deletion conda/environments/cuml_dev_cuda10.1.yml
@@ -20,7 +20,7 @@ dependencies:
- dask=2.5.0
- distributed=2.5.1
- dask-ml
- dask-cuda=0.9*
- dask-cuda=0.10*
- dask-cudf=0.10*
- nccl>=2.4
- libcumlprims=0.10*
2 changes: 1 addition & 1 deletion cpp/src/dbscan/vertexdeg/algo.h
@@ -62,7 +62,7 @@ void launcher(const ML::cumlHandle_impl &handle, Pack<value_t, index_t> data,

auto fused_op = [vd, n] __device__(index_t global_c_idx, bool in_neigh) {
// fused construction of vertex degree
index_t batch_vertex = fmod(global_c_idx, n);
index_t batch_vertex = global_c_idx % n;

if (sizeof(index_t) == 4) {
atomicAdd((unsigned int *)(vd + batch_vertex), in_neigh);
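The change above replaces fmod(global_c_idx, n) with the integer remainder global_c_idx % n when computing the batch vertex index. fmod operates on doubles, so the index has to round-trip through floating point; beyond 2^53 that conversion is no longer exact, and the % operator also avoids the conversion cost inside the fused kernel. A minimal host-side sketch (illustrative only, not part of this commit) showing where the two diverge:

#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  const int64_t n = 3;
  // 2^53 + 1 is the first integer a double cannot represent exactly.
  const int64_t global_c_idx = (int64_t{1} << 53) + 1;
  // fmod path: the index is rounded to 2^53 before the remainder is taken.
  const int64_t via_fmod = static_cast<int64_t>(
      std::fmod(static_cast<double>(global_c_idx), static_cast<double>(n)));
  // Integer path: exact for the full range of the index type.
  const int64_t via_mod = global_c_idx % n;
  printf("fmod: %lld, %%: %lld\n", (long long)via_fmod, (long long)via_mod);
  return 0;
}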
15 changes: 11 additions & 4 deletions cpp/src/svm/smosolver.h
@@ -71,20 +71,21 @@ class SmoSolver {
bool verbose = false;
SmoSolver(const cumlHandle_impl &handle, math_t C, math_t tol,
MLCommon::Matrix::GramMatrixBase<math_t> *kernel,
float cache_size = 200)
float cache_size = 200, int nochange_steps = 1000)
: handle(handle),
n_rows(n_rows),
C(C),
tol(tol),
kernel(kernel),
cache_size(cache_size),
nochange_steps(nochange_steps),
stream(handle.getStream()),
return_buff(handle.getDeviceAllocator(), stream, 2),
alpha(handle.getDeviceAllocator(), stream),
delta_alpha(handle.getDeviceAllocator(), stream),
f(handle.getDeviceAllocator(), stream) {}

#define SMO_WS_SIZE 1024
#define SMO_WS_SIZE 512
/**
* Solve the quadratic optimization problem.
*
@@ -229,19 +230,25 @@
// Variables to track convergence of training
math_t diff_prev;
int n_small_diff;
int nochange_steps;

bool CheckStoppingCondition(math_t diff) {
// TODO improve stopping condition to detect oscillationsq, see Issue #947
// TODO improve stopping condition to detect oscillations, see Issue #947
bool keep_going = true;
if (abs(diff - diff_prev) < 0.001 * tol) {
n_small_diff++;
} else {
diff_prev = diff;
n_small_diff = 0;
}
if (diff < tol || n_small_diff > 10) {
if (n_small_diff > nochange_steps) {
if (verbose) {
std::cout << "SMO error: Stopping due to unchanged diff over "
<< nochange_steps << " consecutive steps\n";
}
keep_going = false;
}
if (diff < tol) keep_going = false;
// ASSERT(!isnan(diff), "SMO: NaN found during fitting")
if (isnan(diff)) {
std::cout << "SMO error: NaN found during fitting\n";
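The SmoSolver changes above make the stall detection configurable: a near-unchanged diff now has to persist for more than nochange_steps consecutive outer iterations (default 1000, previously a hard-coded 10) before training is aborted, while plain convergence (diff < tol) still stops immediately. A standalone sketch of that control flow with the member fields passed explicitly (simplified for readability, not the committed code):

#include <cmath>
#include <iostream>

// Returns false when the outer SMO loop should stop.
bool check_stopping_condition(double diff, double tol, int nochange_steps,
                              double &diff_prev, int &n_small_diff,
                              bool verbose) {
  bool keep_going = true;
  if (std::abs(diff - diff_prev) < 0.001 * tol) {
    n_small_diff++;       // diff barely moved: count another stalled step
  } else {
    diff_prev = diff;     // diff moved: remember it and reset the counter
    n_small_diff = 0;
  }
  if (n_small_diff > nochange_steps) {
    if (verbose)
      std::cout << "SMO error: Stopping due to unchanged diff over "
                << nochange_steps << " consecutive steps\n";
    keep_going = false;   // stalled for too long
  }
  if (diff < tol) keep_going = false;  // converged
  if (std::isnan(diff)) {
    std::cout << "SMO error: NaN found during fitting\n";
    keep_going = false;   // assumed: abort on NaN (handling is truncated above)
  }
  return keep_going;
}

int main() {
  double diff_prev = 0;
  int n_small_diff = 0;
  // A diff below tol stops immediately, regardless of the stall counter.
  std::cout << check_stopping_condition(1e-4, 1e-3, 1000, diff_prev,
                                        n_small_diff, false)
            << "\n";  // prints 0
  return 0;
}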
14 changes: 9 additions & 5 deletions cpp/src/svm/svc.cu
@@ -47,12 +47,14 @@ template void svcFit<double>(const cumlHandle &handle, double *input,
template void svcPredict<float>(const cumlHandle &handle, float *input,
int n_rows, int n_cols,
MLCommon::Matrix::KernelParams &kernel_params,
const svmModel<float> &model, float *preds);
const svmModel<float> &model, float *preds,
float buffer_size);

template void svcPredict<double>(const cumlHandle &handle, double *input,
int n_rows, int n_cols,
MLCommon::Matrix::KernelParams &kernel_params,
const svmModel<double> &model, double *preds);
const svmModel<double> &model, double *preds,
double buffer_size);

template void svmFreeBuffers(const cumlHandle &handle, svmModel<float> &m);

@@ -61,9 +63,9 @@ template void svmFreeBuffers(const cumlHandle &handle, svmModel<double> &m);
template <typename math_t>
SVC<math_t>::SVC(cumlHandle &handle, math_t C, math_t tol,
Matrix::KernelParams kernel_params, math_t cache_size,
int max_iter, bool verbose)
int max_iter, int nochange_steps, bool verbose)
: handle(handle),
param(svmParameter{C, cache_size, max_iter, tol, verbose}),
param(svmParameter{C, cache_size, max_iter, nochange_steps, tol, verbose}),
kernel_params(kernel_params) {
model.n_support = 0;
model.dual_coefs = nullptr;
@@ -87,7 +89,9 @@ void SVC<math_t>::fit(math_t *input, int n_rows, int n_cols, math_t *labels) {
template <typename math_t>
void SVC<math_t>::predict(math_t *input, int n_rows, int n_cols,
math_t *preds) {
svcPredict(handle, input, n_rows, n_cols, kernel_params, model, preds);
math_t buffer_size = param.cache_size;
svcPredict(handle, input, n_rows, n_cols, kernel_params, model, preds,
buffer_size);
}

// Instantiate templates for the shared library
7 changes: 5 additions & 2 deletions cpp/src/svm/svc.hpp
@@ -71,11 +71,13 @@ void svcFit(const cumlHandle &handle, math_t *input, int n_rows, int n_cols,
* @param [in] model SVM model parameters
* @param [out] preds device pointer to store the predicted class labels.
* Size [n_rows]. Should be allocated on entry.
* @param [in] buffer_size size of temporary buffer in MiB
*/
template <typename math_t>
void svcPredict(const cumlHandle &handle, math_t *input, int n_rows, int n_cols,
MLCommon::Matrix::KernelParams &kernel_params,
const svmModel<math_t> &model, math_t *preds);
const svmModel<math_t> &model, math_t *preds,
math_t buffer_size);

/**
* Deallocate device buffers in the svmModel struct.
@@ -126,7 +128,8 @@ class SVC {
SVC(cumlHandle &handle, math_t C = 1, math_t tol = 1.0e-3,
MLCommon::Matrix::KernelParams kernel_params =
MLCommon::Matrix::KernelParams{MLCommon::Matrix::LINEAR, 3, 1, 0},
math_t cache_size = 200, int max_iter = -1, bool verbose = false);
math_t cache_size = 200, int max_iter = -1, int nochange_steps = 1000,
bool verbose = false);

~SVC();

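With the declaration above, nochange_steps becomes a new constructor parameter placed between max_iter and verbose, so positional callers that previously passed verbose directly after max_iter need updating. A usage sketch under stated assumptions: the include path and the ML::SVM namespace follow the conventions of this directory rather than being shown in the diff, and input, labels and preds must be device pointers:

#include "svm/svc.hpp"   // assumed include path within cpp/src

void train_and_predict(ML::cumlHandle &handle, float *input, int n_rows,
                       int n_cols, float *labels, float *preds) {
  MLCommon::Matrix::KernelParams kp{MLCommon::Matrix::LINEAR, 3, 1, 0};
  // New parameter order: ..., cache_size, max_iter, nochange_steps, verbose
  ML::SVM::SVC<float> svc(handle, /*C=*/1.0f, /*tol=*/1e-3f, kp,
                          /*cache_size=*/200.0f, /*max_iter=*/-1,
                          /*nochange_steps=*/1000, /*verbose=*/false);
  svc.fit(input, n_rows, n_cols, labels);     // device pointers
  svc.predict(input, n_rows, n_cols, preds);  // buffer limited by cache_size
}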
58 changes: 51 additions & 7 deletions cpp/src/svm/svc_impl.h
@@ -23,13 +23,17 @@
#include <iostream>

#include <cublas_v2.h>
#include <thrust/copy.h>
#include <thrust/device_ptr.h>
#include <thrust/iterator/counting_iterator.h>
#include "common/cumlHandle.hpp"
#include "common/device_buffer.hpp"
#include "kernelcache.h"
#include "label/classlabels.h"
#include "linalg/cublas_wrappers.h"
#include "linalg/unary_op.h"
#include "matrix/kernelfactory.h"
#include "matrix/matrix.h"
#include "smosolver.h"
#include "svm_model.h"
#include "svm_parameter.h"
@@ -90,7 +94,7 @@ void svcFit(const cumlHandle &handle, math_t *input, int n_rows, int n_cols,
MLCommon::Matrix::KernelFactory<math_t>::create(
kernel_params, handle_impl.getCublasHandle());
SmoSolver<math_t> smo(handle_impl, param.C, param.tol, kernel,
param.cache_size);
param.cache_size, param.nochange_steps);
smo.verbose = param.verbose;
smo.Solve(input, n_rows, n_cols, y.data(), &(model.dual_coefs),
&(model.n_support), &(model.x_support), &(model.support_idx),
@@ -109,6 +113,13 @@
* We evaluate f(x_i), and then instead of taking the sign to return +/-1 labels,
* we map it to the original labels, and return those.
*
* We process the input vectors batchwise, and evaluate the full rows of kernel
* matrix K(x_i, x_j) for a batch (size n_batch * n_support). The maximum size
* of this buffer (i.e. the maximum batch_size) is controlled by the
* buffer_size input parameter. For models where n_support is large, increasing
* buffer_size might improve prediction performance.
*
*
* @tparam math_t floating point type
* @param handle the cuML handle
* @param [in] input device pointer for the input data in column major format,
@@ -119,42 +130,75 @@ void svcFit(const cumlHandle &handle, math_t *input, int n_rows, int n_cols,
* @param [in] model SVM model parameters
* @param [out] preds device pointer to store the predicted class labels.
* Size [n_rows]. Should be allocated on entry.
* @param [in] buffer_size size of temporary buffer in MiB
*/
template <typename math_t>
void svcPredict(const cumlHandle &handle, math_t *input, int n_rows, int n_cols,
MLCommon::Matrix::KernelParams &kernel_params,
const svmModel<math_t> &model, math_t *preds) {
const svmModel<math_t> &model, math_t *preds,
math_t buffer_size) {
ASSERT(n_cols == model.n_cols,
"Parameter n_cols: shall be the same that was used for fitting");
// We might want to query the available memory before selecting the batch size.
// We will need n_batch * n_support floats for the kernel matrix K.
#define N_PRED_BATCH 4096
const int N_PRED_BATCH = 4096;
int n_batch = N_PRED_BATCH < n_rows ? N_PRED_BATCH : n_rows;

// Limit the memory size of the prediction buffer
buffer_size = buffer_size * 1024 * 1024;
if (n_batch * model.n_support * sizeof(math_t) > buffer_size) {
n_batch = buffer_size / (model.n_support * sizeof(math_t));
if (n_batch < 1) n_batch = 1;
}

const cumlHandle_impl &handle_impl = handle.getImpl();
cudaStream_t stream = handle_impl.getStream();

MLCommon::device_buffer<math_t> K(handle_impl.getDeviceAllocator(), stream,
n_batch * model.n_support);
MLCommon::device_buffer<math_t> y(handle_impl.getDeviceAllocator(), stream,
n_rows);
MLCommon::device_buffer<math_t> x_rbf(handle_impl.getDeviceAllocator(),
stream);
MLCommon::device_buffer<int> idx(handle_impl.getDeviceAllocator(), stream);

cublasHandle_t cublas_handle = handle_impl.getCublasHandle();

MLCommon::Matrix::GramMatrixBase<math_t> *kernel =
MLCommon::Matrix::KernelFactory<math_t>::create(kernel_params,
cublas_handle);

if (kernel_params.kernel == MLCommon::Matrix::RBF) {
// Temporary buffers for the RBF kernel, see below
x_rbf.resize(n_batch * n_cols, stream);
idx.resize(n_batch, stream);
}
// We process the input data batchwise:
// - calculate the kernel values K[x_batch, x_support]
// - calculate y(x_batch) = K[x_batch, x_support] * dual_coeffs
for (int i = 0; i < n_rows; i += n_batch) {
if (i + n_batch >= n_rows) {
n_batch = n_rows - i;
}
kernel->evaluate(input + i, n_batch, n_cols, model.x_support,
model.n_support, K.data(), stream, n_rows, model.n_support,
n_batch);
math_t *x_ptr = nullptr;
int ld1 = 0;
if (kernel_params.kernel == MLCommon::Matrix::RBF) {
// The RBF kernel does not support ld parameters (See issue #1172)
// To come around this limitation, we copy the batch into a temporary
// buffer.
thrust::counting_iterator<int> first(i);
thrust::counting_iterator<int> last = first + n_batch;
thrust::device_ptr<int> idx_ptr(idx.data());
thrust::copy(thrust::cuda::par.on(stream), first, last, idx_ptr);
MLCommon::Matrix::copyRows(input, n_rows, n_cols, x_rbf.data(),
idx.data(), n_batch, stream, false);
x_ptr = x_rbf.data();
ld1 = n_batch;
} else {
x_ptr = input + i;
ld1 = n_rows;
}
kernel->evaluate(x_ptr, n_batch, n_cols, model.x_support, model.n_support,
K.data(), stream, ld1, model.n_support, n_batch);
math_t one = 1;
math_t null = 0;
CUBLAS_CHECK(MLCommon::LinAlg::cublasgemv(
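The batching logic above caps the kernel-row buffer K (n_batch * n_support elements) at buffer_size MiB, shrinking the default batch of 4096 rows when the model has many support vectors. A small worked example of that arithmetic (illustrative numbers, not from the commit):

#include <cstdio>

int main() {
  const int n_rows = 1000000;   // rows to predict
  const int n_support = 100000; // support vectors in the model
  double buffer_size = 200;     // MiB, same meaning as the new parameter
  int n_batch = 4096 < n_rows ? 4096 : n_rows;  // N_PRED_BATCH default
  double buffer_bytes = buffer_size * 1024 * 1024;
  if (double(n_batch) * n_support * sizeof(float) > buffer_bytes) {
    n_batch = int(buffer_bytes / (n_support * sizeof(float)));
    if (n_batch < 1) n_batch = 1;
  }
  // With these numbers the 200 MiB cap reduces the batch from 4096 to 524
  // rows, trading a few more kernel evaluations for bounded memory use.
  printf("n_batch = %d\n", n_batch);
  return 0;
}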
25 changes: 14 additions & 11 deletions cpp/src/svm/svm_api.cpp
@@ -25,16 +25,17 @@

cumlError_t cumlSpSvcFit(cumlHandle_t handle, float *input, int n_rows,
int n_cols, float *labels, float C, float cache_size,
int max_iter, float tol, int verbose,
cumlSvmKernelType kernel, int degree, float gamma,
float coef0, int *n_support, float *b,
int max_iter, int nochange_steps, float tol,
int verbose, cumlSvmKernelType kernel, int degree,
float gamma, float coef0, int *n_support, float *b,
float **dual_coefs, float **x_support,
int **support_idx, int *n_classes,
float **unique_labels) {
ML::SVM::svmParameter param;
param.C = C;
param.cache_size = cache_size;
param.max_iter = max_iter;
param.nochange_steps = nochange_steps;
param.tol = tol;
param.verbose = verbose;

@@ -76,16 +77,17 @@ cumlError_t cumlSpSvcFit(cumlHandle_t handle, float *input, int n_rows,

cumlError_t cumlDpSvcFit(cumlHandle_t handle, double *input, int n_rows,
int n_cols, double *labels, double C,
double cache_size, int max_iter, double tol,
int verbose, cumlSvmKernelType kernel, int degree,
double gamma, double coef0, int *n_support, double *b,
double **dual_coefs, double **x_support,
double cache_size, int max_iter, int nochange_steps,
double tol, int verbose, cumlSvmKernelType kernel,
int degree, double gamma, double coef0, int *n_support,
double *b, double **dual_coefs, double **x_support,
int **support_idx, int *n_classes,
double **unique_labels) {
ML::SVM::svmParameter param;
param.C = C;
param.cache_size = cache_size;
param.max_iter = max_iter;
param.nochange_steps = nochange_steps;
param.tol = tol;
param.verbose = verbose;

@@ -129,7 +131,8 @@ cumlError_t cumlSpSvcPredict(cumlHandle_t handle, float *input, int n_rows,
int n_cols, cumlSvmKernelType kernel, int degree,
float gamma, float coef0, int n_support, float b,
float *dual_coefs, float *x_support, int n_classes,
float *unique_labels, float *preds) {
float *unique_labels, float *preds,
float buffer_size) {
MLCommon::Matrix::KernelParams kernel_param;
kernel_param.kernel = (MLCommon::Matrix::KernelType)kernel;
kernel_param.degree = degree;
@@ -151,7 +154,7 @@ cumlError_t cumlSpSvcPredict(cumlHandle_t handle, float *input, int n_rows,
if (status == CUML_SUCCESS) {
try {
ML::SVM::svcPredict(*handle_ptr, input, n_rows, n_cols, kernel_param,
model, preds);
model, preds, buffer_size);
}
//TODO: Implement this
//catch (const MLCommon::Exception& e)
@@ -171,7 +174,7 @@ cumlError_t cumlDpSvcPredict(cumlHandle_t handle, double *input, int n_rows,
double gamma, double coef0, int n_support,
double b, double *dual_coefs, double *x_support,
int n_classes, double *unique_labels,
double *preds) {
double *preds, double buffer_size) {
MLCommon::Matrix::KernelParams kernel_param;
kernel_param.kernel = (MLCommon::Matrix::KernelType)kernel;
kernel_param.degree = degree;
Expand All @@ -193,7 +196,7 @@ cumlError_t cumlDpSvcPredict(cumlHandle_t handle, double *input, int n_rows,
if (status == CUML_SUCCESS) {
try {
ML::SVM::svcPredict(*handle_ptr, input, n_rows, n_cols, kernel_param,
model, preds);
model, preds, buffer_size);
}
//TODO: Implement this
//catch (const MLCommon::Exception& e)
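For callers of the plain C API, the two signature changes above mean one extra int argument (nochange_steps, between max_iter and tol) in cumlSpSvcFit/cumlDpSvcFit and a trailing buffer_size argument in the predict entry points. A call-sequence sketch: the argument order is taken from the signatures above, but the header name and the LINEAR enumerator are assumptions, and all float pointers must be device pointers prepared by the caller:

// Sketch only; assumed: "svm_api.h" is the matching public header, LINEAR is
// a valid cumlSvmKernelType enumerator, and X, y, X_test, preds are device
// pointers (e.g. allocated with cudaMalloc and filled with cudaMemcpy).
#include "svm_api.h"

cumlError_t fit_and_predict(cumlHandle_t handle, float *X, float *y,
                            int n_rows, int n_cols, float *X_test,
                            int n_test_rows, float *preds) {
  int n_support, n_classes;
  float b;
  float *dual_coefs, *x_support, *unique_labels;
  int *support_idx;

  cumlError_t err = cumlSpSvcFit(
      handle, X, n_rows, n_cols, y,
      /*C=*/1.0f, /*cache_size=*/200.0f, /*max_iter=*/-1,
      /*nochange_steps=*/1000,                 // new in 0.10
      /*tol=*/1e-3f, /*verbose=*/0,
      LINEAR, /*degree=*/3, /*gamma=*/1.0f, /*coef0=*/0.0f,
      &n_support, &b, &dual_coefs, &x_support, &support_idx,
      &n_classes, &unique_labels);
  if (err != CUML_SUCCESS) return err;

  return cumlSpSvcPredict(handle, X_test, n_test_rows, n_cols,
                          LINEAR, /*degree=*/3, /*gamma=*/1.0f, /*coef0=*/0.0f,
                          n_support, b, dual_coefs, x_support,
                          n_classes, unique_labels, preds,
                          /*buffer_size=*/200.0f);  // new in 0.10, MiB
}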