Merge pull request rapidsai#2 from rapidsai/branch-0.10
merge
vishalmehta1991 authored Sep 28, 2019
2 parents ddce47d + 91c53fb commit be9e8c0
Showing 63 changed files with 5,583 additions and 141 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -8,6 +8,9 @@
- PR #1113: prims: new batched make-symmetric-matrix primitive
- PR #1112: prims: new batched-gemv primitive
- PR #855: Added benchmark tools
- PR #1149: Add YYMMDD to version tag for nightly conda packages
- PR #892: General Gram matrices prim
- PR #912: Support Vector Machine

## Improvements
- PR #961: High Performance RF; HIST algo
@@ -21,6 +24,7 @@
- PR #1114: K-means: Exposing useful params, removing unused params, proxying params in Dask
- PR #1115: Moving dask_make_blobs to cuml.dask.datasets. Adding conversion to dask.DataFrame
- PR #1136: CUDA 10.1 CI updates
- PR #1165: Adding except + in all remaining cython

## Bug Fixes

@@ -39,6 +43,7 @@
- PR #1106: Pinning Distributed version to match Dask for consistent CI results
- PR #1116: TSNE CUDA 10.1 Bug Fixes
- PR #1132: DBSCAN Batching Bug Fix
- PR #1164: Fix check_dtype arg handling for input_to_dev_array

# cuML 0.9.0 (21 Aug 2019)

5 changes: 5 additions & 0 deletions ci/cpu/build.sh
@@ -29,6 +29,11 @@ cd $WORKSPACE
export GIT_DESCRIBE_TAG=`git describe --abbrev=0 --tags`
export GIT_DESCRIBE_NUMBER=`git rev-list ${GIT_DESCRIBE_TAG}..HEAD --count`

# If nightly build, append current YYMMDD to version
if [[ "$BUILD_MODE" = "branch" && "$SOURCE_BRANCH" = branch-* ]] ; then
export VERSION_SUFFIX=`date +%y%m%d`
fi

################################################################################
# SETUP - Check environment
################################################################################
3 changes: 2 additions & 1 deletion conda/recipes/cuml/meta.yaml
@@ -2,7 +2,7 @@

# Usage:
# conda build . -c defaults -c conda-forge -c numba -c rapidsai -c pytorch
{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set git_revision_count=environ.get('GIT_DESCRIBE_NUMBER', 0) %}
{% set cuda_version='.'.join(environ.get('CUDA', 'unknown').split('.')[:2]) %}
@@ -22,6 +22,7 @@ build:
- CC
- CXX
- BUILD_ABI
- VERSION_SUFFIX

requirements:
build:
3 changes: 2 additions & 1 deletion conda/recipes/libcuml/meta.yaml
@@ -2,7 +2,7 @@

# Usage:
# conda build . -c defaults -c conda-forge -c nvidia -c rapidsai -c pytorch
{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set git_revision_count=environ.get('GIT_DESCRIBE_NUMBER', 0) %}
{% set cuda_version='.'.join(environ.get('CUDA', '9.2').split('.')[:2]) %}
@@ -24,6 +24,7 @@ build:
- CUDAHOSTCXX
- BUILD_ABI
- PARALLEL_LEVEL
- VERSION_SUFFIX

requirements:
build:
5 changes: 4 additions & 1 deletion cpp/CMakeLists.txt
@@ -334,6 +334,8 @@ if(BUILD_CUML_CPP_LIBRARY)
src/random_projection/rproj.cu
src/solver/solver.cu
src/spectral/spectral.cu
src/svm/svc.cu
src/svm/ws_util.cu
src/tsne/tsne.cu
src/tsvd/tsvd.cu
src/umap/umap.cu
@@ -376,7 +378,8 @@ if(BUILD_CUML_C_LIBRARY)
src/common/cuml_api.cpp
src/dbscan/dbscan_api.cpp
src/glm/glm_api.cpp
src/holtwinters/holtwinters_api.cpp)
src/holtwinters/holtwinters_api.cpp
src/svm/svm_api.cpp)
target_link_libraries(${CUML_C_TARGET} ${CUML_CPP_TARGET})
endif(BUILD_CUML_C_LIBRARY)

152 changes: 152 additions & 0 deletions cpp/src/svm/kernelcache.h
@@ -0,0 +1,152 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cuda_utils.h>
#include <linalg/gemm.h>
#include "cache/cache.h"
#include "common/cumlHandle.hpp"
#include "common/host_buffer.hpp"
#include "matrix/grammatrix.h"
#include "matrix/matrix.h"
#include "ml_utils.h"

namespace ML {
namespace SVM {

/**
* @brief Buffer to store a kernel tile
*
* We calculate the kernel matrix for the vectors in the working set.
* For every vector x_i in the working set, we always calculate a full row of the
* kernel matrix K(x_j, x_i), j=1..n_rows.
*
* A kernel tile stores all the kernel rows for the working set, i.e. K(x_j, x_i)
* for all i in the working set, and j in 1..n_rows.
*
* The kernel values can be cached to avoid repeated calculation of the kernel
* function.
*/
template <typename math_t>
class KernelCache {
private:
const math_t *x; //!< pointer to the training vectors

MLCommon::device_buffer<math_t>
x_ws; //!< feature vectors in the current working set
MLCommon::device_buffer<int>
ws_cache_idx; //!< cache positions of the working set vectors
MLCommon::device_buffer<math_t> tile; //!< Kernel matrix tile

int n_rows; //!< number of rows in x
int n_cols; //!< number of columns in x
int n_ws; //!< number of elements in the working set

cublasHandle_t cublas_handle;

MLCommon::Matrix::GramMatrixBase<math_t> *kernel;

const cumlHandle_impl handle;

const int TPB = 256; //!< threads per block for kernels launched

MLCommon::Cache::Cache<math_t> cache;

cudaStream_t stream;

public:
/**
* Construct an object to manage kernel cache
*
* @param handle reference to cumlHandle implementation
* @param x device array of training vectors in column major format,
* size [n_rows x n_cols]
* @param n_rows number of training vectors
* @param n_cols number of features
* @param n_ws size of working set
* @param kernel pointer to kernel (default linear)
* @param cache_size (default 200 MiB)
*/
KernelCache(const cumlHandle_impl &handle, const math_t *x, int n_rows,
int n_cols, int n_ws,
MLCommon::Matrix::GramMatrixBase<math_t> *kernel,
float cache_size = 200)
: cache(handle.getDeviceAllocator(), handle.getStream(), n_rows,
cache_size),
kernel(kernel),
x(x),
n_rows(n_rows),
n_cols(n_cols),
n_ws(n_ws),
cublas_handle(handle.getCublasHandle()),
x_ws(handle.getDeviceAllocator(), handle.getStream(), n_ws * n_cols),
tile(handle.getDeviceAllocator(), handle.getStream(), n_ws * n_rows),
ws_cache_idx(handle.getDeviceAllocator(), handle.getStream(), n_ws) {
ASSERT(kernel != nullptr, "Kernel pointer required for KernelCache!");

stream = handle.getStream();
}

~KernelCache(){};

/**
* @brief Get all the kernel matrix rows for the working set.
* @param ws_idx indices of the working set
* @return pointer to the kernel tile [n_rows x n_ws], K_j,i = K(x_j, x_q),
* where j = 1..n_rows and q = ws_idx[i]; j is the contiguous dimension
*/
math_t *GetTile(int *ws_idx) {
if (cache.GetSize() > 0) {
int n_cached;
cache.GetCacheIdxPartitioned(ws_idx, n_ws, ws_cache_idx.data(), &n_cached,
stream);
// collect already cached values
cache.GetVecs(ws_cache_idx.data(), n_cached, tile.data(), stream);

int non_cached = n_ws - n_cached;
if (non_cached > 0) {
int *ws_idx_new = ws_idx + n_cached;
// AssignCacheIdx can permute ws_idx_new, therefore it has to come
// before the kernel calculation. It could run on a separate stream so
// that copyRows overlaps with AssignCacheIdx.
cache.AssignCacheIdx(ws_idx_new, non_cached,
ws_cache_idx.data() + n_cached,
stream); // cache stream

// collect training vectors for the kernel elements that need to be calculated
MLCommon::Matrix::copyRows(x, n_rows, n_cols, x_ws.data(), ws_idx_new,
non_cached, stream, false);
math_t *tile_new = tile.data() + n_cached * n_rows;
(*kernel)(x, n_rows, n_cols, x_ws.data(), non_cached, tile_new, stream);
// We need AssignCacheIdx to be finished before calling StoreVecs
cache.StoreVecs(tile_new, n_rows, non_cached,
ws_cache_idx.data() + n_cached, stream);
}
} else {
if (n_ws > 0) {
// collect all the feature vectors in the working set
MLCommon::Matrix::copyRows(x, n_rows, n_cols, x_ws.data(), ws_idx, n_ws,
stream, false);
(*kernel)(x, n_rows, n_cols, x_ws.data(), n_ws, tile.data(), stream);
}
}
return tile.data();
}
};

}; // end namespace SVM
}; // end namespace ML
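
For orientation, the sketch below shows how a caller might drive KernelCache from a solver loop. It is a minimal, hypothetical example and is not code from this commit: the names solver_loop, select_working_set, and solve_subproblem are placeholders, and it assumes that MLCommon::Matrix::GramMatrixBase<float> (the linear kernel used as the default here) can be constructed from the handle's cuBLAS handle.

// Hypothetical usage sketch (not part of this commit).
// Assumes GramMatrixBase<float> is constructible from a cuBLAS handle and
// that ws_idx is a device array holding n_ws working-set indices.
#include "kernelcache.h"
#include "matrix/grammatrix.h"

namespace ML {
namespace SVM {

void solver_loop(const cumlHandle_impl &handle, const float *x, int n_rows,
                 int n_cols, int *ws_idx, int n_ws, int n_iter) {
  // The GramMatrixBase base class implements the linear (dot product) kernel.
  MLCommon::Matrix::GramMatrixBase<float> kernel(handle.getCublasHandle());

  // Cache up to 200 MiB of kernel rows, reused across iterations.
  KernelCache<float> cache(handle, x, n_rows, n_cols, n_ws, &kernel, 200);

  for (int it = 0; it < n_iter; it++) {
    // select_working_set(ws_idx, n_ws);      // hypothetical solver step
    // tile is [n_rows x n_ws], column major: K(x_j, x_{ws_idx[i]}) is stored
    // at tile[i * n_rows + j]; the buffer is owned by the cache object.
    float *tile = cache.GetTile(ws_idx);
    (void)tile;
    // solve_subproblem(tile, ws_idx, n_ws);  // hypothetical solver step
  }
}

}  // end namespace SVM
}  // end namespace ML

Note that GetTile returns a pointer into the cache's internal tile buffer, so it stays valid only while the KernelCache object is alive and must not be freed by the caller; GetTile may also reorder ws_idx while partitioning cached and uncached entries.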