Skip to content

Commit

Permalink
Adding one-pass kernels, but a best-metric issue does not yet allow their use
Browse files Browse the repository at this point in the history
  • Loading branch information
vishalmehta1991 committed Dec 9, 2019
1 parent 12e9ac0 commit ba5a222
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 33 deletions.
37 changes: 26 additions & 11 deletions cpp/src/decisiontree/levelalgo/levelfunc_regressor.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ void grow_deep_tree_regression(
MLCommon::updateDevice(d_colids, h_colids, Ncols, tempmem->stream);
}
std::vector<unsigned int> feature_selector(h_colids, h_colids + Ncols);
float* infogain = tempmem->h_outgain->data();

for (int depth = 0; (depth < maxdepth) && (n_nodes_nextitr != 0); depth++) {
depth_cnt = depth + 1;
Expand All @@ -128,26 +129,40 @@ void grow_deep_tree_regression(
sparsesize, depth, tempmem);

if (split_cr == ML::CRITERION::MSE) {
get_mse_regression_fused<T>(
data, labels, flagsptr, sample_cnt, nrows, Ncols, ncols_sampled, nbins,
n_nodes, split_algo, tempmem, d_mseout, d_predout, d_count);
//get_mse_regression_fused<T>(
// data, labels, flagsptr, sample_cnt, nrows, Ncols, ncols_sampled, nbins,
// n_nodes, split_algo, tempmem, d_mseout, d_predout, d_count);
//get_best_split_regression<T, MSEImpurity<T>>(
// h_mseout, d_mseout, h_predout, d_predout, h_count, d_count, h_colids,
// d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, nbins, n_nodes,
// depth, min_rows_per_node, split_algo, sparsesize, infogain,
// sparse_meanstate, sparse_countstate, sparsetree, sparse_nodelist,
// h_split_colidx, h_split_binidx, d_split_colidx, d_split_binidx,
// tempmem);
get_mse_regression<T, SquareFunctor>(
data, labels, flagsptr, sample_cnt, nrows, Ncols, ncols_sampled, nbins,
n_nodes, split_algo, tempmem, d_mseout, d_predout, d_count);
get_best_split_regression<T, MAEImpurity<T>>(
h_mseout, d_mseout, h_predout, d_predout, h_count, d_count, h_colids,
d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, nbins, n_nodes,
depth, min_rows_per_node, split_algo, sparsesize, infogain,
sparse_meanstate, sparse_countstate, sparsetree, sparse_nodelist,
h_split_colidx, h_split_binidx, d_split_colidx, d_split_binidx,
tempmem);

} else {
get_mse_regression<T, AbsFunctor>(
data, labels, flagsptr, sample_cnt, nrows, Ncols, ncols_sampled, nbins,
n_nodes, split_algo, tempmem, d_mseout, d_predout, d_count);
get_best_split_regression<T, MAEImpurity<T>>(
h_mseout, d_mseout, h_predout, d_predout, h_count, d_count, h_colids,
d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, nbins, n_nodes,
depth, min_rows_per_node, split_algo, sparsesize, infogain,
sparse_meanstate, sparse_countstate, sparsetree, sparse_nodelist,
h_split_colidx, h_split_binidx, d_split_colidx, d_split_binidx,
tempmem);
}

float* infogain = tempmem->h_outgain->data();
get_best_split_regression(
h_mseout, d_mseout, h_predout, d_predout, h_count, d_count, h_colids,
d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, nbins, n_nodes,
depth, min_rows_per_node, split_algo, sparsesize, infogain,
sparse_meanstate, sparse_countstate, sparsetree, sparse_nodelist,
h_split_colidx, h_split_binidx, d_split_colidx, d_split_binidx, tempmem);

CUDA_CHECK(cudaStreamSynchronize(tempmem->stream));
leaf_eval_regression(infogain, depth, min_impurity_decrease, maxdepth,
maxleaves, h_new_node_flags, sparsetree, sparsesize,
Expand Down
21 changes: 10 additions & 11 deletions cpp/src/decisiontree/levelalgo/levelhelper_regressor.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ void get_mse_regression(const T *data, const T *labels, unsigned int *flags,
CUDA_CHECK(cudaGetLastError());
}
}
template <typename T>
template <typename T, typename Impurity>
void get_best_split_regression(
T *mseout, T *d_mseout, T *predout, T *d_predout, unsigned int *count,
unsigned int *d_count, unsigned int *h_colids, unsigned int *d_colids,
Expand Down Expand Up @@ -263,10 +263,12 @@ void get_best_split_regression(
CUDA_CHECK(cudaMemsetAsync(d_split_colidx, 0, n_nodes * sizeof(int),
tempmem->stream));

get_best_split_regression_kernel<<<n_nodes, threads, 0, tempmem->stream>>>(
d_mseout, d_predout, d_count, d_parentmean, d_parentcount, d_parentmetric,
nbins, ncols_sampled, n_nodes, min_rpn, d_outgain, d_split_colidx,
d_split_binidx, d_childmean, d_childcount, d_childmetric);
get_best_split_regression_kernel<T, Impurity>
<<<n_nodes, threads, 0, tempmem->stream>>>(
d_mseout, d_predout, d_count, d_parentmean, d_parentcount,
d_parentmetric, nbins, ncols_sampled, n_nodes, min_rpn, d_outgain,
d_split_colidx, d_split_binidx, d_childmean, d_childcount,
d_childmetric);
CUDA_CHECK(cudaGetLastError());

MLCommon::updateHost(h_childmetric, d_childmetric, 2 * n_nodes,
Expand Down Expand Up @@ -347,16 +349,13 @@ void get_best_split_regression(
T tmp_mse_left = mseout[coloff_mse + binoff_mse + nodeoff_mse];
T tmp_mse_right = mseout[coloff_mse + binoff_mse + nodeoff_mse + 1];

T impurity2 = MAEImpurity<T>::exec(
parent_count, tmp_lnrows, tmp_rnrows, parent_mean, tmp_meanleft,
tmp_mse_left, tmp_mse_right);

T impurity =
Impurity::exec(parent_count, tmp_lnrows, tmp_rnrows, parent_mean,
tmp_meanleft, tmp_mse_left, tmp_mse_right);
tmp_meanleft /= tmp_lnrows;
tmp_meanright /= tmp_rnrows;
tmp_mse_left /= tmp_lnrows;
tmp_mse_right /= tmp_rnrows;
T impurity = (tmp_lnrows * 1.0 / totalrows) * tmp_mse_left +
(tmp_rnrows * 1.0 / totalrows) * tmp_mse_right;
float info_gain =
(float)(sparsetree[parentid].best_metric_val - impurity);

Expand Down
25 changes: 14 additions & 11 deletions cpp/src/decisiontree/levelalgo/levelkernel_regressor.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,11 @@ struct MSEImpurity {
static HDI T exec(const unsigned int total, const unsigned int left,
const unsigned int right, const T parent_mean,
const T sumleft, const T sumsq_left, const T sumsq_right) {
T temp = sumleft / total;
T sumright = (parent_mean * total) - sumleft;
T left_impurity = (sumsq_left / total) -
(total / left) * (sumleft / total) * (sumleft / total);
T right_impurity = (sumsq_right / total) - (total / right) *
(sumright / total) *
(sumright / total);
T left_impurity = (sumsq_left / total) - (total / left) * temp * temp;
temp = sumright / total;
T right_impurity = (sumsq_right / total) - (total / right) * temp * temp;
return (left_impurity + right_impurity);
}
};
Expand Down Expand Up @@ -499,7 +498,7 @@ __global__ void get_mse_pred_kernel_global(
}

//This is device version of best split in case, used when more than 512 nodes.
template <typename T>
template <typename T, typename Impurity>
__global__ void get_best_split_regression_kernel(
const T *__restrict__ mseout, const T *__restrict__ predout,
const unsigned int *__restrict__ count, const T *__restrict__ parentmean,
Expand Down Expand Up @@ -530,13 +529,17 @@ __global__ void get_best_split_regression_kernel(
if (tmp_lnrows == 0 || tmp_rnrows == 0 || totalrows < min_rpn) continue;
T tmp_meanleft = predout[threadoffset];
T tmp_meanright = parent_mean * parent_count - tmp_meanleft;
T tmp_mse_left = mseout[2 * threadoffset];
T tmp_mse_right = mseout[2 * threadoffset + 1];

T impurity =
Impurity::exec(parent_count, tmp_lnrows, tmp_rnrows, parent_mean,
tmp_meanleft, tmp_mse_left, tmp_mse_right);

tmp_meanleft /= tmp_lnrows;
tmp_meanright /= tmp_rnrows;
T tmp_mse_left = mseout[2 * threadoffset] / tmp_lnrows;
T tmp_mse_right = mseout[2 * threadoffset + 1] / tmp_rnrows;

T impurity = (tmp_lnrows * 1.0 / totalrows) * tmp_mse_left +
(tmp_rnrows * 1.0 / totalrows) * tmp_mse_right;
tmp_mse_left /= tmp_lnrows;
tmp_mse_right /= tmp_rnrows;
float info_gain = (float)(parent_metric - impurity);

if (info_gain > tid_pair.gain) {
Expand Down

0 comments on commit ba5a222

Please sign in to comment.