Skip to content

Commit

Permalink
add cudaDeviceSynchronize calls to pinpoint error, ci trial #2
Browse files Browse the repository at this point in the history
  • Loading branch information
lijinf2 committed Feb 13, 2024
1 parent fa57a91 commit 0d25832
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 0 deletions.
1 change: 1 addition & 0 deletions ci/test_python_dask.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pytest \
--cov=cuml_dask \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cuml-dask-coverage.xml" \
--cov-report=term \
-s \
test_dask_logistic_regression.py::test_standardization_sparse
# .

Expand Down
17 changes: 17 additions & 0 deletions cpp/src/glm/qn/mg/standardization.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@
#include <raft/core/device_mdspan.hpp>
#include <raft/matrix/init.cuh>

#include <raft/core/device_mdarray.hpp>

#include <raft/util/cudart_utils.hpp>

namespace ML {
namespace GLM {
namespace opg {
Expand Down Expand Up @@ -78,6 +82,7 @@ void mean_stddev(const raft::handle_t& handle,
T* mean_vector,
T* stddev_vector)
{
cudaDeviceSynchronize();
int D = X.n;
int num_rows = X.m;
auto stream = handle.get_stream();
Expand All @@ -88,6 +93,7 @@ void mean_stddev(const raft::handle_t& handle,
rmm::device_uvector<T> ones(num_rows, stream);
auto ones_view = raft::make_device_vector_view(ones.data(), num_rows);
raft::matrix::fill(handle, ones_view, T(1.0));
cudaDeviceSynchronize();

SimpleDenseMat<T> ones_mat(ones.data(), 1, num_rows);
X.gemmb(handle, 1., ones_mat, false, false, 0., mean_mat, stream);
Expand All @@ -96,17 +102,28 @@ void mean_stddev(const raft::handle_t& handle,
raft::linalg::multiplyScalar(mean_vector, mean_vector, weight, D, stream);
comm.allreduce(mean_vector, mean_vector, D, raft::comms::op_t::SUM, stream);
comm.sync_stream(stream);
cudaDeviceSynchronize();

// calculate stdev.S
SimpleDenseMat<T> stddev_mat(stddev_vector, 1, D);

ML::Logger::get().setLevel(6);
rmm::device_uvector<T> values_copy(X.nnz, stream);
CUML_LOG_DEBUG("sparkdebug X.nnz: %d", X.nnz);

auto log_X_values = raft::arr2Str(X.values, X.nnz, "", stream);
CUML_LOG_DEBUG("sparkdebug log_X_values: %s", log_X_values.c_str());

raft::copy(values_copy.data(), X.values, X.nnz, stream);
CUML_LOG_DEBUG("sparkdebug finished copying X.values with X.nnz: %d", X.nnz);
cudaDeviceSynchronize();

auto square_op = [] __device__(const T a) { return a * a; };
raft::linalg::unaryOp(X.values, X.values, X.nnz, square_op, stream);
X.gemmb(handle, 1., ones_mat, false, false, 0., stddev_mat, stream);
raft::copy(X.values, values_copy.data(), X.nnz, stream);
CUML_LOG_DEBUG("sparkdebug finished copying back to X.values with X.nnz: %d", X.nnz);
cudaDeviceSynchronize();

weight = n_samples < 1 ? T(0) : T(1) / T(n_samples - 1);
raft::linalg::multiplyScalar(stddev_vector, stddev_vector, weight, D, stream);
Expand Down

0 comments on commit 0d25832

Please sign in to comment.