From dabd24056a8a90665c768d722ba15c8c3972022b Mon Sep 17 00:00:00 2001 From: fis Date: Mon, 30 Mar 2020 19:06:49 +0800 Subject: [PATCH] Prevent copying SimpleDMatrix. * Set default dtor for SimpleDMatrix to initialize default copy ctor, which is deleted due to unique ptr. * Remove commented code. * Remove warning for calling host function (std::max). * Remove warning for initialization order. * Remove warning for unused variables. --- src/common/common.h | 5 +++ src/common/compressed_iterator.h | 6 ++-- src/common/hist_util.h | 4 +-- src/common/quantile.h | 2 +- src/data/data.cc | 19 ----------- src/data/simple_dmatrix.h | 1 + src/data/sparse_page_dmatrix.h | 1 - tests/cpp/common/test_hist_util.cc | 2 -- tests/cpp/data/test_simple_dmatrix.cc | 46 +++++++++++++-------------- 9 files changed, 36 insertions(+), 50 deletions(-) diff --git a/src/common/common.h b/src/common/common.h index ccff63eb75ec..4be2c48aa3b9 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -64,6 +64,11 @@ inline std::vector Split(const std::string& s, char delim) { return ret; } +template +XGBOOST_DEVICE T Max(T a, T b) { + return a < b ? b : a; +} + // simple routine to convert any data to string template inline std::string ToString(const T& data) { diff --git a/src/common/compressed_iterator.h b/src/common/compressed_iterator.h index 644c560a0c99..eea7eb35de4f 100644 --- a/src/common/compressed_iterator.h +++ b/src/common/compressed_iterator.h @@ -8,6 +8,8 @@ #include #include +#include "common.h" + #ifdef __CUDACC__ #include "device_helpers.cuh" #endif // __CUDACC__ @@ -29,12 +31,12 @@ inline void ClearBit(CompressedByteT *byte, int bit_idx) { *byte &= ~(1 << bit_idx); } static const int kPadding = 4; // Assign padding so we can read slightly off - // the beginning of the array + // the beginning of the array // The number of bits required to represent a given unsigned range inline XGBOOST_DEVICE size_t SymbolBits(size_t num_symbols) { auto bits = std::ceil(log2(static_cast(num_symbols))); - return std::max(static_cast(bits), size_t(1)); + return common::Max(static_cast(bits), size_t(1)); } } // namespace detail diff --git a/src/common/hist_util.h b/src/common/hist_util.h index 66a7a6ef2af9..827bde4317b5 100644 --- a/src/common/hist_util.h +++ b/src/common/hist_util.h @@ -297,9 +297,9 @@ struct Index { std::vector data_; std::vector offset_; // size of this field is equal to number of features void* data_ptr_; - uint32_t* offset_ptr_; - size_t p_; BinTypeSize binTypeSize_; + size_t p_; + uint32_t* offset_ptr_; Func func_; }; diff --git a/src/common/quantile.h b/src/common/quantile.h index 27e5560a9933..dd13e7138a3f 100644 --- a/src/common/quantile.h +++ b/src/common/quantile.h @@ -710,7 +710,7 @@ class QuantileSketchTemplate { // check invariant size_t n = (1ULL << nlevel); CHECK(n * limit_size >= maxn) << "invalid init parameter"; - CHECK(nlevel <= std::max(1, static_cast(limit_size * eps))) + CHECK(nlevel <= std::max(static_cast(1), static_cast(limit_size * eps))) << "invalid init parameter"; } diff --git a/src/data/data.cc b/src/data/data.cc index de7aa170e2ae..79c5fc5e3719 100644 --- a/src/data/data.cc +++ b/src/data/data.cc @@ -427,25 +427,6 @@ DMatrix* DMatrix::Load(const std::string& uri, return dmat; } - -/* -DMatrix* DMatrix::Create(std::unique_ptr>&& source, - const std::string& cache_prefix) { - if (cache_prefix.length() == 0) { - // Data split mode is fixed to be row right now. - rabit::Allreduce(&source->info.num_col_, 1); - return new data::SimpleDMatrix(std::move(source)); - } else { -#if DMLC_ENABLE_STD_THREAD - return new data::SparsePageDMatrix(std::move(source), cache_prefix); -#else - LOG(FATAL) << "External memory is not enabled in mingw"; - return nullptr; -#endif // DMLC_ENABLE_STD_THREAD - } -} -*/ - template DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread, const std::string& cache_prefix, size_t page_size ) { diff --git a/src/data/simple_dmatrix.h b/src/data/simple_dmatrix.h index f7768b01f56a..daf7aac13be7 100644 --- a/src/data/simple_dmatrix.h +++ b/src/data/simple_dmatrix.h @@ -23,6 +23,7 @@ class SimpleDMatrix : public DMatrix { explicit SimpleDMatrix(AdapterT* adapter, float missing, int nthread); explicit SimpleDMatrix(dmlc::Stream* in_stream); + ~SimpleDMatrix() override = default; void SaveToLocalFile(const std::string& fname); diff --git a/src/data/sparse_page_dmatrix.h b/src/data/sparse_page_dmatrix.h index 1116d1c8818c..05d318313cc8 100644 --- a/src/data/sparse_page_dmatrix.h +++ b/src/data/sparse_page_dmatrix.h @@ -30,7 +30,6 @@ class SparsePageDMatrix : public DMatrix { row_source_.reset(new data::SparsePageSource(adapter, missing, nthread, cache_prefix, page_size)); } - // Set number of threads but keep old value so we can reset it after ~SparsePageDMatrix() override = default; MetaInfo& Info() override; diff --git a/tests/cpp/common/test_hist_util.cc b/tests/cpp/common/test_hist_util.cc index 309445b7a24d..a6fe02db0810 100644 --- a/tests/cpp/common/test_hist_util.cc +++ b/tests/cpp/common/test_hist_util.cc @@ -403,7 +403,6 @@ TEST(hist_util, IndexBinData) { size_t constexpr kRows = 100; size_t constexpr kCols = 10; - size_t bin_id = 0; for (auto max_bin : kBinSizes) { auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix(); common::GHistIndexMatrix hmat; @@ -434,7 +433,6 @@ TEST(hist_util, SparseIndexBinData) { size_t constexpr kRows = 100; size_t constexpr kCols = 10; - size_t bin_id = 0; for (auto max_bin : bin_sizes) { auto p_fmat = RandomDataGenerator(kRows, kCols, 0.2).GenerateDMatix(); common::GHistIndexMatrix hmat; diff --git a/tests/cpp/data/test_simple_dmatrix.cc b/tests/cpp/data/test_simple_dmatrix.cc index 259a992aec81..8117168faf6f 100644 --- a/tests/cpp/data/test_simple_dmatrix.cc +++ b/tests/cpp/data/test_simple_dmatrix.cc @@ -72,32 +72,32 @@ TEST(SimpleDMatrix, Empty) { data::CSRAdapter csr_adapter(row_ptr.data(), feature_idx.data(), data.data(), 0, 0, 0); - data::SimpleDMatrix dmat(&csr_adapter, - std::numeric_limits::quiet_NaN(), 1); - CHECK_EQ(dmat.Info().num_nonzero_, 0); - CHECK_EQ(dmat.Info().num_row_, 0); - CHECK_EQ(dmat.Info().num_col_, 0); - for (auto &batch : dmat.GetBatches()) { + std::unique_ptr dmat(new data::SimpleDMatrix( + &csr_adapter, std::numeric_limits::quiet_NaN(), 1)); + CHECK_EQ(dmat->Info().num_nonzero_, 0); + CHECK_EQ(dmat->Info().num_row_, 0); + CHECK_EQ(dmat->Info().num_col_, 0); + for (auto &batch : dmat->GetBatches()) { CHECK_EQ(batch.Size(), 0); } data::DenseAdapter dense_adapter(nullptr, 0, 0); - dmat = data::SimpleDMatrix(&dense_adapter, - std::numeric_limits::quiet_NaN(), 1); - CHECK_EQ(dmat.Info().num_nonzero_, 0); - CHECK_EQ(dmat.Info().num_row_, 0); - CHECK_EQ(dmat.Info().num_col_, 0); - for (auto &batch : dmat.GetBatches()) { + dmat.reset( new data::SimpleDMatrix(&dense_adapter, + std::numeric_limits::quiet_NaN(), 1) ); + CHECK_EQ(dmat->Info().num_nonzero_, 0); + CHECK_EQ(dmat->Info().num_row_, 0); + CHECK_EQ(dmat->Info().num_col_, 0); + for (auto &batch : dmat->GetBatches()) { CHECK_EQ(batch.Size(), 0); } data::CSCAdapter csc_adapter(nullptr, nullptr, nullptr, 0, 0); - dmat = data::SimpleDMatrix(&csc_adapter, - std::numeric_limits::quiet_NaN(), 1); - CHECK_EQ(dmat.Info().num_nonzero_, 0); - CHECK_EQ(dmat.Info().num_row_, 0); - CHECK_EQ(dmat.Info().num_col_, 0); - for (auto &batch : dmat.GetBatches()) { + dmat.reset(new data::SimpleDMatrix( + &csc_adapter, std::numeric_limits::quiet_NaN(), 1)); + CHECK_EQ(dmat->Info().num_nonzero_, 0); + CHECK_EQ(dmat->Info().num_row_, 0); + CHECK_EQ(dmat->Info().num_col_, 0); + for (auto &batch : dmat->GetBatches()) { CHECK_EQ(batch.Size(), 0); } } @@ -109,11 +109,11 @@ TEST(SimpleDMatrix, MissingData) { data::CSRAdapter adapter(row_ptr.data(), feature_idx.data(), data.data(), 2, 3, 2); - data::SimpleDMatrix dmat(&adapter, std::numeric_limits::quiet_NaN(), - 1); - CHECK_EQ(dmat.Info().num_nonzero_, 2); - dmat = data::SimpleDMatrix(&adapter, 1.0, 1); - CHECK_EQ(dmat.Info().num_nonzero_, 1); + std::unique_ptr dmat{new data::SimpleDMatrix{ + &adapter, std::numeric_limits::quiet_NaN(), 1}}; + CHECK_EQ(dmat->Info().num_nonzero_, 2); + dmat.reset(new data::SimpleDMatrix(&adapter, 1.0, 1)); + CHECK_EQ(dmat->Info().num_nonzero_, 1); } TEST(SimpleDMatrix, EmptyRow) {