Skip to content

Commit

Permalink
Define bin type. (#7850)
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis authored Apr 29, 2022
1 parent f7db16a commit 288c525
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 23 deletions.
2 changes: 2 additions & 0 deletions include/xgboost/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ using bst_float = float; // NOLINT
using bst_cat_t = int32_t; // NOLINT
/*! \brief Type for data column (feature) index. */
using bst_feature_t = uint32_t; // NOLINT
/*! \brief Type for histogram bin index. */
using bst_bin_t = int32_t; // NOLINT
/*! \brief Type for data row index.
*
* Be careful `std::size_t' is implementation-defined. Meaning that the binary
Expand Down
10 changes: 5 additions & 5 deletions src/common/column_matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ class Column {
public:
static constexpr int32_t kMissingId = -1;

Column(ColumnType type, common::Span<const BinIdxType> index, const uint32_t index_base)
: type_(type), index_(index), index_base_(index_base) {}
Column(ColumnType type, common::Span<const BinIdxType> index, const bst_bin_t index_base)
: type_(type), index_(index), index_base_{index_base} {}

virtual ~Column() = default;

Expand All @@ -60,19 +60,19 @@ class Column {
/* bin indexes in range [0, max_bins - 1] */
common::Span<const BinIdxType> index_;
/* bin index offset for specific feature */
const uint32_t index_base_;
bst_bin_t const index_base_;
};

template <typename BinIdxType>
class SparseColumn : public Column<BinIdxType> {
public:
SparseColumn(ColumnType type, common::Span<const BinIdxType> index, uint32_t index_base,
SparseColumn(ColumnType type, common::Span<const BinIdxType> index, bst_bin_t index_base,
common::Span<const size_t> row_ind)
: Column<BinIdxType>(type, index, index_base), row_ind_(row_ind) {}

const size_t* GetRowData() const { return row_ind_.data(); }

int32_t GetBinIdx(size_t rid, size_t* state) const {
bst_bin_t GetBinIdx(size_t rid, size_t* state) const {
const size_t column_size = this->Size();
if (!((*state) < column_size)) {
return this->kMissingId;
Expand Down
24 changes: 10 additions & 14 deletions src/common/hist_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ class HistogramCuts {
float max_cat_{-1.0f};

protected:
using BinIdx = uint32_t;

void Swap(HistogramCuts&& that) noexcept(true) {
std::swap(cut_values_, that.cut_values_);
std::swap(cut_ptrs_, that.cut_ptrs_);
Expand Down Expand Up @@ -110,31 +108,31 @@ class HistogramCuts {

// Return the index of a cut point that is strictly greater than the input
// value, or the last available index if none exists
BinIdx SearchBin(float value, bst_feature_t column_id, std::vector<uint32_t> const& ptrs,
std::vector<float> const& values) const {
bst_bin_t SearchBin(float value, bst_feature_t column_id, std::vector<uint32_t> const& ptrs,
std::vector<float> const& values) const {
auto end = ptrs[column_id + 1];
auto beg = ptrs[column_id];
auto it = std::upper_bound(values.cbegin() + beg, values.cbegin() + end, value);
BinIdx idx = it - values.cbegin();
bst_bin_t idx = it - values.cbegin();
idx -= !!(idx == end);
return idx;
}

BinIdx SearchBin(float value, bst_feature_t column_id) const {
bst_bin_t SearchBin(float value, bst_feature_t column_id) const {
return this->SearchBin(value, column_id, Ptrs(), Values());
}

/**
* \brief Search the bin index for numerical feature.
*/
BinIdx SearchBin(Entry const& e) const {
bst_bin_t SearchBin(Entry const& e) const {
return SearchBin(e.fvalue, e.index);
}

/**
* \brief Search the bin index for categorical feature.
*/
BinIdx SearchCatBin(Entry const &e) const {
bst_bin_t SearchCatBin(Entry const &e) const {
auto const &ptrs = this->Ptrs();
auto const &vals = this->Values();
auto end = ptrs.at(e.index + 1) + vals.cbegin();
Expand Down Expand Up @@ -296,10 +294,10 @@ struct Index {
};

template <typename GradientIndex>
int32_t XGBOOST_HOST_DEV_INLINE BinarySearchBin(size_t begin, size_t end,
GradientIndex const &data,
uint32_t const fidx_begin,
uint32_t const fidx_end) {
bst_bin_t XGBOOST_HOST_DEV_INLINE BinarySearchBin(size_t begin, size_t end,
GradientIndex const& data,
uint32_t const fidx_begin,
uint32_t const fidx_end) {
size_t previous_middle = std::numeric_limits<size_t>::max();
while (end != begin) {
size_t middle = begin + (end - begin) / 2;
Expand All @@ -324,8 +322,6 @@ int32_t XGBOOST_HOST_DEV_INLINE BinarySearchBin(size_t begin, size_t end,
return -1;
}

class ColumnMatrix;

template<typename GradientSumT>
using GHistRow = Span<xgboost::detail::GradientPairInternal<GradientSumT> >;

Expand Down
12 changes: 8 additions & 4 deletions src/data/gradient_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
#include "xgboost/data.h"

namespace xgboost {
namespace common {
class ColumnMatrix;
} // namespace common

/*!
* \brief preprocessed global index matrix, in CSR format
*
Expand Down Expand Up @@ -80,13 +84,13 @@ class GHistIndexMatrix {
for (bst_uint j = 0; j < inst.size(); ++j) {
auto e = inst[j];
if (common::IsCat(ft, e.index)) {
auto bin_idx = cut.SearchCatBin(e);
bst_bin_t bin_idx = cut.SearchCatBin(e);
index_data[ibegin + j] = get_offset(bin_idx, j);
++hit_count_tloc_[tid * nbins + bin_idx];
} else {
uint32_t idx = cut.SearchBin(e.fvalue, e.index, ptrs, values);
index_data[ibegin + j] = get_offset(idx, j);
++hit_count_tloc_[tid * nbins + idx];
bst_bin_t bin_idx = cut.SearchBin(e.fvalue, e.index, ptrs, values);
index_data[ibegin + j] = get_offset(bin_idx, j);
++hit_count_tloc_[tid * nbins + bin_idx];
}
}
});
Expand Down

0 comments on commit 288c525

Please sign in to comment.