Skip to content

Commit

Permalink
Fix r interaction constraints (#5543)
Browse files Browse the repository at this point in the history
* Unify the parsing code.

* Cleanup.
  • Loading branch information
trivialfis authored Apr 17, 2020
1 parent 93df871 commit c245eb8
Show file tree
Hide file tree
Showing 8 changed files with 107 additions and 55 deletions.
41 changes: 29 additions & 12 deletions R-package/tests/testthat/test_interaction_constraints.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,42 @@ test_that("interaction constraints for regression", {
bst <- xgboost(data = train, label = y, max_depth = 3,
eta = 0.1, nthread = 2, nrounds = 100, verbose = 0,
interaction_constraints = list(c(0,1)))

# Set all observations to have the same x3 values then increment
# by the same amount
preds <- lapply(c(1,2,3), function(x){
tmat <- matrix(c(x1,x2,rep(x,1000)), ncol=3)
return(predict(bst, tmat))
})
preds <- lapply(c(1,2,3), function(x){
tmat <- matrix(c(x1,x2,rep(x,1000)), ncol=3)
return(predict(bst, tmat))
})

# Check incrementing x3 has the same effect on all observations
# since x3 is constrained to be independent of x1 and x2
# and all observations start off from the same x3 value
diff1 <- preds[[2]] - preds[[1]]
test1 <- all(abs(diff1 - diff1[1]) < 1e-4)
diff2 <- preds[[3]] - preds[[2]]
test2 <- all(abs(diff2 - diff2[1]) < 1e-4)
diff1 <- preds[[2]] - preds[[1]]
test1 <- all(abs(diff1 - diff1[1]) < 1e-4)

diff2 <- preds[[3]] - preds[[2]]
test2 <- all(abs(diff2 - diff2[1]) < 1e-4)

expect_true({
test1 & test2
}, "Interaction Contraint Satisfied")

})

test_that("interaction constraints scientific representation", {
  rows <- 10
  ## When a number exceeds 1e5, R's paste function uses scientific
  ## representation, so use enough columns for the feature indices in the
  ## constraint to go past that threshold.
  ## See: https://github.com/dmlc/xgboost/issues/5179
  cols <- 1e5+10

  d <- matrix(rexp(rows, rate=.1), nrow=rows, ncol=cols)
  y <- rnorm(rows)

  dtrain <- xgb.DMatrix(data=d, info = list(label=y))
  ## Feature indices are 0-based, so the last valid index is cols - 1.
  ## The sequence still contains 1e5, which is the value that gets
  ## formatted in scientific notation.
  inc <- list(c(seq.int(from = 0, to = cols - 1, by = 1)))

  ## A single constraint set holding every feature should not restrict
  ## anything, so both models are expected to serialise identically.
  with_inc <- xgb.train(data=dtrain, tree_method='hist',
                        interaction_constraints=inc, nrounds=10)
  without_inc <- xgb.train(data=dtrain, tree_method='hist', nrounds=10)
  expect_equal(xgb.save.raw(with_inc), xgb.save.raw(without_inc))
})
14 changes: 6 additions & 8 deletions src/tree/constraints.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <vector>

#include "xgboost/span.h"
#include "xgboost/json.h"
#include "constraints.h"
#include "param.h"

Expand All @@ -27,15 +28,12 @@ void FeatureInteractionConstraintHost::Reset() {
if (!enabled_) {
return;
}
// Parse interaction constraints
std::istringstream iss(this->interaction_constraint_str_);
dmlc::JSONReader reader(&iss);
// Read std::vector<std::vector<bst_uint>> first and then
// convert to std::vector<std::unordered_set<bst_uint>>
std::vector<std::vector<bst_uint>> tmp;
// Read std::vector<std::vector<bst_feature_t>> first and then
// convert to std::vector<std::unordered_set<bst_feature_t>>
std::vector<std::vector<bst_feature_t>> tmp;
try {
reader.Read(&tmp);
} catch (dmlc::Error const& e) {
ParseInteractionConstraint(this->interaction_constraint_str_, &tmp);
} catch (dmlc::Error const &e) {
LOG(FATAL) << "Failed to parse feature interaction constraint:\n"
<< this->interaction_constraint_str_ << "\n"
<< "With error:\n" << e.what();
Expand Down
35 changes: 15 additions & 20 deletions src/tree/constraints.cu
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@
#include <thrust/iterator/counting_iterator.h>

#include <algorithm>
#include <bitset>
#include <string>
#include <sstream>
#include <set>

#include "xgboost/logging.h"
Expand All @@ -18,28 +16,25 @@
#include "param.h"
#include "../common/device_helpers.cuh"


namespace xgboost {

size_t FeatureInteractionConstraint::Features() const {
size_t FeatureInteractionConstraintDevice::Features() const {
return d_sets_ptr_.size() - 1;
}

void FeatureInteractionConstraint::Configure(
void FeatureInteractionConstraintDevice::Configure(
tree::TrainParam const& param, int32_t const n_features) {
has_constraint_ = true;
if (param.interaction_constraints.length() == 0) {
has_constraint_ = false;
return;
}
// --- Parse interaction constraints
std::istringstream iss(param.interaction_constraints);
dmlc::JSONReader reader(&iss);
// Interaction constraints parsed from string parameter. After
// parsing, this looks like {{0, 1, 2}, {2, 3 ,4}}.
std::vector<std::vector<int32_t>> h_feature_constraints;
std::vector<std::vector<bst_feature_t>> h_feature_constraints;
try {
reader.Read(&h_feature_constraints);
ParseInteractionConstraint(param.interaction_constraints, &h_feature_constraints);
} catch (dmlc::Error const& e) {
LOG(FATAL) << "Failed to parse feature interaction constraint:\n"
<< param.interaction_constraints << "\n"
Expand Down Expand Up @@ -68,13 +63,13 @@ void FeatureInteractionConstraint::Configure(

// Represent constraints as CSR format, flatten is the value vector,
// ptr is row_ptr vector in CSR.
std::vector<int32_t> h_feature_constraints_flatten;
std::vector<uint32_t> h_feature_constraints_flatten;
for (auto const& constraints : h_feature_constraints) {
for (int32_t c : constraints) {
for (uint32_t c : constraints) {
h_feature_constraints_flatten.emplace_back(c);
}
}
std::vector<int32_t> h_feature_constraints_ptr;
std::vector<size_t> h_feature_constraints_ptr;
size_t n_features_in_constraints = 0;
h_feature_constraints_ptr.emplace_back(n_features_in_constraints);
for (auto const& v : h_feature_constraints) {
Expand Down Expand Up @@ -130,13 +125,13 @@ void FeatureInteractionConstraint::Configure(
s_result_buffer_ = dh::ToSpan(result_buffer_);
}

FeatureInteractionConstraint::FeatureInteractionConstraint(
FeatureInteractionConstraintDevice::FeatureInteractionConstraintDevice(
tree::TrainParam const& param, int32_t const n_features) :
has_constraint_{true}, n_sets_{0} {
this->Configure(param, n_features);
}

void FeatureInteractionConstraint::Reset() {
void FeatureInteractionConstraintDevice::Reset() {
for (auto& node : node_constraints_storage_) {
thrust::fill(node.begin(), node.end(), 0);
}
Expand All @@ -153,7 +148,7 @@ __global__ void ClearBuffersKernel(
}
}

void FeatureInteractionConstraint::ClearBuffers() {
void FeatureInteractionConstraintDevice::ClearBuffers() {
CHECK_EQ(output_buffer_bits_.Size(), input_buffer_bits_.Size());
CHECK_LE(feature_buffer_.Size(), output_buffer_bits_.Size());
uint32_t constexpr kBlockThreads = 256;
Expand All @@ -164,7 +159,7 @@ void FeatureInteractionConstraint::ClearBuffers() {
output_buffer_bits_, input_buffer_bits_);
}

common::Span<bst_feature_t> FeatureInteractionConstraint::QueryNode(int32_t node_id) {
common::Span<bst_feature_t> FeatureInteractionConstraintDevice::QueryNode(int32_t node_id) {
if (!has_constraint_) { return {}; }
CHECK_LT(node_id, s_node_constraints_.size());

Expand Down Expand Up @@ -203,7 +198,7 @@ __global__ void QueryFeatureListKernel(LBitField64 node_constraints,
result_buffer_output &= result_buffer_input;
}

common::Span<bst_feature_t> FeatureInteractionConstraint::Query(
common::Span<bst_feature_t> FeatureInteractionConstraintDevice::Query(
common::Span<bst_feature_t> feature_list, int32_t nid) {
if (!has_constraint_ || nid == 0) {
return feature_list;
Expand Down Expand Up @@ -250,8 +245,8 @@ __global__ void RestoreFeatureListFromSetsKernel(
LBitField64 feature_buffer,

bst_feature_t fid,
common::Span<int32_t> feature_interactions,
common::Span<int32_t> feature_interactions_ptr, // of size n interaction set + 1
common::Span<bst_feature_t> feature_interactions,
common::Span<size_t> feature_interactions_ptr, // of size n interaction set + 1

common::Span<bst_feature_t> interactions_list,
common::Span<size_t> interactions_list_ptr) {
Expand Down Expand Up @@ -302,7 +297,7 @@ __global__ void InteractionConstraintSplitKernel(LBitField64 feature,
}
}

void FeatureInteractionConstraint::Split(
void FeatureInteractionConstraintDevice::Split(
bst_node_t node_id, bst_feature_t feature_id, bst_node_t left_id, bst_node_t right_id) {
if (!has_constraint_) { return; }
CHECK_NE(node_id, left_id)
Expand Down
20 changes: 10 additions & 10 deletions src/tree/constraints.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -88,18 +88,18 @@ struct ValueConstraint {
};

// Feature interaction constraints built for GPU Hist updater.
struct FeatureInteractionConstraint {
struct FeatureInteractionConstraintDevice {
protected:
// Whether interaction constraint is used.
bool has_constraint_;
// n interaction sets.
int32_t n_sets_;
size_t n_sets_;

// The parsed feature interaction constraints as CSR.
dh::device_vector<int32_t> d_fconstraints_;
common::Span<int32_t> s_fconstraints_;
dh::device_vector<int32_t> d_fconstraints_ptr_;
common::Span<int32_t> s_fconstraints_ptr_;
dh::device_vector<bst_feature_t> d_fconstraints_;
common::Span<bst_feature_t> s_fconstraints_;
dh::device_vector<size_t> d_fconstraints_ptr_;
common::Span<size_t> s_fconstraints_ptr_;
/* Interaction sets for each feature as CSR. For an input like:
* [[0, 1], [1, 2]], this will have values:
*
Expand Down Expand Up @@ -141,11 +141,11 @@ struct FeatureInteractionConstraint {

public:
size_t Features() const;
FeatureInteractionConstraint() = default;
FeatureInteractionConstraintDevice() = default;
void Configure(tree::TrainParam const& param, int32_t const n_features);
FeatureInteractionConstraint(tree::TrainParam const& param, int32_t const n_features);
FeatureInteractionConstraint(FeatureInteractionConstraint const& that) = default;
FeatureInteractionConstraint(FeatureInteractionConstraint&& that) = default;
FeatureInteractionConstraintDevice(tree::TrainParam const& param, int32_t const n_features);
FeatureInteractionConstraintDevice(FeatureInteractionConstraintDevice const& that) = default;
FeatureInteractionConstraintDevice(FeatureInteractionConstraintDevice&& that) = default;
/*! \brief Reset before constructing a new tree. */
void Reset();
/*! \brief Return a list of features given node id */
Expand Down
29 changes: 29 additions & 0 deletions src/tree/param.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <vector>
#include <utility>

#include "xgboost/json.h"
#include "param.h"

namespace std {
Expand Down Expand Up @@ -79,3 +80,31 @@ std::istream &operator>>(std::istream &is, std::vector<int> &t) {
return is;
}
} // namespace std

namespace xgboost {
void ParseInteractionConstraint(
std::string const &constraint_str,
std::vector<std::vector<bst_feature_t>> *p_out) {
auto &out = *p_out;
auto j_inc = Json::Load({constraint_str.c_str(), constraint_str.size()});
auto const &all = get<Array>(j_inc);
out.resize(all.size());
for (size_t i = 0; i < all.size(); ++i) {
auto const &set = get<Array const>(all[i]);
for (auto const &v : set) {
if (XGBOOST_EXPECT(IsA<Integer>(v), true)) {
uint32_t u = static_cast<uint32_t const>(get<Integer const>(v));
out[i].emplace_back(u);
} else if (IsA<Number>(v)) {
double d = get<Number const>(v);
CHECK_EQ(std::floor(d), d)
<< "Found floating point number in interaction constraints";
out[i].emplace_back(static_cast<uint32_t const>(d));
} else {
LOG(FATAL) << "Unknown value type for interaction constraint:"
<< v.GetValue().TypeStr();
}
}
}
}
} // namespace xgboost
15 changes: 14 additions & 1 deletion src/tree/param.h
Original file line number Diff line number Diff line change
Expand Up @@ -483,8 +483,21 @@ struct SplitEntryContainer {
};

using SplitEntry = SplitEntryContainer<GradStats>;

} // namespace tree

/*
 * \brief Parse the interaction constraints from string.
 * \param constraint_str String storing the interaction constraints.
 *
 * Example input string:
 *
 *   "[[1, 2], [3, 4]]"
 *
 * \param p_out Pointer to output; one inner vector of feature indices is
 *              produced per constraint set in the input.
 */
void ParseInteractionConstraint(
std::string const &constraint_str,
std::vector<std::vector<xgboost::bst_feature_t>> *p_out);
} // namespace xgboost

// define string serializer for vector, to get the arguments
Expand Down
2 changes: 1 addition & 1 deletion src/tree/updater_gpu_hist.cu
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ struct GPUHistMakerDevice {
common::Monitor monitor;
std::vector<ValueConstraint> node_value_constraints;
common::ColumnSampler column_sampler;
FeatureInteractionConstraint interaction_constraints;
FeatureInteractionConstraintDevice interaction_constraints;

using ExpandQueue =
std::priority_queue<ExpandEntry, std::vector<ExpandEntry>,
Expand Down
6 changes: 3 additions & 3 deletions tests/cpp/tree/test_constraints.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@
namespace xgboost {
namespace {

struct FConstraintWrapper : public FeatureInteractionConstraint {
struct FConstraintWrapper : public FeatureInteractionConstraintDevice {
common::Span<LBitField64> GetNodeConstraints() {
return FeatureInteractionConstraint::s_node_constraints_;
return FeatureInteractionConstraintDevice::s_node_constraints_;
}
FConstraintWrapper(tree::TrainParam param, bst_feature_t n_features) :
FeatureInteractionConstraint(param, n_features) {}
FeatureInteractionConstraintDevice(param, n_features) {}

dh::device_vector<bst_feature_t> const& GetDSets() const {
return d_sets_;
Expand Down

0 comments on commit c245eb8

Please sign in to comment.