[POC] Implement Exact tree method for multi-target.
* Added a new exact tree method.
* Specialized many utilities for it.
trivialfis committed Jul 2, 2020
1 parent eb067c1 commit 53e6b57
Showing 58 changed files with 2,471 additions and 727 deletions.
1 change: 1 addition & 0 deletions amalgamation/xgboost-all0.cc
@@ -52,6 +52,7 @@
#include "../src/tree/tree_model.cc"
#include "../src/tree/tree_updater.cc"
#include "../src/tree/updater_colmaker.cc"
#include "../src/tree/updater_exact.cc"
#include "../src/tree/updater_quantile_hist.cc"
#include "../src/tree/updater_prune.cc"
#include "../src/tree/updater_refresh.cc"
1 change: 1 addition & 0 deletions demo/multi-target/.gitignore
@@ -0,0 +1 @@
*.png
142 changes: 142 additions & 0 deletions demo/multi-target/regression.py
@@ -0,0 +1,142 @@
'''The example is taken from:
https://scikit-learn.org/stable/auto_examples/tree/plot_tree_regression_multioutput.html#sphx-glr-auto-examples-tree-plot-tree-regression-multioutput-py

A multi-target tree may have lower accuracy due to its smaller model capacity,
but it provides better computational performance for prediction.

The current implementation supports only the exact tree method and is
considered highly experimental. We do not recommend any real-world usage.

There are 3 different ways to train a multi-target model:

- Train 1 model for each target manually. See `train_stacked_native` below.
- Train 1 stack of trees for each target with XGBoost. This is the default
  implementation, with `output_type` set to `single`.
- Train 1 stack of trees for all target variables, with each tree leaf being a
  vector. This can be enabled by setting `output_type` to `multi`.
'''

import argparse

import numpy as np
from matplotlib import pyplot as plt

import xgboost as xgb
from xgboost.experimental import XGBMultiRegressor

# Generate some random data with y being a circle.
rng = np.random.RandomState(1994)
X = np.sort(200 * rng.rand(100, 1) - 100, axis=0)
y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T
y[::5, :] += (0.5 - rng.rand(20, 2))

boosted_rounds = 32

y = y - y.min()
y: np.ndarray = y / y.max()
y = y.copy()


def plot_predt(y, y_predt, name):
    '''Plot the output prediction along with the labels.

    Parameters
    ----------
    y : np.ndarray
        labels
    y_predt : np.ndarray
        prediction from XGBoost.
    name : str
        output file name for matplotlib.
    '''
    s = 25
    plt.scatter(y[:, 0], y[:, 1], c="navy", s=s,
                edgecolor="black", label="data")
    plt.scatter(y_predt[:, 0], y_predt[:, 1], c="cornflowerblue", s=s,
                edgecolor="black", label="max_depth=2")
    plt.xlim([-1, 2])
    plt.ylim([-1, 2])
    plt.savefig(name + '.png')
    plt.close()


def train_multi_skl():
    '''Train a multi-target regression with XGBoost's scikit-learn interface.

    This function demonstrates training multi-target trees with a vector as
    the leaf value, as well as training a model that uses single-target trees
    with one stack of trees for each target variable.
    '''
    # Train with vector leaf trees.
    reg = XGBMultiRegressor(output_type='multi',
                            num_targets=y.shape[1],
                            n_estimators=boosted_rounds)
    reg.fit(X, y, eval_set=[(X, y)])
    y_predt = reg.predict(X)
    plot_predt(y, y_predt, 'skl-multi')

    # Train 1 stack of trees for each target variable.
    reg = XGBMultiRegressor(output_type='single',
                            num_targets=y.shape[1],
                            n_estimators=boosted_rounds)
    reg.fit(X, y, eval_set=[(X, y)])
    y_predt = reg.predict(X)
    plot_predt(y, y_predt, 'skl-single')


def train_multi_native():
    '''Train a multi-target regression with the native XGBoost interface.

    This function demonstrates training multi-target trees with a vector as
    the leaf value, as well as training a model that uses single-target trees
    with one stack of trees for each target variable.
    '''
    d = xgb.DMatrix(X, y)
    # Train with vector leaf trees.
    booster = xgb.train({'tree_method': 'exact',
                         'nthread': 16,
                         'output_type': 'multi',
                         'num_targets': y.shape[1],
                         'objective': 'reg:squarederror'
                         }, d,
                        num_boost_round=boosted_rounds,
                        evals=[(d, 'Train')])
    y_predt = booster.predict(d)
    plot_predt(y, y_predt, 'native-multi')

    # Train 1 stack of trees for each target variable.
    booster = xgb.train({'tree_method': 'exact',
                         'nthread': 16,
                         'output_type': 'single',
                         'num_targets': y.shape[1],
                         'objective': 'reg:squarederror'
                         }, d,
                        num_boost_round=boosted_rounds,
                        evals=[(d, 'Train')])
    y_predt = booster.predict(d)
    plot_predt(y, y_predt, 'native-single')


def train_stacked_native():
    '''Train 2 XGBoost models, each one targeting a single output variable.'''
    # Extract the first target variable.
    d = xgb.DMatrix(X, y[:, 0].copy())
    params = {'tree_method': 'exact',
              'objective': 'reg:squarederror'}
    booster = xgb.train(
        params, d, num_boost_round=boosted_rounds, evals=[(d, 'Train')])
    y_predt_0 = booster.predict(d)

    # Extract the second target variable.
    d = xgb.DMatrix(X, y[:, 1].copy())
    booster = xgb.train(params, d, num_boost_round=boosted_rounds)
    y_predt_1 = booster.predict(d)
    y_predt = np.stack([y_predt_0, y_predt_1], axis=-1)
    plot_predt(y, y_predt, 'stacked')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    args = parser.parse_args()

    train_multi_native()
    train_multi_skl()
    train_stacked_native()
27 changes: 17 additions & 10 deletions include/xgboost/base.h
@@ -57,16 +57,23 @@
#if defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4) && \
!defined(__CUDACC__)
#include <parallel/algorithm>
#include <parallel/numeric>
#define XGBOOST_PARALLEL_SORT(X, Y, Z) __gnu_parallel::sort((X), (Y), (Z))
#define XGBOOST_PARALLEL_STABLE_SORT(X, Y, Z) \
__gnu_parallel::stable_sort((X), (Y), (Z))
#define XGBOOST_PARALLEL_ACCUMULATE(__BEG, __END, __INIT, __OP) \
__gnu_parallel::accumulate(__BEG, __END, __INIT, __OP)
#elif defined(_MSC_VER) && (!__INTEL_COMPILER)
#include <ppl.h>
#define XGBOOST_PARALLEL_SORT(X, Y, Z) concurrency::parallel_sort((X), (Y), (Z))
#define XGBOOST_PARALLEL_STABLE_SORT(X, Y, Z) std::stable_sort((X), (Y), (Z))
#define XGBOOST_PARALLEL_ACCUMULATE(__BEG, __END, __INIT, __OP) \
std::accumulate(__BEG, __END, __INIT, __OP)
#else
#define XGBOOST_PARALLEL_SORT(X, Y, Z) std::sort((X), (Y), (Z))
#define XGBOOST_PARALLEL_STABLE_SORT(X, Y, Z) std::stable_sort((X), (Y), (Z))
#define XGBOOST_PARALLEL_ACCUMULATE(__BEG, __END, __INIT, __OP) \
std::accumulate(__BEG, __END, __INIT, __OP)
#endif // GLIBC VERSION

#if defined(__GNUC__)
@@ -135,8 +142,8 @@ class GradientPairInternal {
/*! \brief second order gradient statistics */
T hess_;

XGBOOST_DEVICE void SetGrad(T g) { grad_ = g; }
XGBOOST_DEVICE void SetHess(T h) { hess_ = h; }
XGBOOST_DEVICE void SetGrad(T g) { grad_ = std::move(g); }
XGBOOST_DEVICE void SetHess(T h) { hess_ = std::move(h); }

public:
using ValueT = T;
@@ -150,12 +157,9 @@
a += b;
}

XGBOOST_DEVICE GradientPairInternal() : grad_(0), hess_(0) {}

XGBOOST_DEVICE GradientPairInternal(T grad, T hess) {
SetGrad(grad);
SetHess(hess);
}
constexpr XGBOOST_DEVICE GradientPairInternal() : grad_(0), hess_(0) {}
constexpr XGBOOST_DEVICE GradientPairInternal(T grad, T hess)
: grad_{std::move(grad)}, hess_{std::move(hess)} {}

// Copy constructor if of same value type, marked as default to be trivially_copyable
GradientPairInternal(const GradientPairInternal<T> &g) = default;
@@ -168,8 +172,11 @@
SetHess(g.GetHess());
}

XGBOOST_DEVICE T GetGrad() const { return grad_; }
XGBOOST_DEVICE T GetHess() const { return hess_; }
XGBOOST_DEVICE T const& GetGrad() const { return grad_; }
XGBOOST_DEVICE T const& GetHess() const { return hess_; }

XGBOOST_DEVICE T& GetGrad() { return grad_; }
XGBOOST_DEVICE T& GetHess() { return hess_; }

XGBOOST_DEVICE GradientPairInternal<T> &operator+=(
const GradientPairInternal<T> &rhs) {
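For reference, a minimal sketch (not part of this commit) of how the new parallel accumulate macro and the revised gradient-pair interface might be exercised; it assumes the usual `xgboost::GradientPair` alias for `GradientPairInternal<float>`:

#include <functional>
#include <numeric>    // std::accumulate fallback used by the macro
#include <vector>
#include <xgboost/base.h>

int main() {
  // XGBOOST_PARALLEL_ACCUMULATE forwards to __gnu_parallel::accumulate on
  // recent GCC and to std::accumulate otherwise, as defined in the hunk above.
  std::vector<double> partial_sums(1024, 0.5);
  double total = XGBOOST_PARALLEL_ACCUMULATE(
      partial_sums.begin(), partial_sums.end(), 0.0, std::plus<double>{});

  // The constructors are now constexpr, and the new non-const getters return
  // references, allowing gradient statistics to be updated in place.
  xgboost::GradientPair sum;                 // zero-initialized grad/hess
  sum += xgboost::GradientPair{1.0f, 2.0f};
  sum.GetGrad() += static_cast<float>(total);
  return sum.GetHess() == 2.0f ? 0 : 1;
}
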
7 changes: 5 additions & 2 deletions include/xgboost/data.h
@@ -50,6 +50,8 @@ class MetaInfo {
uint64_t num_nonzero_{0}; // NOLINT
/*! \brief label of each instance */
HostDeviceVector<bst_float> labels_; // NOLINT
bst_row_t labels_rows;
bst_feature_t labels_cols { 1 };
/*!
* \brief the index of begin and end of a group
* needed when the learning task is ranking.
@@ -156,7 +158,7 @@
*
* Right now only 1 column is permitted.
*/
void SetInfo(const char* key, std::string const& interface_str);
void SetInfo(const char* key, std::string const& interface_str, int32_t device);

/*
* \brief Extend with other MetaInfo.
@@ -169,6 +171,7 @@
void Extend(MetaInfo const& that, bool accumulate_rows);

private:
void SetInfoDevice(const char* key, std::string const& interface_str);
/*! \brief argsort of labels */
mutable std::vector<size_t> label_order_cache_;
};
@@ -446,7 +449,7 @@ class DMatrix {
this->Info().SetInfo(key, dptr, dtype, num);
}
virtual void SetInfo(const char* key, std::string const& interface_str) {
this->Info().SetInfo(key, interface_str);
this->Info().SetInfo(key, interface_str, 0);
}
/*! \brief meta information of the dataset */
virtual const MetaInfo& Info() const = 0;
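A minimal sketch (not from the commit) of the revised array-interface path: `MetaInfo::SetInfo` now takes an explicit device ordinal, and `DMatrix::SetInfo` forwards device 0. The helper name and JSON argument below are hypothetical:

#include <cstdint>
#include <string>
#include <xgboost/data.h>

// Hypothetical helper: push labels through the array-interface overload.
void SetLabels(xgboost::MetaInfo* info, std::string const& array_interface,
               int32_t device) {
  // "label" is one of the recognized meta-info keys; `device` selects the
  // device ordinal the data lives on (the DMatrix wrapper above forwards 0).
  info->SetInfo("label", array_interface, device);
}
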
22 changes: 0 additions & 22 deletions include/xgboost/learner.h
@@ -226,27 +226,5 @@ class Learner : public Model, public Configurable, public rabit::Serializable {
/*! \brief Training parameter. */
GenericParameter generic_parameters_;
};

struct LearnerModelParamLegacy;

/*
* \brief Basic Model Parameters, used to describe the booster.
*/
struct LearnerModelParam {
/* \brief global bias */
bst_float base_score { 0.5f };
/* \brief number of features */
uint32_t num_feature { 0 };
/* \brief number of classes, if it is multi-class classification */
uint32_t num_output_group { 0 };

LearnerModelParam() = default;
// As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
// this one as an immutable copy.
LearnerModelParam(LearnerModelParamLegacy const& user_param, float base_margin);
/* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */
bool Initialized() const { return num_feature != 0; }
};

} // namespace xgboost
#endif // XGBOOST_LEARNER_H_
31 changes: 31 additions & 0 deletions include/xgboost/model.h
@@ -6,6 +6,8 @@
#ifndef XGBOOST_MODEL_H_
#define XGBOOST_MODEL_H_

#include <cstdint>

namespace dmlc {
class Stream;
} // namespace dmlc
@@ -41,6 +43,35 @@ struct Configurable {
*/
virtual void SaveConfig(Json* out) const = 0;
};

struct LearnerModelParamLegacy;

enum class OutputType : int32_t {
kSingle,
kMulti
};

/*
* \brief Basic Model Parameters, used to describe the booster.
*/
struct LearnerModelParam {
/* \brief global bias */
float base_score { 0.5 };
/* \brief number of features */
uint32_t num_feature { 0 };
/* \brief number of classes, if it is multi-class classification */
uint32_t num_output_group { 0 };
/* \brief number of target variables. */
uint32_t num_targets { 1 };
OutputType output_type { OutputType::kSingle };

LearnerModelParam() = default;
// As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
// this one as an immutable copy.
LearnerModelParam(LearnerModelParamLegacy const& user_param, float base_margin);
/* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */
bool Initialized() const { return num_feature != 0; }
};
} // namespace xgboost

#endif // XGBOOST_MODEL_H_
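A minimal sketch (not from the commit) of filling in the relocated `LearnerModelParam` for a two-target, vector-leaf booster; the field values are illustrative only:

#include <xgboost/model.h>

int main() {
  xgboost::LearnerModelParam param;
  param.num_feature = 1;         // one input feature, as in the demo above
  param.num_output_group = 1;    // plain regression, no multi-class grouping
  param.num_targets = 2;         // two output variables
  param.output_type = xgboost::OutputType::kMulti;  // vector-valued leaves

  // Initialized() only checks that num_feature has been set.
  return param.Initialized() ? 0 : 1;
}
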