Changing the threshold value of the method selection (#5)
* Changing hist border to 2^18
RukhovichIV authored Aug 30, 2021
1 parent bad86d3 commit 7719321
Showing 23 changed files with 392 additions and 152 deletions.
24 changes: 22 additions & 2 deletions Jenkinsfile-win64
@@ -40,7 +40,8 @@ pipeline {
steps {
script {
parallel ([
'build-win64-cuda10.1': { BuildWin64() }
'build-win64-cuda10.1': { BuildWin64() },
'build-rpkg-win64-cuda10.1': { BuildRPackageWithCUDAWin64() }
])
}
}
@@ -75,6 +76,7 @@ def checkoutSrcs() {

def BuildWin64() {
node('win64 && cuda10_unified') {
deleteDir()
unstash name: 'srcs'
echo "Building XGBoost for Windows AMD64 target..."
bat "nvcc --version"
@@ -115,8 +117,26 @@ def BuildWin64() {
}
}

def BuildRPackageWithCUDAWin64() {
node('win64 && cuda10_unified') {
deleteDir()
unstash name: 'srcs'
bat "nvcc --version"
if (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release')) {
bat """
bash tests/ci_build/build_r_pkg_with_cuda_win64.sh ${commit_id}
"""
echo 'Uploading R tarball...'
path = ("${BRANCH_NAME}" == 'master') ? '' : "${BRANCH_NAME}/"
s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', includePathPattern:'xgboost_r_gpu_win64_*.tar.gz'
}
deleteDir()
}
}

def TestWin64() {
node('win64 && cuda10_unified') {
deleteDir()
unstash name: 'srcs'
unstash name: 'xgboost_whl'
unstash name: 'xgboost_cli'
@@ -127,7 +147,7 @@
bat "build\\testxgboost.exe"
echo "Installing Python dependencies..."
def env_name = 'win64_' + UUID.randomUUID().toString().replaceAll('-', '')
bat "conda env create -n ${env_name} --file=tests/ci_build/conda_env/win64_test.yml"
bat "conda activate && mamba env create -n ${env_name} --file=tests/ci_build/conda_env/win64_test.yml"
echo "Installing Python wheel..."
bat """
conda activate ${env_name} && for /R %%i in (python-package\\dist\\*.whl) DO python -m pip install "%%i"
4 changes: 3 additions & 1 deletion cmake/Utils.cmake
@@ -90,7 +90,9 @@ function(format_gencode_flags flags out)
endif()
# Set up architecture flags
if(NOT flags)
if (CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
if (CUDA_VERSION VERSION_GREATER_EQUAL "11.1")
set(flags "50;52;60;61;70;75;80;86")
elseif (CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
set(flags "35;50;52;60;61;70;75;80")
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
set(flags "35;50;52;60;61;70;75")
13 changes: 9 additions & 4 deletions doc/install.rst
@@ -61,9 +61,12 @@ R
and then run ``install.packages("xgboost")``. Without OpenMP, XGBoost will only use a
single CPU core, leading to suboptimal training speed.

* We also provide **experimental** pre-built binary on Linux x86_64 with GPU support.
* We also provide **experimental** pre-built binary with GPU support. With this binary,
you will be able to use the GPU algorithm without building XGBoost from source.
Download the binary package from the Releases page. The file name will be of the form
``xgboost_r_gpu_linux_[version].tar.gz``. Then install XGBoost by running:
``xgboost_r_gpu_[os]_[version].tar.gz``, where ``[os]`` is either ``linux`` or ``win64``.
(We build the binaries for 64-bit Linux and Windows.)
Then install XGBoost by running:

.. code-block:: bash
@@ -142,9 +145,11 @@ R
-

Other than standard CRAN installation, we also provide *experimental* pre-built binary on
Linux x86_64 with GPU support. You can go to `this page
with GPU support. You can go to `this page
<https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/list.html>`_, find the commit
ID you want to install: ``xgboost_r_gpu_linux_[commit].tar.gz``, download it then run:
ID you want to install and then locate the file ``xgboost_r_gpu_[os]_[commit].tar.gz``,
where ``[os]`` is either ``linux`` or ``win64``. (We build the binaries for 64-bit Linux
and Windows.) Download it and run the following commands:

.. code-block:: bash
10 changes: 0 additions & 10 deletions doc/parameter.rst
@@ -243,16 +243,6 @@ Additional parameters for ``hist`` and ``gpu_hist`` tree method

- Use single precision to build histograms instead of double precision.

Additional parameters for ``gpu_hist`` tree method
==================================================

* ``deterministic_histogram``, [default=``true``]

- Build histogram on GPU deterministically. Histogram building is not deterministic due
to the non-associative aspect of floating point summation. We employ a pre-rounding
routine to mitigate the issue, which may lead to slightly lower accuracy. Set to
``false`` to disable it.

Additional parameters for Dart Booster (``booster=dart``)
=========================================================

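The paragraph removed above rests on floating point summation being non-associative. A minimal, self-contained C++ illustration of that fact (not part of this commit; included only to make the removed explanation concrete):

#include <cstdio>

int main() {
  // Single-precision addition is not associative: grouping changes rounding.
  float a = 1e8f, b = -1e8f, c = 1.0f;
  std::printf("%f\n", (a + b) + c);  // 1.000000: the large terms cancel first
  std::printf("%f\n", a + (b + c));  // 0.000000: c is absorbed into b and lost
  return 0;
}

With parallel atomics the grouping depends on thread scheduling, which is why the docs offered a pre-rounding mitigation rather than exact reproducibility.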
5 changes: 4 additions & 1 deletion include/xgboost/base.h
@@ -255,9 +255,12 @@ class GradientPairInternal {

/*! \brief gradient statistics pair usually needed in gradient boosting */
using GradientPair = detail::GradientPairInternal<float>;

/*! \brief High precision gradient statistics pair */
using GradientPairPrecise = detail::GradientPairInternal<double>;
/*! \brief Fixed point representation for gradient pair. */
using GradientPairInt32 = detail::GradientPairInternal<int>;
/*! \brief Fixed point representation for high precision gradient pair. */
using GradientPairInt64 = detail::GradientPairInternal<int64_t>;

using Args = std::vector<std::pair<std::string, std::string> >;

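The new GradientPairInt32 and GradientPairInt64 store gradients in fixed point, and integer addition is associative, so an accumulated sum no longer depends on the order in which threads add their contributions. A rough sketch of the idea; the scale factor and helper functions below are illustrative assumptions, not XGBoost's actual quantization code:

#include <cstdint>
#include <cstdio>

// Hypothetical fixed-point codec: scale floating point gradients to integers
// so that any summation order produces a bit-identical total.
constexpr double kScale = 1u << 16;  // assumed precision, for illustration only

int64_t ToFixed(double g) { return static_cast<int64_t>(g * kScale); }
double FromFixed(int64_t v) { return static_cast<double>(v) / kScale; }

int main() {
  int64_t sum = ToFixed(0.25) + ToFixed(-1.5) + ToFixed(3.0);
  std::printf("%f\n", FromFixed(sum));  // 1.750000, independent of add order
  return 0;
}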
2 changes: 1 addition & 1 deletion python-package/setup.py
@@ -302,7 +302,7 @@ def run(self):

with open(os.path.join(CURRENT_DIR, 'README.rst'), encoding='utf-8') as fd:
description = fd.read()
with open(os.path.join(CURRENT_DIR, 'xgboost/VERSION')) as fd:
with open(os.path.join(CURRENT_DIR, 'xgboost/VERSION'), encoding="ascii") as fd:
version = fd.read().strip()

setup(name='xgboost',
2 changes: 1 addition & 1 deletion python-package/xgboost/__init__.py
@@ -22,7 +22,7 @@
pass

VERSION_FILE = os.path.join(os.path.dirname(__file__), 'VERSION')
with open(VERSION_FILE) as f:
with open(VERSION_FILE, encoding="ascii") as f:
__version__ = f.read().strip()

__all__ = ['DMatrix', 'DeviceQuantileDMatrix', 'Booster', 'DataIter',
4 changes: 2 additions & 2 deletions python-package/xgboost/compat.py
@@ -70,7 +70,7 @@ class XGBoostLabelEncoder(LabelEncoder):
'''Label encoder with JSON serialization methods.'''
def to_json(self):
'''Returns a JSON compatible dictionary'''
meta = dict()
meta = {}
for k, v in self.__dict__.items():
if isinstance(v, np.ndarray):
meta[k] = v.tolist()
@@ -81,7 +81,7 @@ def to_json(self):
def from_json(self, doc):
# pylint: disable=attribute-defined-outside-init
'''Load the encoder back from a JSON compatible dict.'''
meta = dict()
meta = {}
for k, v in doc.items():
if k == 'classes_':
self.classes_ = np.array(v)
3 changes: 2 additions & 1 deletion python-package/xgboost/core.py
@@ -2197,7 +2197,8 @@ def dump_model(self, fout, fmap='', with_stats=False, dump_format="text"):
"""
if isinstance(fout, (STRING_TYPES, os.PathLike)):
fout = os.fspath(os.path.expanduser(fout))
fout = open(fout, 'w') # pylint: disable=consider-using-with
# pylint: disable=consider-using-with
fout = open(fout, 'w', encoding="utf-8")
need_close = True
else:
need_close = False
6 changes: 3 additions & 3 deletions python-package/xgboost/sklearn.py
@@ -538,7 +538,7 @@ def get_xgb_params(self) -> Dict[str, Any]:
'importance_type', 'kwargs', 'missing', 'n_estimators', 'use_label_encoder',
"enable_categorical"
}
filtered = dict()
filtered = {}
for k, v in params.items():
if k not in wrapper_specific and not callable(v):
filtered[k] = v
@@ -557,7 +557,7 @@ def _get_type(self) -> str:
return self._estimator_type # pylint: disable=no-member

def save_model(self, fname: Union[str, os.PathLike]) -> None:
meta = dict()
meta = {}
for k, v in self.__dict__.items():
if k == '_le':
meta['_le'] = self._le.to_json()
@@ -596,7 +596,7 @@ def load_model(self, fname: Union[str, bytearray, os.PathLike]) -> None:
)
return
meta = json.loads(meta_str)
states = dict()
states = {}
for k, v in meta.items():
if k == '_le':
self._le = XGBoostLabelEncoder()
56 changes: 49 additions & 7 deletions src/common/device_helpers.cuh
@@ -1,5 +1,5 @@
/*!
* Copyright 2017-2020 XGBoost contributors
* Copyright 2017-2021 XGBoost contributors
*/
#pragma once
#include <thrust/device_ptr.h>
@@ -98,24 +98,28 @@ template <typename T = size_t,
std::enable_if_t<std::is_same<size_t, T>::value &&
!std::is_same<size_t, unsigned long long>::value> * = // NOLINT
nullptr>
T __device__ __forceinline__ atomicAdd(T *addr, T v) { // NOLINT
XGBOOST_DEV_INLINE T atomicAdd(T *addr, T v) { // NOLINT
using Type = typename dh::detail::AtomicDispatcher<sizeof(T)>::Type;
Type ret = ::atomicAdd(reinterpret_cast<Type *>(addr), static_cast<Type>(v));
return static_cast<T>(ret);
}

namespace dh {

#ifdef XGBOOST_USE_NCCL
#define safe_nccl(ans) ThrowOnNcclError((ans), __FILE__, __LINE__)

inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file,
int line) {
int line) {
if (code != ncclSuccess) {
std::stringstream ss;
ss << "NCCL failure :" << ncclGetErrorString(code) << " ";
ss << file << "(" << line << ")";
throw std::runtime_error(ss.str());
ss << "NCCL failure :" << ncclGetErrorString(code);
if (code == ncclUnhandledCudaError) {
// nccl usually preserves the last error so we can get more details.
auto err = cudaPeekAtLastError();
ss << " " << thrust::system_error(err, thrust::cuda_category()).what();
}
ss << " " << file << "(" << line << ")";
LOG(FATAL) << ss.str();
}

return code;
@@ -1104,6 +1108,44 @@ XGBOOST_DEV_INLINE void AtomicAddGpair(OutputGradientT* dest,
static_cast<typename OutputGradientT::ValueT>(gpair.GetHess()));
}

/**
* \brief An atomicAdd designed for gradient pair with better performance. For general
* int64_t atomicAdd, one can simply cast it to unsigned long long.
*/
XGBOOST_DEV_INLINE void AtomicAdd64As32(int64_t *dst, int64_t src) {
uint32_t* y_low = reinterpret_cast<uint32_t *>(dst);
uint32_t *y_high = y_low + 1;

auto cast_src = reinterpret_cast<uint64_t *>(&src);

uint32_t const x_low = static_cast<uint32_t>(src);
uint32_t const x_high = (*cast_src) >> 32;

auto const old = atomicAdd(y_low, x_low);
uint32_t const carry = old > (std::numeric_limits<uint32_t>::max() - x_low) ? 1 : 0;
uint32_t const sig = x_high + carry;
atomicAdd(y_high, sig);
}

XGBOOST_DEV_INLINE void
AtomicAddGpair(xgboost::GradientPairInt64 *dest,
xgboost::GradientPairInt64 const &gpair) {
auto dst_ptr = reinterpret_cast<int64_t *>(dest);
auto g = gpair.GetGrad();
auto h = gpair.GetHess();

AtomicAdd64As32(dst_ptr, g);
AtomicAdd64As32(dst_ptr + 1, h);
}

XGBOOST_DEV_INLINE void
AtomicAddGpair(xgboost::GradientPairInt32 *dest,
xgboost::GradientPairInt32 const &gpair) {
auto dst_ptr = reinterpret_cast<typename xgboost::GradientPairInt32::ValueT*>(dest);

::atomicAdd(dst_ptr, static_cast<int>(gpair.GetGrad()));
::atomicAdd(dst_ptr + 1, static_cast<int>(gpair.GetHess()));
}

// Thrust version of this function causes error on Windows
template <typename ReturnT, typename IterT, typename FuncT>
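AtomicAdd64As32 above trades one 64-bit atomic for two 32-bit ones: it adds the low words, derives a carry from the value the low word held before the add (the value atomicAdd returns), and folds that carry into the high-word add. A sequential host-side C++ sketch of the same carry logic, with plain loads and stores standing in for the atomics; like the device code, it assumes a little-endian word layout:

#include <cstdint>
#include <cstdio>
#include <limits>

// Add src into *dst as two 32-bit halves. The carry for the high half comes
// from the low half's pre-add value, mirroring atomicAdd's return value.
void Add64As32(int64_t *dst, int64_t src) {
  uint32_t *y_low = reinterpret_cast<uint32_t *>(dst);  // low word first
  uint32_t *y_high = y_low + 1;

  uint32_t const x_low = static_cast<uint32_t>(src);
  uint32_t const x_high = static_cast<uint64_t>(src) >> 32;

  uint32_t const old = *y_low;  // device code: old = atomicAdd(y_low, x_low)
  *y_low = old + x_low;
  uint32_t const carry =
      old > (std::numeric_limits<uint32_t>::max() - x_low) ? 1 : 0;
  *y_high += x_high + carry;  // device code: atomicAdd(y_high, x_high + carry)
}

int main() {
  int64_t acc = 0xFFFFFFFF;  // low word one step away from wrapping
  Add64As32(&acc, 1);
  std::printf("%lld\n", static_cast<long long>(acc));  // 4294967296 = 1 << 32
  return 0;
}

Concurrent low-word adds each observe a distinct pre-add value, so every wrap-around produces exactly one carry and the final 64-bit total is exact once all adds have completed.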
2 changes: 1 addition & 1 deletion src/gbm/gbtree.cc
@@ -142,7 +142,7 @@ void GBTree::PerformTreeMethodHeuristic(DMatrix* fmat) {
LOG(INFO) << "Tree method is automatically set to 'approx' "
"since external-memory data matrix is used.";
tparam_.tree_method = TreeMethod::kApprox;
} else if (fmat->Info().num_row_ >= (4UL << 20UL)) {
} else if (fmat->Info().num_row_ >= (1UL << 18UL)) {
/* Choose tree_method='hist' automatically for large data matrix */
LOG(INFO) << "Tree method is automatically selected to be "
"'hist' for faster work. To use the old behavior "
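This last hunk is the change the commit title refers to: the row-count border at which XGBoost automatically selects tree_method='hist' drops from 4 * 2^20 to 2^18, matching the "2^18" named in the commit message. The shift arithmetic, spelled out in C++:

#include <cstdio>

int main() {
  // Old and new borders for the automatic 'hist' selection, from the diff.
  std::printf("old: %llu rows\n", 4ULL << 20);  // 4194304 = 4 * 2^20
  std::printf("new: %llu rows\n", 1ULL << 18);  // 262144 = 2^18
  return 0;
}

So 'hist' now kicks in for datasets sixteen times smaller than before.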