diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2e7f16864805..db99d9a9d609 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@ if(PLUGIN_SYCL)
string(REPLACE " -isystem ${CONDA_PREFIX}/include" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()
-project(xgboost LANGUAGES CXX C VERSION 2.1.1)
+project(xgboost LANGUAGES CXX C VERSION 2.1.2)
include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index db5290334b49..f6dbf02cbaec 100644
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -1,8 +1,8 @@
Package: xgboost
Type: Package
Title: Extreme Gradient Boosting
-Version: 2.1.1.1
-Date: 2024-07-30
+Version: 2.1.2.1
+Date: 2024-10-23
Authors@R: c(
person("Tianqi", "Chen", role = c("aut"),
email = "tianqi.tchen@gmail.com"),
diff --git a/R-package/configure b/R-package/configure
index f6deab049b2d..4d7f4ab5c906 100755
--- a/R-package/configure
+++ b/R-package/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.71 for xgboost 2.1.1.
+# Generated by GNU Autoconf 2.71 for xgboost 2.1.2.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -607,8 +607,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='xgboost'
PACKAGE_TARNAME='xgboost'
-PACKAGE_VERSION='2.1.1'
-PACKAGE_STRING='xgboost 2.1.1'
+PACKAGE_VERSION='2.1.2'
+PACKAGE_STRING='xgboost 2.1.2'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1259,7 +1259,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures xgboost 2.1.1 to adapt to many kinds of systems.
+\`configure' configures xgboost 2.1.2 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1321,7 +1321,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of xgboost 2.1.1:";;
+ short | recursive ) echo "Configuration of xgboost 2.1.2:";;
esac
cat <<\_ACEOF
@@ -1404,7 +1404,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-xgboost configure 2.1.1
+xgboost configure 2.1.2
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1603,7 +1603,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by xgboost $as_me 2.1.1, which was
+It was created by xgboost $as_me 2.1.2, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
@@ -2732,11 +2732,11 @@ if test x$ac_prog_cxx_stdcxx = xno
then :
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++11 features" >&5
printf %s "checking for $CXX option to enable C++11 features... " >&6; }
-if test ${ac_cv_prog_cxx_11+y}
+if test ${ac_cv_prog_cxx_cxx11+y}
then :
printf %s "(cached) " >&6
else $as_nop
- ac_cv_prog_cxx_11=no
+ ac_cv_prog_cxx_cxx11=no
ac_save_CXX=$CXX
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -2778,11 +2778,11 @@ if test x$ac_prog_cxx_stdcxx = xno
then :
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++98 features" >&5
printf %s "checking for $CXX option to enable C++98 features... " >&6; }
-if test ${ac_cv_prog_cxx_98+y}
+if test ${ac_cv_prog_cxx_cxx98+y}
then :
printf %s "(cached) " >&6
else $as_nop
- ac_cv_prog_cxx_98=no
+ ac_cv_prog_cxx_cxx98=no
ac_save_CXX=$CXX
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -3709,7 +3709,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by xgboost $as_me 2.1.1, which was
+This file was extended by xgboost $as_me 2.1.2, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -3773,7 +3773,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
-xgboost config.status 2.1.1
+xgboost config.status 2.1.2
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"
diff --git a/R-package/configure.ac b/R-package/configure.ac
index 46b251708fba..516160ffd5bd 100644
--- a/R-package/configure.ac
+++ b/R-package/configure.ac
@@ -2,7 +2,7 @@
AC_PREREQ(2.69)
-AC_INIT([xgboost],[2.1.1],[],[xgboost],[])
+AC_INIT([xgboost],[2.1.2],[],[xgboost],[])
: ${R_HOME=`R RHOME`}
if test -z "${R_HOME}"; then
diff --git a/dev/release-artifacts.py b/dev/release-artifacts.py
index 4c4340984b6e..5d7854e06719 100644
--- a/dev/release-artifacts.py
+++ b/dev/release-artifacts.py
@@ -1,6 +1,7 @@
-"""Simple script for managing Python, R, and source release packages.
+"""
+Simple script for managing Python, R, and source release packages.
-tqdm, sh are required to run this script.
+tqdm, sh, build, twine are required to run this script.
"""
import argparse
@@ -10,15 +11,15 @@
import tarfile
import tempfile
from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple
from urllib.request import urlretrieve
import tqdm
from packaging import version
from sh.contrib import git
-# The package building is managed by Jenkins CI.
-PREFIX = "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/release_"
+# S3 bucket hosting the release artifacts
+S3_BUCKET_URL = "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds"
ROOT = Path(__file__).absolute().parent.parent
DIST = ROOT / "python-package" / "dist"
@@ -26,9 +27,9 @@
class DirectoryExcursion:
- def __init__(self, path: Union[os.PathLike, str]) -> None:
+ def __init__(self, path: Path) -> None:
self.path = path
- self.curdir = os.path.normpath(os.path.abspath(os.path.curdir))
+ self.curdir = Path.cwd().resolve()
def __enter__(self) -> None:
os.chdir(self.path)
@@ -37,75 +38,85 @@ def __exit__(self, *args: Any) -> None:
os.chdir(self.curdir)
-def show_progress(block_num, block_size, total_size):
- "Show file download progress."
+def show_progress(block_num: int, block_size: int, total_size: int) -> None:
+ """Show file download progress."""
global pbar
if pbar is None:
pbar = tqdm.tqdm(total=total_size / 1024, unit="kB")
downloaded = block_num * block_size
if downloaded < total_size:
- upper = (total_size - downloaded) / 1024
- pbar.update(min(block_size / 1024, upper))
+ pbar.update(min(block_size / 1024, (total_size - downloaded) / 1024))
else:
pbar.close()
pbar = None
-def retrieve(url, filename=None):
- print(f"{url} -> {filename}")
- return urlretrieve(url, filename, reporthook=show_progress)
+def retrieve(url: str, filename: Optional[Path] = None) -> str:
+ """Retrieve a file from a URL with progress indication."""
+ print(f"Downloading {url} -> {filename}")
+ return urlretrieve(url, filename, reporthook=show_progress)[0]
def latest_hash() -> str:
- "Get latest commit hash."
- ret = subprocess.run(["git", "rev-parse", "HEAD"], capture_output=True)
- assert ret.returncode == 0, "Failed to get latest commit hash."
- commit_hash = ret.stdout.decode("utf-8").strip()
- return commit_hash
-
-
-def download_wheels(
+ """Get latest commit hash."""
+ try:
+ result = subprocess.run(
+ ["git", "rev-parse", "HEAD"],
+ check=True,
+ capture_output=True,
+ text=True,
+ encoding="utf-8",
+ )
+ return result.stdout.strip()
+ except subprocess.CalledProcessError as e:
+ raise RuntimeError("Failed to get latest commit hash.") from e
+
+
+def _download_python_wheels(
platforms: List[str],
- dir_URL: str,
+ dir_url: str,
src_filename_prefix: str,
target_filename_prefix: str,
- outdir: str,
-) -> List[str]:
- """Download all binary wheels. dir_URL is the URL for remote directory storing the
- release wheels.
+ outdir: Path,
+) -> List[Path]:
+ """Download all Python binary wheels for a given set of platforms"""
+ wheel_paths = []
+ dist_dir = outdir / "dist"
+ dist_dir.mkdir(exist_ok=True)
- """
+ for platform in platforms:
+ src_wheel = f"{src_filename_prefix}{platform}.whl"
+ url = f"{dir_url}{src_wheel}"
+ target_wheel = f"{target_filename_prefix}{platform}.whl"
+ wheel_path = dist_dir / target_wheel
+ wheel_paths.append(wheel_path)
+
+ retrieve(url=url, filename=wheel_path)
+
+ try:
+ result = subprocess.run(
+ ["twine", "check", str(wheel_path)],
+ check=True,
+ capture_output=True,
+ text=True,
+ encoding="utf-8",
+ )
+ if "warning" in result.stderr or "warning" in result.stdout:
+ raise RuntimeError(
+ f"Unresolved warnings:\n{result.stderr}\n{result.stdout}"
+ )
+ except subprocess.CalledProcessError as e:
+ raise RuntimeError("Failed twine check") from e
+ return wheel_paths
- filenames = []
- outdir = os.path.join(outdir, "dist")
- if not os.path.exists(outdir):
- os.mkdir(outdir)
- for platform in platforms:
- src_wheel = src_filename_prefix + platform + ".whl"
- url = dir_URL + src_wheel
-
- target_wheel = target_filename_prefix + platform + ".whl"
- filename = os.path.join(outdir, target_wheel)
- filenames.append(filename)
- retrieve(url=url, filename=filename)
- ret = subprocess.run(["twine", "check", filename], capture_output=True)
- assert ret.returncode == 0, "Failed twine check"
- stderr = ret.stderr.decode("utf-8")
- stdout = ret.stdout.decode("utf-8")
- assert stderr.find("warning") == -1, "Unresolved warnings:\n" + stderr
- assert stdout.find("warning") == -1, "Unresolved warnings:\n" + stdout
- return filenames
-
-
-def make_pysrc_wheel(
- release: str, rc: Optional[str], rc_ver: Optional[int], outdir: str
+def make_python_sdist(
+ release: str, rc: Optional[str], rc_ver: Optional[int], outdir: Path
) -> None:
"""Make Python source distribution."""
- dist = os.path.abspath(os.path.normpath(os.path.join(outdir, "dist")))
- if not os.path.exists(dist):
- os.mkdir(dist)
+ dist_dir = outdir / "dist"
+ dist_dir.mkdir(exist_ok=True)
# Apply patch to remove NCCL dependency
# Save the original content of pyproject.toml so that we can restore it later
@@ -114,47 +125,59 @@ def make_pysrc_wheel(
orig_pyproj_lines = f.read()
with open("tests/buildkite/remove_nccl_dep.patch", "r") as f:
patch_lines = f.read()
- subprocess.run(["patch", "-p0"], input=patch_lines, text=True)
-
- with DirectoryExcursion(os.path.join(ROOT, "python-package")):
- subprocess.check_call(["python", "-m", "build", "--sdist"])
- if rc is not None:
- name = f"xgboost-{release}{rc}{rc_ver}.tar.gz"
- else:
- name = f"xgboost-{release}.tar.gz"
- src = os.path.join(DIST, name)
- subprocess.check_call(["twine", "check", src])
- target = os.path.join(dist, name)
- shutil.move(src, target)
+ subprocess.run(
+ ["patch", "-p0"], input=patch_lines, check=True, text=True, encoding="utf-8"
+ )
+
+ with DirectoryExcursion(ROOT / "python-package"):
+ subprocess.run(["python", "-m", "build", "--sdist"], check=True)
+ sdist_name = (
+ f"xgboost-{release}{rc}{rc_ver}.tar.gz"
+ if rc
+ else f"xgboost-{release}.tar.gz"
+ )
+ src = DIST / sdist_name
+ subprocess.run(["twine", "check", str(src)], check=True)
+ dest = dist_dir / sdist_name
+ shutil.move(src, dest)
with DirectoryExcursion(ROOT):
with open("python-package/pyproject.toml", "w") as f:
- print(orig_pyproj_lines, file=f, end="")
+ f.write(orig_pyproj_lines)
-def download_py_packages(
- branch: str, major: int, minor: int, commit_hash: str, outdir: str
-) -> None:
- platforms = [
+def download_python_wheels(branch: str, commit_hash: str, outdir: Path) -> None:
+ """Download all Python binary wheels for the specified branch."""
+ full_platforms = [
"win_amd64",
+ "manylinux2014_x86_64",
+ "manylinux2014_aarch64",
"manylinux_2_28_x86_64",
"manylinux_2_28_aarch64",
"macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64",
"macosx_12_0_arm64",
]
+ minimal_platforms = [
+ "win_amd64",
+ "manylinux2014_x86_64",
+ "manylinux2014_aarch64",
+ ]
- branch = branch.split("_")[1] # release_x.y.z
- dir_URL = PREFIX + branch + "/"
- src_filename_prefix = "xgboost-" + args.release + "%2B" + commit_hash + "-py3-none-"
- target_filename_prefix = "xgboost-" + args.release + "-py3-none-"
-
- if not os.path.exists(DIST):
- os.mkdir(DIST)
-
- filenames = download_wheels(
- platforms, dir_URL, src_filename_prefix, target_filename_prefix, outdir
- )
- print("List of downloaded wheels:", filenames)
+ dir_url = f"{S3_BUCKET_URL}/{branch}/"
+ wheels = []
+
+ for pkg_name, platforms in [
+ ("xgboost", full_platforms),
+ ("xgboost_cpu", minimal_platforms),
+ ]:
+ src_filename_prefix = f"{pkg_name}-{args.release}%2B{commit_hash}-py3-none-"
+ target_filename_prefix = f"{pkg_name}-{args.release}-py3-none-"
+ wheels.extend(
+ _download_python_wheels(
+ platforms, dir_url, src_filename_prefix, target_filename_prefix, outdir
+ )
+ )
+ print(f"List of downloaded wheels: {wheels}")
print(
"""
Following steps should be done manually:
@@ -164,89 +187,99 @@ def download_py_packages(
)
-def download_r_packages(
- release: str, branch: str, rc: str, commit: str, outdir: str
+def download_r_artifacts(
+ release: str, branch: str, rc: str, commit: str, outdir: Path
) -> Tuple[Dict[str, str], List[str]]:
+ """Download R package artifacts for the specified release and branch."""
platforms = ["linux"]
- dirname = os.path.join(outdir, "r-packages")
- if not os.path.exists(dirname):
- os.mkdir(dirname)
+ rpkg_dir = outdir / "r-packages"
+ rpkg_dir.mkdir(exist_ok=True)
- filenames = []
- branch = branch.split("_")[1] # release_x.y.z
+ artifacts = []
urls = {}
for plat in platforms:
- url = f"{PREFIX}{branch}/xgboost_r_gpu_{plat}_{commit}.tar.gz"
-
- if not rc:
- filename = f"xgboost_r_gpu_{plat}_{release}.tar.gz"
- else:
- filename = f"xgboost_r_gpu_{plat}_{release}-{rc}.tar.gz"
-
- target = os.path.join(dirname, filename)
- retrieve(url=url, filename=target)
- filenames.append(target)
+ url = f"{S3_BUCKET_URL}/{branch}/xgboost_r_gpu_{plat}_{commit}.tar.gz"
+ artifact_name = (
+ f"xgboost_r_gpu_{plat}_{release}-{rc}.tar.gz"
+ if rc
+ else f"xgboost_r_gpu_{plat}_{release}.tar.gz"
+ )
+ artifact_path = rpkg_dir / artifact_name
+ retrieve(url=url, filename=artifact_path)
+ artifacts.append(artifact_path)
urls[plat] = url
- print("Finished downloading R packages:", filenames)
+ print(f"Finished downloading R package artifacts: {artifacts}")
hashes = []
- with DirectoryExcursion(os.path.join(outdir, "r-packages")):
- for f in filenames:
- ret = subprocess.run(
- ["sha256sum", os.path.basename(f)], capture_output=True
+ with DirectoryExcursion(rpkg_dir):
+ for f in artifacts:
+ result = subprocess.run(
+ ["sha256sum", f.name],
+ check=True,
+ capture_output=True,
+ text=True,
+ encoding="utf-8",
)
- h = ret.stdout.decode().strip()
- hashes.append(h)
+ hashes.append(result.stdout.strip())
return urls, hashes
-def check_path():
- root = os.path.abspath(os.path.curdir)
- assert os.path.basename(root) == "xgboost", "Must be run on project root."
+def check_path() -> None:
+ """Ensure the script is run from the project root directory."""
+ current_dir = Path.cwd().resolve()
+ if current_dir.name != "xgboost":
+ raise RuntimeError("Must be run from the project root directory.")
-def make_src_package(release: str, outdir: str) -> Tuple[str, str]:
- tarname = f"xgboost-{release}.tar.gz"
- tarpath = os.path.join(outdir, tarname)
- if os.path.exists(tarpath):
- os.remove(tarpath)
+def make_src_tarball(release: str, outdir: Path) -> Tuple[str, str]:
+ tarball_name = f"xgboost-{release}.tar.gz"
+ tarball_path = outdir / tarball_name
+ if tarball_path.exists():
+ tarball_path.unlink()
with tempfile.TemporaryDirectory() as tmpdir_str:
tmpdir = Path(tmpdir_str)
- shutil.copytree(os.path.curdir, tmpdir / "xgboost")
+ shutil.copytree(Path.cwd(), tmpdir / "xgboost")
with DirectoryExcursion(tmpdir / "xgboost"):
- ret = subprocess.run(
+ result = subprocess.run(
["git", "submodule", "foreach", "--quiet", "echo $sm_path"],
+ check=True,
capture_output=True,
+ text=True,
+ encoding="utf-8",
)
- submodules = ret.stdout.decode().strip().split()
+ submodules = result.stdout.strip().split()
for mod in submodules:
- mod_path = os.path.join(os.path.abspath(os.path.curdir), mod, ".git")
- os.remove(mod_path)
+ mod_path = Path.cwd().resolve() / mod / ".git"
+ mod_path.unlink()
shutil.rmtree(".git")
- with tarfile.open(tarpath, "x:gz") as tar:
- src = tmpdir / "xgboost"
- tar.add(src, arcname="xgboost")
+ with tarfile.open(tarball_path, "x:gz") as tar:
+ tar.add(tmpdir / "xgboost", arcname="xgboost")
- with DirectoryExcursion(os.path.dirname(tarpath)):
- ret = subprocess.run(["sha256sum", tarname], capture_output=True)
- h = ret.stdout.decode().strip()
- return tarname, h
+ with DirectoryExcursion(tarball_path.parent):
+ result = subprocess.run(
+ ["sha256sum", tarball_name],
+ check=True,
+ capture_output=True,
+ text=True,
+ encoding="utf-8",
+ )
+ sha256sum = result.stdout.strip()
+ return tarball_name, sha256sum
def release_note(
release: str,
artifact_hashes: List[str],
r_urls: Dict[str, str],
- tarname: str,
- outdir: str,
+ tarball_name: str,
+ outdir: Path,
) -> None:
"""Generate a note for GitHub release description."""
r_gpu_linux_url = r_urls["linux"]
- r_gpu_win64_url = r_urls["win64"]
src_tarball = (
- f"https://github.com/dmlc/xgboost/releases/download/v{release}/{tarname}"
+ f"https://github.com/dmlc/xgboost/releases/download/v{release}/{tarball_name}"
)
hash_note = "\n".join(artifact_hashes)
@@ -265,44 +298,41 @@ def release_note(
**Experimental binary packages for R with CUDA enabled**
* xgboost_r_gpu_linux_{release}.tar.gz: [Download]({r_gpu_linux_url})
-* xgboost_r_gpu_win64_{release}.tar.gz: [Download]({r_gpu_win64_url})
**Source tarball**
* xgboost.tar.gz: [Download]({src_tarball})"""
print(end_note)
- with open(os.path.join(outdir, "end_note.md"), "w") as fd:
- fd.write(end_note)
+ with open(outdir / "end_note.md", "w") as f:
+ f.write(end_note)
def main(args: argparse.Namespace) -> None:
check_path()
- rel = version.parse(args.release)
- assert isinstance(rel, version.Version)
-
- major = rel.major
- minor = rel.minor
- patch = rel.micro
+ release_parsed: version.Version = version.parse(args.release)
+ print(f"Release: {release_parsed}")
- print("Release:", rel)
- if not rel.is_prerelease:
+ major = release_parsed.major
+ minor = release_parsed.minor
+ patch = release_parsed.micro
+ if not release_parsed.is_prerelease:
# Major release
rc: Optional[str] = None
rc_ver: Optional[int] = None
else:
# RC release
- major = rel.major
- minor = rel.minor
- patch = rel.micro
- assert rel.pre is not None
- rc, rc_ver = rel.pre
- assert rc == "rc"
-
- release = str(major) + "." + str(minor) + "." + str(patch)
+ rc, rc_ver = release_parsed.pre
+ if rc != "rc":
+ raise ValueError(
+ "Only supports release candidates with 'rc' in the version string"
+ )
+
+ # Release string with only major, minor, patch components
+ release = f"{major}.{minor}.{patch}"
if args.branch is not None:
branch = args.branch
else:
- branch = "release_" + str(major) + "." + str(minor) + ".0"
+ branch = f"release_{major}.{minor}.0"
git.clean("-xdf")
git.checkout(branch)
@@ -310,33 +340,35 @@ def main(args: argparse.Namespace) -> None:
git.submodule("update")
commit_hash = latest_hash()
- outdir = os.path.abspath(args.outdir)
- if not os.path.exists(outdir):
- os.mkdir(outdir)
+ outdir = Path(args.outdir).resolve()
+ if ROOT in outdir.parents:
+ raise ValueError("Output directory must be outside of the source tree.")
+ outdir.mkdir(exist_ok=True)
+
+ artifact_hashes: List[str] = []
- # source tarball
- hashes: List[str] = []
- tarname, h = make_src_package(release, outdir)
- hashes.append(h)
+ # Source tarball
+ tarball_name, hash = make_src_tarball(release, outdir)
+ artifact_hashes.append(hash)
# CUDA R packages
- urls, hr = download_r_packages(
+ urls, hashes = download_r_artifacts(
release,
branch,
- "" if rc is None else rc + str(rc_ver),
+ "" if rc is None else f"rc{rc_ver}",
commit_hash,
outdir,
)
- hashes.extend(hr)
+ artifact_hashes.extend(hashes)
# Python source wheel
- make_pysrc_wheel(release, rc, rc_ver, outdir)
+ make_python_sdist(release, rc, rc_ver, outdir)
# Python binary wheels
- download_py_packages(branch, major, minor, commit_hash, outdir)
+ download_python_wheels(branch, commit_hash, outdir)
# Write end note
- release_note(release, hashes, urls, tarname, outdir)
+ release_note(release, artifact_hashes, urls, tarball_name, outdir)
if __name__ == "__main__":
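Note on the release-script changes above: the reworked helpers assemble wheel URLs from `S3_BUCKET_URL`, the release branch, and the commit hash, then verify every download with `twine check`. A minimal sketch of the URL scheme (branch and commit values here are hypothetical; `%2B` is the URL-encoded `+` separating version and build tag):

```python
S3_BUCKET_URL = "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds"
branch, release, commit = "release_2.1.0", "2.1.2", "0123abc"  # hypothetical values

for pkg in ("xgboost", "xgboost_cpu"):
    # The hosted wheel carries the commit as a build tag; the published name drops it.
    src = f"{S3_BUCKET_URL}/{branch}/{pkg}-{release}%2B{commit}-py3-none-win_amd64.whl"
    dst = f"{pkg}-{release}-py3-none-win_amd64.whl"
    print(src, "->", dst)
```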
diff --git a/include/xgboost/collective/socket.h b/include/xgboost/collective/socket.h
index c5dd977f6255..84885cf317f5 100644
--- a/include/xgboost/collective/socket.h
+++ b/include/xgboost/collective/socket.h
@@ -548,13 +548,10 @@ class TCPSocket {
[[nodiscard]] HandleT const &Handle() const { return handle_; }
/**
* @brief Listen to incoming requests. Should be called after bind.
+ *
+ * Both the default and the minimum backlog are set to 256.
*/
- [[nodiscard]] Result Listen(std::int32_t backlog = 16) {
- if (listen(handle_, backlog) != 0) {
- return system::FailWithCode("Failed to listen.");
- }
- return Success();
- }
+ [[nodiscard]] Result Listen(std::int32_t backlog = 256);
/**
* @brief Bind socket to INADDR_ANY, return the port selected by the OS.
*/
diff --git a/include/xgboost/version_config.h b/include/xgboost/version_config.h
index ee4c824d7f61..cb5e2bb2845d 100644
--- a/include/xgboost/version_config.h
+++ b/include/xgboost/version_config.h
@@ -6,6 +6,6 @@
#define XGBOOST_VER_MAJOR 2 /* NOLINT */
#define XGBOOST_VER_MINOR 1 /* NOLINT */
-#define XGBOOST_VER_PATCH 1 /* NOLINT */
+#define XGBOOST_VER_PATCH 2 /* NOLINT */
#endif // XGBOOST_VERSION_CONFIG_H_
diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml
index 53b7333e8fe3..9f647f0bc60f 100644
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -6,7 +6,7 @@
ml.dmlc
xgboost-jvm_2.12
- 2.1.1
+ 2.1.2
pom
XGBoost JVM Package
JVM Package for XGBoost
diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml
index 554a7bf8f4be..52bb7b6ed1ea 100644
--- a/jvm-packages/xgboost4j-example/pom.xml
+++ b/jvm-packages/xgboost4j-example/pom.xml
@@ -6,11 +6,11 @@
ml.dmlc
xgboost-jvm_2.12
- 2.1.1
+ 2.1.2
xgboost4j-example
xgboost4j-example_2.12
- 2.1.1
+ 2.1.2
jar
diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml
index f24e0426c7ef..da9e79d00269 100644
--- a/jvm-packages/xgboost4j-flink/pom.xml
+++ b/jvm-packages/xgboost4j-flink/pom.xml
@@ -6,12 +6,12 @@
ml.dmlc
xgboost-jvm_2.12
- 2.1.1
+ 2.1.2
xgboost4j-flink
xgboost4j-flink_2.12
- 2.1.1
+ 2.1.2
2.2.0
diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml
index cbc147b8aeff..3c24b82f69cb 100644
--- a/jvm-packages/xgboost4j-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-gpu/pom.xml
@@ -6,11 +6,11 @@
ml.dmlc
xgboost-jvm_2.12
- 2.1.1
+ 2.1.2
xgboost4j-gpu_2.12
xgboost4j-gpu
- 2.1.1
+ 2.1.2
jar
diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml
index dd5e3179c023..934a16a4243d 100644
--- a/jvm-packages/xgboost4j-spark-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml
@@ -6,7 +6,7 @@
ml.dmlc
xgboost-jvm_2.12
- 2.1.1
+ 2.1.2
xgboost4j-spark-gpu
xgboost4j-spark-gpu_2.12
diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml
index 1858defd5b02..b95c52afd174 100644
--- a/jvm-packages/xgboost4j-spark/pom.xml
+++ b/jvm-packages/xgboost4j-spark/pom.xml
@@ -6,7 +6,7 @@
ml.dmlc
xgboost-jvm_2.12
- 2.1.1
+ 2.1.2
xgboost4j-spark
xgboost4j-spark_2.12
diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml
index a4b8985c3493..2a5fa17c887e 100644
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -6,11 +6,11 @@
ml.dmlc
xgboost-jvm_2.12
- 2.1.1
+ 2.1.2
xgboost4j
xgboost4j_2.12
- 2.1.1
+ 2.1.2
jar
diff --git a/python-package/hatch_build.py b/python-package/hatch_build.py
index 925c917b967f..d81a21cd55d6 100644
--- a/python-package/hatch_build.py
+++ b/python-package/hatch_build.py
@@ -3,16 +3,16 @@
Here, we customize the tag of the generated wheels.
"""
-import sysconfig
from typing import Any, Dict
from hatchling.builders.hooks.plugin.interface import BuildHookInterface
+from packaging.tags import platform_tags
def get_tag() -> str:
"""Get appropriate wheel tag according to system"""
- tag_platform = sysconfig.get_platform().replace("-", "_").replace(".", "_")
- return f"py3-none-{tag_platform}"
+ platform_tag = next(platform_tags())
+ return f"py3-none-{platform_tag}"
class CustomBuildHook(BuildHookInterface):
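The switch from `sysconfig` to `packaging.tags` above changes which platform tag ends up in the wheel name. A small sketch of the difference, assuming a Linux build host (actual values depend on the machine):

```python
import sysconfig
from packaging.tags import platform_tags

# Old behaviour: mangle the raw platform string, e.g. 'linux_x86_64'.
legacy_tag = sysconfig.get_platform().replace("-", "_").replace(".", "_")
# New behaviour: the first, most specific tag pip would accept,
# e.g. 'manylinux_2_28_x86_64' on a recent glibc system.
modern_tag = next(platform_tags())
print(f"py3-none-{legacy_tag}")
print(f"py3-none-{modern_tag}")
```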
diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml
index 05bbe998c44a..de67edc53b3c 100644
--- a/python-package/pyproject.toml
+++ b/python-package/pyproject.toml
@@ -1,6 +1,7 @@
[build-system]
requires = [
- "hatchling>=1.12.1"
+ "hatchling>=1.12.1",
+ "packaging>=21.3",
]
backend-path = ["."]
build-backend = "packager.pep517"
@@ -13,7 +14,7 @@ authors = [
{ name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" },
{ name = "Jiaming Yuan", email = "jm.yuan@outlook.com" }
]
-version = "2.1.1"
+version = "2.1.2"
requires-python = ">=3.8"
license = { text = "Apache-2.0" }
classifiers = [
@@ -30,7 +31,7 @@ classifiers = [
dependencies = [
"numpy",
"scipy",
- "nvidia-nccl-cu12 ; platform_system == 'Linux' and platform_machine != 'aarch64'"
+ "nvidia-nccl-cu12 ; platform_system == 'Linux' and platform_machine != 'aarch64'",
]
[project.urls]
diff --git a/python-package/xgboost/VERSION b/python-package/xgboost/VERSION
index 3e3c2f1e5edb..eca07e4c1a8c 100644
--- a/python-package/xgboost/VERSION
+++ b/python-package/xgboost/VERSION
@@ -1 +1 @@
-2.1.1
+2.1.2
diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py
index 7e0ae793ba6e..bd196e2e59f9 100644
--- a/python-package/xgboost/data.py
+++ b/python-package/xgboost/data.py
@@ -458,7 +458,7 @@ def pandas_pa_type(ser: Any) -> np.ndarray:
# combine_chunks takes the most significant amount of time
chunk: pa.Array = aa.combine_chunks()
# When there's null value, we have to use copy
- zero_copy = chunk.null_count == 0
+ zero_copy = chunk.null_count == 0 and not pa.types.is_boolean(chunk.type)
# Alternately, we can use chunk.buffers(), which returns a list of buffers and
# we need to concatenate them ourselves.
# FIXME(jiamingy): Is there a better way to access the arrow buffer along with
@@ -825,37 +825,9 @@ def _arrow_transform(data: DataType) -> Any:
data = cast(pa.Table, data)
- def type_mapper(dtype: pa.DataType) -> Optional[str]:
- """Maps pyarrow type to pandas arrow extension type."""
- if pa.types.is_int8(dtype):
- return pd.ArrowDtype(pa.int8())
- if pa.types.is_int16(dtype):
- return pd.ArrowDtype(pa.int16())
- if pa.types.is_int32(dtype):
- return pd.ArrowDtype(pa.int32())
- if pa.types.is_int64(dtype):
- return pd.ArrowDtype(pa.int64())
- if pa.types.is_uint8(dtype):
- return pd.ArrowDtype(pa.uint8())
- if pa.types.is_uint16(dtype):
- return pd.ArrowDtype(pa.uint16())
- if pa.types.is_uint32(dtype):
- return pd.ArrowDtype(pa.uint32())
- if pa.types.is_uint64(dtype):
- return pd.ArrowDtype(pa.uint64())
- if pa.types.is_float16(dtype):
- return pd.ArrowDtype(pa.float16())
- if pa.types.is_float32(dtype):
- return pd.ArrowDtype(pa.float32())
- if pa.types.is_float64(dtype):
- return pd.ArrowDtype(pa.float64())
- if pa.types.is_boolean(dtype):
- return pd.ArrowDtype(pa.bool_())
- return None
-
# For common cases, this is zero-copy, can check with:
# pa.total_allocated_bytes()
- df = data.to_pandas(types_mapper=type_mapper)
+ df = data.to_pandas(types_mapper=pd.ArrowDtype)
return df
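The deleted `type_mapper` enumerated each dtype by hand. `pd.ArrowDtype` is itself a callable taking a `pyarrow.DataType`, so passing the class as `types_mapper` covers the same cases (and more). A minimal sketch:

```python
import pandas as pd
import pyarrow as pa

table = pa.table({"f0": pa.array([1, 2, None], type=pa.int32())})
# pd.ArrowDtype(pa.int32()) is exactly what the old mapper returned for int32,
# so the class itself can serve as the types_mapper.
df = table.to_pandas(types_mapper=pd.ArrowDtype)
print(df.dtypes)  # f0    int32[pyarrow]
```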
diff --git a/python-package/xgboost/testing/data.py b/python-package/xgboost/testing/data.py
index 0f2d65cc0afa..4071219c44ef 100644
--- a/python-package/xgboost/testing/data.py
+++ b/python-package/xgboost/testing/data.py
@@ -165,10 +165,6 @@ def pd_arrow_dtypes() -> Generator:
# Integer
dtypes = pandas_pyarrow_mapper
- Null: Union[float, None, Any] = np.nan
- orig = pd.DataFrame(
- {"f0": [1, 2, Null, 3], "f1": [4, 3, Null, 1]}, dtype=np.float32
- )
# Create a dictionary-backed dataframe, enable this when the roundtrip is
# implemented in pandas/pyarrow
#
@@ -191,24 +187,33 @@ def pd_arrow_dtypes() -> Generator:
# pd_catcodes = pd_cat_df["f1"].cat.codes
# assert pd_catcodes.equals(pa_catcodes)
- for Null in (None, pd.NA):
+ for Null in (None, pd.NA, 0):
for dtype in dtypes:
if dtype.startswith("float16") or dtype.startswith("bool"):
continue
+        # Use np.nan as the baseline
+ orig_null = Null if not pd.isna(Null) and Null == 0 else np.nan
+ orig = pd.DataFrame(
+ {"f0": [1, 2, orig_null, 3], "f1": [4, 3, orig_null, 1]},
+ dtype=np.float32,
+ )
+
df = pd.DataFrame(
{"f0": [1, 2, Null, 3], "f1": [4, 3, Null, 1]}, dtype=dtype
)
yield orig, df
- orig = pd.DataFrame(
- {"f0": [True, False, pd.NA, True], "f1": [False, True, pd.NA, True]},
- dtype=pd.BooleanDtype(),
- )
- df = pd.DataFrame(
- {"f0": [True, False, pd.NA, True], "f1": [False, True, pd.NA, True]},
- dtype=pd.ArrowDtype(pa.bool_()),
- )
- yield orig, df
+ # If Null is `False`, then there's no missing value.
+ for Null in (pd.NA, False):
+ orig = pd.DataFrame(
+ {"f0": [True, False, Null, True], "f1": [False, True, Null, True]},
+ dtype=pd.BooleanDtype(),
+ )
+ df = pd.DataFrame(
+ {"f0": [True, False, Null, True], "f1": [False, True, Null, True]},
+ dtype=pd.ArrowDtype(pa.bool_()),
+ )
+ yield orig, df
def check_inf(rng: RNG) -> None:
diff --git a/rabit/include/rabit/internal/socket.h b/rabit/include/rabit/internal/socket.h
index 97fb3d4be497..7eeda3181ff6 100644
--- a/rabit/include/rabit/internal/socket.h
+++ b/rabit/include/rabit/internal/socket.h
@@ -35,7 +35,7 @@
#if !defined(_WIN32)
-#include <sys/poll.h>
+#include <poll.h>
using SOCKET = int;
using sock_size_t = size_t; // NOLINT
diff --git a/src/collective/socket.cc b/src/collective/socket.cc
index 5145c13a1366..dd6c05e6f964 100644
--- a/src/collective/socket.cc
+++ b/src/collective/socket.cc
@@ -3,6 +3,7 @@
*/
#include "xgboost/collective/socket.h"
+#include <algorithm>  // for max
#include <array>      // for array
#include <cstddef>    // std::size_t
#include <cstdint>    // std::int32_t
@@ -58,6 +59,14 @@ SockAddrV4 SockAddrV4::InaddrAny() { return MakeSockAddress("0.0.0.0", 0).V4();
SockAddrV6 SockAddrV6::Loopback() { return MakeSockAddress("::1", 0).V6(); }
SockAddrV6 SockAddrV6::InaddrAny() { return MakeSockAddress("::", 0).V6(); }
+[[nodiscard]] Result TCPSocket::Listen(std::int32_t backlog) {
+ backlog = std::max(backlog, 256);
+ if (listen(this->handle_, backlog) != 0) {
+ return system::FailWithCode("Failed to listen.");
+ }
+ return Success();
+}
+
std::size_t TCPSocket::Send(StringView str) {
CHECK(!this->IsClosed());
CHECK_LT(str.size(), std::numeric_limits<std::int32_t>::max());
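The `Listen` change above moves the definition out of the header and enforces a floor on the backlog, which the updated doc comment describes. A minimal model of the clamping rule (Python only for illustration):

```python
def effective_backlog(requested: int, floor: int = 256) -> int:
    """Model of TCPSocket::Listen: the backlog never drops below the floor."""
    return max(requested, floor)

assert effective_backlog(16) == 256     # the old default of 16 is now raised
assert effective_backlog(1024) == 1024  # larger requests pass through unchanged
```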
diff --git a/src/collective/tracker.cc b/src/collective/tracker.cc
index c8776f294690..9bffbc5590e5 100644
--- a/src/collective/tracker.cc
+++ b/src/collective/tracker.cc
@@ -120,7 +120,8 @@ RabitTracker::RabitTracker(Json const& config) : Tracker{config} {
listener_ = TCPSocket::Create(addr.IsV4() ? SockDomain::kV4 : SockDomain::kV6);
return listener_.Bind(host_, &this->port_);
} << [&] {
- return listener_.Listen();
+ CHECK_GT(this->n_workers_, 0);
+ return listener_.Listen(this->n_workers_);
};
SafeColl(rc);
}
diff --git a/src/common/bitfield.h b/src/common/bitfield.h
index 62107876490f..6ecd7fcdf5a0 100644
--- a/src/common/bitfield.h
+++ b/src/common/bitfield.h
@@ -108,9 +108,11 @@ struct BitFieldContainer {
#if defined(__CUDA_ARCH__)
__device__ BitFieldContainer& operator|=(BitFieldContainer const& rhs) {
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
- size_t min_size = min(NumValues(), rhs.NumValues());
+ std::size_t min_size = std::min(this->Capacity(), rhs.Capacity());
if (tid < min_size) {
- Data()[tid] |= rhs.Data()[tid];
+ if (this->Check(tid) || rhs.Check(tid)) {
+ this->Set(tid);
+ }
}
return *this;
}
@@ -126,16 +128,20 @@ struct BitFieldContainer {
#if defined(__CUDA_ARCH__)
__device__ BitFieldContainer& operator&=(BitFieldContainer const& rhs) {
- size_t min_size = min(NumValues(), rhs.NumValues());
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+ std::size_t min_size = std::min(this->Capacity(), rhs.Capacity());
if (tid < min_size) {
- Data()[tid] &= rhs.Data()[tid];
+ if (this->Check(tid) && rhs.Check(tid)) {
+ this->Set(tid);
+ } else {
+ this->Clear(tid);
+ }
}
return *this;
}
#else
BitFieldContainer& operator&=(BitFieldContainer const& rhs) {
- size_t min_size = std::min(NumValues(), rhs.NumValues());
+ std::size_t min_size = std::min(NumValues(), rhs.NumValues());
for (size_t i = 0; i < min_size; ++i) {
Data()[i] &= rhs.Data()[i];
}
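The device `operator|=`/`operator&=` fixes above re-index the per-thread update from storage words (`NumValues`) to individual bits (`Capacity`), going through `Check`/`Set`/`Clear` instead of raw word access. A bit-level model of the new OR semantics:

```python
def bitfield_or(lhs: list[bool], rhs: list[bool]) -> list[bool]:
    """Each CUDA thread (tid) now owns one bit, bounded by the smaller capacity."""
    for tid in range(min(len(lhs), len(rhs))):
        if lhs[tid] or rhs[tid]:
            lhs[tid] = True
    return lhs

assert bitfield_or([True, False], [False, True]) == [True, True]
```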
diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh
index f4fce42f84f8..03b8f2c0a40c 100644
--- a/src/common/device_helpers.cuh
+++ b/src/common/device_helpers.cuh
@@ -224,13 +224,6 @@ __global__ void LaunchNKernel(size_t begin, size_t end, L lambda) {
lambda(i);
}
}
-template <typename L>
-__global__ void LaunchNKernel(int device_idx, size_t begin, size_t end,
- L lambda) {
- for (auto i : GridStrideRange(begin, end)) {
- lambda(i, device_idx);
- }
-}
/* \brief A wrapper around kernel launching syntax, used to guard against empty input.
*
diff --git a/src/common/io.cc b/src/common/io.cc
index 1715669b091a..b472de45ab4b 100644
--- a/src/common/io.cc
+++ b/src/common/io.cc
@@ -15,7 +15,7 @@
#if defined(__unix__) || defined(__APPLE__)
#include <fcntl.h>     // for open, O_RDONLY
-#include <sys/mman.h>  // for mmap, mmap64, munmap
+#include <sys/mman.h>  // for mmap, munmap, madvise
#include <unistd.h>    // for close, getpagesize
#elif defined(xgboost_IS_WIN)
#define WIN32_LEAN_AND_MEAN
@@ -233,9 +233,9 @@ std::unique_ptr<MMAPFile> Open(std::string path, std::size_t offset, std::size_t
#if defined(__linux__) || defined(__GLIBC__)
int prot{PROT_READ};
-  ptr = reinterpret_cast<std::byte*>(mmap64(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start));
-  madvise(ptr, view_size, MADV_WILLNEED);
+  ptr = reinterpret_cast<std::byte*>(mmap(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start));
CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << SystemErrorMsg();
+ madvise(ptr, view_size, MADV_WILLNEED);
auto handle =
std::make_unique<MMAPFile>(fd, ptr, view_size, offset - view_start, std::move(path));
#elif defined(xgboost_IS_WIN)
diff --git a/src/common/threading_utils.cc b/src/common/threading_utils.cc
index 1f4d5be2f361..46a007e3c750 100644
--- a/src/common/threading_utils.cc
+++ b/src/common/threading_utils.cc
@@ -3,7 +3,7 @@
*/
#include "threading_utils.h"
-#include <algorithm>  // for max
+#include <algorithm>  // for max, min
#include <exception>   // for exception
#include <filesystem>  // for path, exists
#include <fstream>     // for ifstream
@@ -99,17 +99,18 @@ std::int32_t GetCfsCPUCount() noexcept {
return -1;
}
-std::int32_t OmpGetNumThreads(std::int32_t n_threads) {
+std::int32_t OmpGetNumThreads(std::int32_t n_threads) noexcept(true) {
// Don't use parallel if we are in a parallel region.
if (omp_in_parallel()) {
return 1;
}
+  // Honor the OpenMP thread limit, which can be set via an environment variable.
+ auto max_n_threads = std::min({omp_get_num_procs(), omp_get_max_threads(), OmpGetThreadLimit()});
// If -1 or 0 is specified by the user, we default to maximum number of threads.
if (n_threads <= 0) {
- n_threads = std::min(omp_get_num_procs(), omp_get_max_threads());
+ n_threads = max_n_threads;
}
- // Honor the openmp thread limit, which can be set via environment variable.
- n_threads = std::min(n_threads, OmpGetThreadLimit());
+ n_threads = std::min(n_threads, max_n_threads);
n_threads = std::max(n_threads, 1);
return n_threads;
}
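The reworked `OmpGetNumThreads` computes one hard cap up front and applies it to both the default and an explicit request; previously an explicit request was only capped by the thread limit, not by the processor count. A model of the new clamping order (inputs are hypothetical):

```python
def omp_get_num_threads(requested: int, n_procs: int, max_threads: int,
                        thread_limit: int) -> int:
    hard_cap = min(n_procs, max_threads, thread_limit)
    n = hard_cap if requested <= 0 else min(requested, hard_cap)
    return max(n, 1)  # never report fewer than one thread

assert omp_get_num_threads(0, 8, 8, 4) == 4    # default request honors OMP_THREAD_LIMIT
assert omp_get_num_threads(16, 8, 8, 8) == 8   # explicit requests are clamped too
```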
diff --git a/src/common/threading_utils.h b/src/common/threading_utils.h
index ac71190353a7..38db8e3a5f99 100644
--- a/src/common/threading_utils.h
+++ b/src/common/threading_utils.h
@@ -257,9 +257,9 @@ inline std::int32_t OmpGetThreadLimit() {
std::int32_t GetCfsCPUCount() noexcept;
/**
- * \brief Get the number of available threads based on n_threads specified by users.
+ * @brief Get the number of available threads based on n_threads specified by users.
*/
-std::int32_t OmpGetNumThreads(std::int32_t n_threads);
+std::int32_t OmpGetNumThreads(std::int32_t n_threads) noexcept(true);
/*!
* \brief A C-style array with in-stack allocation. As long as the array is smaller than
diff --git a/src/data/ellpack_page.cu b/src/data/ellpack_page.cu
index d9ea85919bd8..b96db053edf1 100644
--- a/src/data/ellpack_page.cu
+++ b/src/data/ellpack_page.cu
@@ -70,7 +70,7 @@ __global__ void CompressBinEllpackKernel(
// {feature_cuts, ncuts} forms the array of cuts of `feature'.
const float* feature_cuts = &cuts[cut_ptrs[feature]];
int ncuts = cut_ptrs[feature + 1] - cut_ptrs[feature];
- bool is_cat = common::IsCat(feature_types, ifeature);
+ bool is_cat = common::IsCat(feature_types, feature);
// Assigning the bin in current entry.
// S.t.: fvalue < feature_cuts[bin]
if (is_cat) {
diff --git a/src/objective/lambdarank_obj.cc b/src/objective/lambdarank_obj.cc
index 36495d0caa88..e9a49c6a76b9 100644
--- a/src/objective/lambdarank_obj.cc
+++ b/src/objective/lambdarank_obj.cc
@@ -314,7 +314,7 @@ class LambdaRankObj : public FitIntercept {
CHECK_EQ(info.weights_.Size(), n_groups) << error::GroupWeight();
}
- if (ti_plus_.Size() == 0 && param_.lambdarank_unbiased) {
+ if ((ti_plus_.Empty() || li_full_.Empty()) && param_.lambdarank_unbiased) {
CHECK_EQ(iter, 0);
ti_plus_ = linalg::Constant(ctx_, 1.0, p_cache_->MaxPositionSize());
tj_minus_ = linalg::Constant(ctx_, 1.0, p_cache_->MaxPositionSize());
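The widened `Empty()` check above re-creates the position-bias buffers when training continues from an existing model, the case exercised by the new test in tests/python/test_ranking.py below. A hedged usage sketch of that path (synthetic data, parameter values chosen arbitrarily):

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(32, 4))
y = rng.integers(0, 4, size=32)
qid = np.sort(rng.integers(0, 4, size=32))  # query ids must be non-decreasing

ltr = xgb.XGBRanker(n_estimators=4, lambdarank_unbiased=True)
ltr.fit(X, y, qid=qid)
# Training continuation: ti+/tj- are re-initialized instead of reusing stale state.
ltr.fit(X, y, qid=qid, xgb_model=ltr)
```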
diff --git a/src/tree/constraints.cu b/src/tree/constraints.cu
index ae1d3073c7cc..26ff9f45478d 100644
--- a/src/tree/constraints.cu
+++ b/src/tree/constraints.cu
@@ -6,7 +6,6 @@
#include
#include
-#include
#include
#include
@@ -279,10 +278,6 @@ __global__ void InteractionConstraintSplitKernel(LBitField64 feature,
}
// enable constraints from feature
node |= feature;
- // clear the buffer after use
- if (tid < feature.Capacity()) {
- feature.Clear(tid);
- }
// enable constraints from parent
left |= node;
@@ -304,7 +299,7 @@ void FeatureInteractionConstraintDevice::Split(
<< " Split node: " << node_id << " and its left child: "
<< left_id << " cannot be the same.";
CHECK_NE(node_id, right_id)
- << " Split node: " << node_id << " and its left child: "
+ << " Split node: " << node_id << " and its right child: "
<< right_id << " cannot be the same.";
CHECK_LT(right_id, s_node_constraints_.size());
CHECK_NE(s_node_constraints_.size(), 0);
@@ -330,6 +325,8 @@ void FeatureInteractionConstraintDevice::Split(
feature_buffer_,
feature_id,
node, left, right);
-}
+ // clear the buffer after use
+ thrust::fill_n(thrust::device, feature_buffer_.Data(), feature_buffer_.NumValues(), 0);
+}
} // namespace xgboost
diff --git a/src/tree/gpu_hist/row_partitioner.cuh b/src/tree/gpu_hist/row_partitioner.cuh
index fde6c4dd0fa9..7bc5a8338078 100644
--- a/src/tree/gpu_hist/row_partitioner.cuh
+++ b/src/tree/gpu_hist/row_partitioner.cuh
@@ -134,22 +134,23 @@ void SortPositionBatch(common::Span<const PerNodeData<OpDataT>> d_batch_info,
});
size_t temp_bytes = 0;
if (tmp->empty()) {
- cub::DeviceScan::InclusiveScan(nullptr, temp_bytes, input_iterator, discard_write_iterator,
- IndexFlagOp(), total_rows);
+ dh::safe_cuda(cub::DeviceScan::InclusiveScan(
+ nullptr, temp_bytes, input_iterator, discard_write_iterator, IndexFlagOp(), total_rows));
tmp->resize(temp_bytes);
}
temp_bytes = tmp->size();
- cub::DeviceScan::InclusiveScan(tmp->data().get(), temp_bytes, input_iterator,
- discard_write_iterator, IndexFlagOp(), total_rows);
+ dh::safe_cuda(cub::DeviceScan::InclusiveScan(tmp->data().get(), temp_bytes, input_iterator,
+ discard_write_iterator, IndexFlagOp(), total_rows));
constexpr int kBlockSize = 256;
// Value found by experimentation
const int kItemsThread = 12;
- const int grid_size = xgboost::common::DivRoundUp(total_rows, kBlockSize * kItemsThread);
-  SortPositionCopyKernel<kBlockSize>
-      <<<grid_size, kBlockSize, 0>>>(batch_info_itr, ridx, ridx_tmp, total_rows);
+ std::uint32_t const kGridSize =
+ xgboost::common::DivRoundUp(total_rows, kBlockSize * kItemsThread);
+  dh::LaunchKernel{kGridSize, kBlockSize, 0}(SortPositionCopyKernel<kBlockSize, RowIndexT, OpDataT>,
+                                             batch_info_itr, ridx, ridx_tmp, total_rows);
}
struct NodePositionInfo {
@@ -328,11 +329,13 @@ class RowPartitioner {
sizeof(NodePositionInfo) * ridx_segments_.size(),
cudaMemcpyDefault));
- constexpr int kBlockSize = 512;
+ constexpr std::uint32_t kBlockSize = 512;
const int kItemsThread = 8;
- const int grid_size = xgboost::common::DivRoundUp(ridx_.size(), kBlockSize * kItemsThread);
+ const std::uint32_t grid_size =
+ xgboost::common::DivRoundUp(ridx_.size(), kBlockSize * kItemsThread);
common::Span<RowIndexT> d_ridx(ridx_.data().get(), ridx_.size());
-  FinalisePositionKernel<kBlockSize><<<grid_size, kBlockSize>>>(
+ dh::LaunchKernel{grid_size, kBlockSize}(
+ FinalisePositionKernel,
dh::ToSpan(d_node_info_storage), d_ridx, d_out_position, op);
}
};
diff --git a/tests/buildkite/remove_nccl_dep.patch b/tests/buildkite/remove_nccl_dep.patch
index a2a4a5c88289..c5a8fe3acee1 100644
--- a/tests/buildkite/remove_nccl_dep.patch
+++ b/tests/buildkite/remove_nccl_dep.patch
@@ -1,14 +1,12 @@
diff --git python-package/pyproject.toml python-package/pyproject.toml
-index 8835def25..46c1451c2 100644
+index 20d3f9974..953087ff4 100644
--- python-package/pyproject.toml
+++ python-package/pyproject.toml
-@@ -30,8 +30,7 @@ classifiers = [
- ]
+@@ -30,7 +30,6 @@ classifiers = [
dependencies = [
"numpy",
-- "scipy",
-- "nvidia-nccl-cu12 ; platform_system == 'Linux' and platform_machine != 'aarch64'"
-+ "scipy"
+ "scipy",
+- "nvidia-nccl-cu12 ; platform_system == 'Linux' and platform_machine != 'aarch64'",
]
[project.urls]
diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu
index d7b7084e7755..eab55672ec32 100644
--- a/tests/ci_build/Dockerfile.gpu
+++ b/tests/ci_build/Dockerfile.gpu
@@ -27,7 +27,7 @@ RUN \
"nccl>=${NCCL_SHORT_VER}" \
dask \
dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
- numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
+ numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz "hypothesis<=6.112" \
"pyspark>=3.4.0" cloudpickle cuda-python && \
mamba clean --all && \
conda run --no-capture-output -n gpu_test pip install buildkite-test-collector
diff --git a/tests/ci_build/conda_env/aarch64_test.yml b/tests/ci_build/conda_env/aarch64_test.yml
index 2af0324c9770..207face0187a 100644
--- a/tests/ci_build/conda_env/aarch64_test.yml
+++ b/tests/ci_build/conda_env/aarch64_test.yml
@@ -14,7 +14,7 @@ dependencies:
- matplotlib
- dask
- distributed
-- hypothesis
+- hypothesis<=6.112
- graphviz
- python-graphviz
- codecov
diff --git a/tests/ci_build/conda_env/linux_cpu_test.yml b/tests/ci_build/conda_env/linux_cpu_test.yml
index fd630c85a07f..b2120d981c2e 100644
--- a/tests/ci_build/conda_env/linux_cpu_test.yml
+++ b/tests/ci_build/conda_env/linux_cpu_test.yml
@@ -20,7 +20,7 @@ dependencies:
- dask
- distributed
- python-graphviz
-- hypothesis>=6.46
+- hypothesis<=6.112
- astroid
- sh
- mock
diff --git a/tests/ci_build/conda_env/linux_sycl_test.yml b/tests/ci_build/conda_env/linux_sycl_test.yml
index edac720c34f5..802f32e553c4 100644
--- a/tests/ci_build/conda_env/linux_sycl_test.yml
+++ b/tests/ci_build/conda_env/linux_sycl_test.yml
@@ -13,7 +13,7 @@ dependencies:
- scipy
- scikit-learn
- pandas
-- hypothesis>=6.46
+- hypothesis<=6.112
- pytest
- pytest-timeout
- pytest-cov
diff --git a/tests/ci_build/conda_env/macos_cpu_test.yml b/tests/ci_build/conda_env/macos_cpu_test.yml
index e2e377e2145d..a5eaa253f186 100644
--- a/tests/ci_build/conda_env/macos_cpu_test.yml
+++ b/tests/ci_build/conda_env/macos_cpu_test.yml
@@ -18,7 +18,7 @@ dependencies:
- distributed
- graphviz
- python-graphviz
-- hypothesis
+- hypothesis<=6.112
- astroid
- sphinx
- sh
diff --git a/tests/ci_build/conda_env/python_lint.yml b/tests/ci_build/conda_env/python_lint.yml
index dc5105a84fab..fb3af3aa3cb6 100644
--- a/tests/ci_build/conda_env/python_lint.yml
+++ b/tests/ci_build/conda_env/python_lint.yml
@@ -17,6 +17,6 @@ dependencies:
- isort
- cloudpickle
- pytest
-- hypothesis
+- hypothesis<=6.112
- hatchling
- pyspark>=3.4.0
diff --git a/tests/ci_build/conda_env/win64_cpu_test.yml b/tests/ci_build/conda_env/win64_cpu_test.yml
index 7789e94a6fcb..81fb3b1c73f3 100644
--- a/tests/ci_build/conda_env/win64_cpu_test.yml
+++ b/tests/ci_build/conda_env/win64_cpu_test.yml
@@ -14,7 +14,7 @@ dependencies:
- python-graphviz
- pytest
- jsonschema
-- hypothesis
+- hypothesis<=6.112
- python-graphviz
- pip
- py-ubjson
diff --git a/tests/ci_build/conda_env/win64_test.yml b/tests/ci_build/conda_env/win64_test.yml
index 3f62c034c6e0..f131c6585026 100644
--- a/tests/ci_build/conda_env/win64_test.yml
+++ b/tests/ci_build/conda_env/win64_test.yml
@@ -10,7 +10,7 @@ dependencies:
- pandas
- pytest
- boto3
-- hypothesis
+- hypothesis<=6.112
- jsonschema
- cupy
- python-graphviz
diff --git a/tests/cpp/common/test_threading_utils.cc b/tests/cpp/common/test_threading_utils.cc
index 2b1a2580a90a..844adbc56477 100644
--- a/tests/cpp/common/test_threading_utils.cc
+++ b/tests/cpp/common/test_threading_utils.cc
@@ -1,17 +1,16 @@
/**
- * Copyright 2019-2023 by XGBoost Contributors
+ * Copyright 2019-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <cstddef>  // std::size_t
+#include <thread>   // for std::thread
#include "../../../src/common/threading_utils.h" // BlockedSpace2d,ParallelFor2d,ParallelFor
#include "dmlc/omp.h" // omp_in_parallel
#include "xgboost/context.h" // Context
-namespace xgboost {
-namespace common {
-
+namespace xgboost::common {
TEST(ParallelFor2d, CreateBlockedSpace2d) {
constexpr size_t kDim1 = 5;
constexpr size_t kDim2 = 3;
@@ -102,5 +101,14 @@ TEST(ParallelFor, Basic) {
});
ASSERT_FALSE(omp_in_parallel());
}
-} // namespace common
-} // namespace xgboost
+
+TEST(OmpGetNumThreads, Max) {
+#if defined(_OPENMP)
+ auto n_threads = OmpGetNumThreads(1 << 18);
+  ASSERT_LE(n_threads, std::thread::hardware_concurrency());  // LE because containers may restrict the CPU count
+ n_threads = OmpGetNumThreads(0);
+ ASSERT_GE(n_threads, 1);
+ ASSERT_LE(n_threads, std::thread::hardware_concurrency());
+#endif
+}
+} // namespace xgboost::common
diff --git a/tests/cpp/data/test_ellpack_page.cu b/tests/cpp/data/test_ellpack_page.cu
index ab4539fd411d..c3ef7983418f 100644
--- a/tests/cpp/data/test_ellpack_page.cu
+++ b/tests/cpp/data/test_ellpack_page.cu
@@ -5,11 +5,13 @@
#include
-#include "../../../src/common/categorical.h"
+#include "../../../src/common/categorical.h" // for AsCat
+#include "../../../src/common/compressed_iterator.h" // for CompressedByteT
#include "../../../src/common/hist_util.h"
#include "../../../src/data/ellpack_page.cuh"
#include "../../../src/data/ellpack_page.h"
-#include "../../../src/tree/param.h" // TrainParam
+#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
+#include "../../../src/tree/param.h" // TrainParam
#include "../helpers.h"
#include "../histogram_helpers.h"
#include "gtest/gtest.h"
@@ -91,7 +93,7 @@ TEST(EllpackPage, FromCategoricalBasic) {
auto& h_ft = m->Info().feature_types.HostVector();
h_ft.resize(kCols, FeatureType::kCategorical);
- Context ctx{MakeCUDACtx(0)};
+ auto ctx = MakeCUDACtx(0);
auto p = BatchParam{max_bins, tree::TrainParam::DftSparseThreshold()};
auto ellpack = EllpackPage(&ctx, m.get(), p);
auto accessor = ellpack.Impl()->GetDeviceAccessor(FstCU());
@@ -122,6 +124,37 @@ TEST(EllpackPage, FromCategoricalBasic) {
}
}
+TEST(EllpackPage, FromCategoricalMissing) {
+ auto ctx = MakeCUDACtx(0);
+
+  std::shared_ptr<common::HistogramCuts> cuts;
+  auto nan = std::numeric_limits<float>::quiet_NaN();
+  // 2 rows and 3 columns. The second column is nan, row_stride is 2.
+  std::vector<float> data{{0.1, nan, 1, 0.2, nan, 0}};
+ auto p_fmat = GetDMatrixFromData(data, 2, 3);
+ p_fmat->Info().feature_types.HostVector() = {FeatureType::kNumerical, FeatureType::kNumerical,
+ FeatureType::kCategorical};
+ p_fmat->Info().feature_types.SetDevice(ctx.Device());
+
+ auto p = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
+  for (auto const& page : p_fmat->GetBatches<GHistIndexMatrix>(&ctx, p)) {
+    cuts = std::make_shared<common::HistogramCuts>(page.Cuts());
+ }
+ cuts->cut_ptrs_.SetDevice(ctx.Device());
+ cuts->cut_values_.SetDevice(ctx.Device());
+ cuts->min_vals_.SetDevice(ctx.Device());
+  for (auto const& page : p_fmat->GetBatches<EllpackPage>(&ctx, p)) {
+    std::vector<common::CompressedByteT> h_buffer;
+ auto h_acc = page.Impl()->GetHostAccessor(p_fmat->Info().feature_types.ConstDeviceSpan());
+ ASSERT_EQ(h_acc.n_rows, 2);
+ ASSERT_EQ(h_acc.row_stride, 2);
+ ASSERT_EQ(h_acc.gidx_iter[0], 0);
+ ASSERT_EQ(h_acc.gidx_iter[1], 4); // cat 1
+ ASSERT_EQ(h_acc.gidx_iter[2], 1);
+ ASSERT_EQ(h_acc.gidx_iter[3], 3); // cat 0
+ }
+}
+
struct ReadRowFunction {
EllpackDeviceAccessor matrix;
int row;
diff --git a/tests/cpp/tree/gpu_hist/test_row_partitioner.cu b/tests/cpp/tree/gpu_hist/test_row_partitioner.cu
index 14ea6fd70a4e..54bf17247432 100644
--- a/tests/cpp/tree/gpu_hist/test_row_partitioner.cu
+++ b/tests/cpp/tree/gpu_hist/test_row_partitioner.cu
@@ -6,15 +6,12 @@
#include
#include
-#include
#include
#include "../../../../src/tree/gpu_hist/row_partitioner.cuh"
#include "../../helpers.h"
#include "xgboost/base.h"
-#include "xgboost/context.h"
-#include "xgboost/task.h"
-#include "xgboost/tree_model.h"
+#include "../../helpers.h" // for RandomDataGenerator
namespace xgboost::tree {
void TestUpdatePositionBatch() {
@@ -55,7 +52,9 @@ void TestSortPositionBatch(const std::vector<int>& ridx_in, const std::vector<Segment>& segments) {
  thrust::device_vector<RowPartitioner::RowIndexT> ridx_tmp(ridx_in.size());
  thrust::device_vector<RowPartitioner::RowIndexT> counts(segments.size());
- auto op = [=] __device__(auto ridx, int split_index, int data) { return ridx % 2 == 0; };
+ auto op = [=] __device__(auto ridx, int split_index, int data) {
+ return ridx % 2 == 0;
+ };
std::vector<int> op_data(segments.size());
std::vector<PerNodeData<int>> h_batch_info(segments.size());
dh::TemporaryArray<PerNodeData<int>> d_batch_info(segments.size());
diff --git a/tests/cpp/tree/test_constraints.cu b/tests/cpp/tree/test_constraints.cu
--- a/tests/cpp/tree/test_constraints.cu
+++ b/tests/cpp/tree/test_constraints.cu
#include
#include
-#include
-#include
-#include
+
+#include
#include
+#include
+
+#include "../../../src/common/device_helpers.cuh"
#include "../../../src/tree/constraints.cuh"
#include "../../../src/tree/param.h"
-#include "../../../src/common/device_helpers.cuh"
namespace xgboost {
namespace {
@@ -36,9 +37,7 @@ std::string GetConstraintsStr() {
}
tree::TrainParam GetParameter() {
-  std::vector<std::pair<std::string, std::string>> args{
- {"interaction_constraints", GetConstraintsStr()}
- };
+ Args args{{"interaction_constraints", GetConstraintsStr()}};
tree::TrainParam param;
param.Init(args);
return param;
diff --git a/tests/python/test_ranking.py b/tests/python/test_ranking.py
index 49508f594c52..d93d48a90561 100644
--- a/tests/python/test_ranking.py
+++ b/tests/python/test_ranking.py
@@ -202,6 +202,12 @@ def after_training(self, model) -> bool:
# less biased on low ranks.
assert df["ti+"].iloc[-1] < df["ti+"].iloc[0]
+ # Training continuation
+ ltr.fit(x, c, qid=q, eval_set=[(x, c)], eval_qid=[q], xgb_model=ltr)
+ # normalized
+ np.testing.assert_allclose(df["ti+"].iloc[0], 1.0)
+ np.testing.assert_allclose(df["tj-"].iloc[0], 1.0)
+
def test_normalization() -> None:
run_normalization("cpu")