diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2e7f16864805..db99d9a9d609 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@ if(PLUGIN_SYCL)
   string(REPLACE " -isystem ${CONDA_PREFIX}/include" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
 endif()
 
-project(xgboost LANGUAGES CXX C VERSION 2.1.1)
+project(xgboost LANGUAGES CXX C VERSION 2.1.2)
 
 include(cmake/Utils.cmake)
 list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index db5290334b49..f6dbf02cbaec 100644
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 2.1.1.1
-Date: 2024-07-30
+Version: 2.1.2.1
+Date: 2024-10-23
 Authors@R: c(
   person("Tianqi", "Chen", role = c("aut"),
          email = "tianqi.tchen@gmail.com"),
diff --git a/R-package/configure b/R-package/configure
index f6deab049b2d..4d7f4ab5c906 100755
--- a/R-package/configure
+++ b/R-package/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.71 for xgboost 2.1.1.
+# Generated by GNU Autoconf 2.71 for xgboost 2.1.2.
 #
 #
 # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -607,8 +607,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='xgboost'
 PACKAGE_TARNAME='xgboost'
-PACKAGE_VERSION='2.1.1'
-PACKAGE_STRING='xgboost 2.1.1'
+PACKAGE_VERSION='2.1.2'
+PACKAGE_STRING='xgboost 2.1.2'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''
 
@@ -1259,7 +1259,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures xgboost 2.1.1 to adapt to many kinds of systems.
+\`configure' configures xgboost 2.1.2 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1321,7 +1321,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of xgboost 2.1.1:";;
+     short | recursive ) echo "Configuration of xgboost 2.1.2:";;
    esac
   cat <<\_ACEOF
 
@@ -1404,7 +1404,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-xgboost configure 2.1.1
+xgboost configure 2.1.2
 generated by GNU Autoconf 2.71
 
 Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1603,7 +1603,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by xgboost $as_me 2.1.1, which was
+It was created by xgboost $as_me 2.1.2, which was
 generated by GNU Autoconf 2.71.  Invocation command line was
 
   $ $0$ac_configure_args_raw
 
@@ -2732,11 +2732,11 @@ if test x$ac_prog_cxx_stdcxx = xno
 then :
   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++11 features" >&5
 printf %s "checking for $CXX option to enable C++11 features... " >&6; }
-if test ${ac_cv_prog_cxx_11+y}
+if test ${ac_cv_prog_cxx_cxx11+y}
 then :
   printf %s "(cached) " >&6
 else $as_nop
-  ac_cv_prog_cxx_11=no
+  ac_cv_prog_cxx_cxx11=no
   ac_save_CXX=$CXX
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
@@ -2778,11 +2778,11 @@ if test x$ac_prog_cxx_stdcxx = xno
 then :
   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++98 features" >&5
 printf %s "checking for $CXX option to enable C++98 features... " >&6; }
-if test ${ac_cv_prog_cxx_98+y}
+if test ${ac_cv_prog_cxx_cxx98+y}
 then :
   printf %s "(cached) " >&6
 else $as_nop
-  ac_cv_prog_cxx_98=no
+  ac_cv_prog_cxx_cxx98=no
   ac_save_CXX=$CXX
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
@@ -3709,7 +3709,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by xgboost $as_me 2.1.1, which was
+This file was extended by xgboost $as_me 2.1.2, which was
 generated by GNU Autoconf 2.71.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -3773,7 +3773,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config='$ac_cs_config_escaped'
 ac_cs_version="\\
-xgboost config.status 2.1.1
+xgboost config.status 2.1.2
 configured by $0, generated by GNU Autoconf 2.71,
   with options \\"\$ac_cs_config\\"
diff --git a/R-package/configure.ac b/R-package/configure.ac
index 46b251708fba..516160ffd5bd 100644
--- a/R-package/configure.ac
+++ b/R-package/configure.ac
@@ -2,7 +2,7 @@
 
 AC_PREREQ(2.69)
 
-AC_INIT([xgboost],[2.1.1],[],[xgboost],[])
+AC_INIT([xgboost],[2.1.2],[],[xgboost],[])
 
 : ${R_HOME=`R RHOME`}
 if test -z "${R_HOME}"; then
diff --git a/dev/release-artifacts.py b/dev/release-artifacts.py
index 4c4340984b6e..5d7854e06719 100644
--- a/dev/release-artifacts.py
+++ b/dev/release-artifacts.py
@@ -1,6 +1,7 @@
-"""Simple script for managing Python, R, and source release packages.
+"""
+Simple script for managing Python, R, and source release packages.
 
-tqdm, sh are required to run this script.
+tqdm, sh, build, twine are required to run this script.
 """
 
 import argparse
@@ -10,15 +11,15 @@
 import tarfile
 import tempfile
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple
 from urllib.request import urlretrieve
 
 import tqdm
 from packaging import version
 from sh.contrib import git
 
-# The package building is managed by Jenkins CI.
-PREFIX = "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/release_"
+# S3 bucket hosting the release artifacts
+S3_BUCKET_URL = "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds"
 ROOT = Path(__file__).absolute().parent.parent
 DIST = ROOT / "python-package" / "dist"
 
@@ -26,9 +27,9 @@
 
 
 class DirectoryExcursion:
-    def __init__(self, path: Union[os.PathLike, str]) -> None:
+    def __init__(self, path: Path) -> None:
         self.path = path
-        self.curdir = os.path.normpath(os.path.abspath(os.path.curdir))
+        self.curdir = Path.cwd().resolve()
 
     def __enter__(self) -> None:
         os.chdir(self.path)
@@ -37,75 +38,85 @@ def __exit__(self, *args: Any) -> None:
         os.chdir(self.curdir)
 
 
-def show_progress(block_num, block_size, total_size):
-    "Show file download progress."
+def show_progress(block_num: int, block_size: int, total_size: int) -> None:
+    """Show file download progress."""
     global pbar
     if pbar is None:
         pbar = tqdm.tqdm(total=total_size / 1024, unit="kB")
 
     downloaded = block_num * block_size
     if downloaded < total_size:
-        upper = (total_size - downloaded) / 1024
-        pbar.update(min(block_size / 1024, upper))
+        pbar.update(min(block_size / 1024, (total_size - downloaded) / 1024))
     else:
         pbar.close()
         pbar = None
 
 
-def retrieve(url, filename=None):
-    print(f"{url} -> {filename}")
-    return urlretrieve(url, filename, reporthook=show_progress)
+def retrieve(url: str, filename: Optional[Path] = None) -> str:
+    """Retrieve a file from a URL with progress indication."""
+    print(f"Downloading {url} -> {filename}")
+    return urlretrieve(url, filename, reporthook=show_progress)[0]
 
 
 def latest_hash() -> str:
-    "Get latest commit hash."
-    ret = subprocess.run(["git", "rev-parse", "HEAD"], capture_output=True)
-    assert ret.returncode == 0, "Failed to get latest commit hash."
-    commit_hash = ret.stdout.decode("utf-8").strip()
-    return commit_hash
-
-
-def download_wheels(
+    """Get latest commit hash."""
+    try:
+        result = subprocess.run(
+            ["git", "rev-parse", "HEAD"],
+            check=True,
+            capture_output=True,
+            text=True,
+            encoding="utf-8",
+        )
+        return result.stdout.strip()
+    except subprocess.CalledProcessError as e:
+        raise RuntimeError("Failed to get latest commit hash.") from e
+
+
+def _download_python_wheels(
     platforms: List[str],
-    dir_URL: str,
+    dir_url: str,
     src_filename_prefix: str,
     target_filename_prefix: str,
-    outdir: str,
-) -> List[str]:
-    """Download all binary wheels. dir_URL is the URL for remote directory storing the
-    release wheels.
+    outdir: Path,
+) -> List[Path]:
+    """Download all Python binary wheels for a given set of platforms."""
+    wheel_paths = []
+    dist_dir = outdir / "dist"
+    dist_dir.mkdir(exist_ok=True)
 
-    """
+    for platform in platforms:
+        src_wheel = f"{src_filename_prefix}{platform}.whl"
+        url = f"{dir_url}{src_wheel}"
+        target_wheel = f"{target_filename_prefix}{platform}.whl"
+        wheel_path = dist_dir / target_wheel
+        wheel_paths.append(wheel_path)
+
+        retrieve(url=url, filename=wheel_path)
+
+        try:
+            result = subprocess.run(
+                ["twine", "check", str(wheel_path)],
+                check=True,
+                capture_output=True,
+                text=True,
+                encoding="utf-8",
+            )
+            if "warning" in result.stderr or "warning" in result.stdout:
+                raise RuntimeError(
+                    f"Unresolved warnings:\n{result.stderr}\n{result.stdout}"
+                )
+        except subprocess.CalledProcessError as e:
+            raise RuntimeError("Failed twine check") from e
+    return wheel_paths
 
-    filenames = []
-    outdir = os.path.join(outdir, "dist")
-    if not os.path.exists(outdir):
-        os.mkdir(outdir)
 
-    for platform in platforms:
-        src_wheel = src_filename_prefix + platform + ".whl"
-        url = dir_URL + src_wheel
-
-        target_wheel = target_filename_prefix + platform + ".whl"
-        filename = os.path.join(outdir, target_wheel)
-        filenames.append(filename)
-        retrieve(url=url, filename=filename)
-        ret = subprocess.run(["twine", "check", filename], capture_output=True)
-        assert ret.returncode == 0, "Failed twine check"
-        stderr = ret.stderr.decode("utf-8")
-        stdout = ret.stdout.decode("utf-8")
-        assert stderr.find("warning") == -1, "Unresolved warnings:\n" + stderr
-        assert stdout.find("warning") == -1, "Unresolved warnings:\n" + stdout
-    return filenames
-
-
-def make_pysrc_wheel(
-    release: str, rc: Optional[str], rc_ver: Optional[int], outdir: str
+def make_python_sdist(
+    release: str, rc: Optional[str], rc_ver: Optional[int], outdir: Path
 ) -> None:
     """Make Python source distribution."""
-    dist = os.path.abspath(os.path.normpath(os.path.join(outdir, "dist")))
-    if not os.path.exists(dist):
-        os.mkdir(dist)
+    dist_dir = outdir / "dist"
+    dist_dir.mkdir(exist_ok=True)
 
     # Apply patch to remove NCCL dependency
     # Save the original content of pyproject.toml so that we can restore it later
@@ -114,47 +125,59 @@ def make_pysrc_wheel(
         orig_pyproj_lines = f.read()
     with open("tests/buildkite/remove_nccl_dep.patch", "r") as f:
         patch_lines = f.read()
-    subprocess.run(["patch", "-p0"], input=patch_lines, text=True)
-
-    with DirectoryExcursion(os.path.join(ROOT, "python-package")):
-        subprocess.check_call(["python", "-m", "build", "--sdist"])
-        if rc is not None:
-            name = f"xgboost-{release}{rc}{rc_ver}.tar.gz"
-        else:
-            name = f"xgboost-{release}.tar.gz"
-        src = os.path.join(DIST, name)
-        subprocess.check_call(["twine", "check", src])
-        target = os.path.join(dist, name)
-        shutil.move(src, target)
+    subprocess.run(
+        ["patch", "-p0"], input=patch_lines, check=True, text=True, encoding="utf-8"
+    )
+
+    with DirectoryExcursion(ROOT / "python-package"):
+        subprocess.run(["python", "-m", "build", "--sdist"], check=True)
+        sdist_name = (
+            f"xgboost-{release}{rc}{rc_ver}.tar.gz"
+            if rc
+            else f"xgboost-{release}.tar.gz"
+        )
+        src = DIST / sdist_name
+        subprocess.run(["twine", "check", str(src)], check=True)
+        dest = dist_dir / sdist_name
+        shutil.move(src, dest)
 
     with DirectoryExcursion(ROOT):
         with open("python-package/pyproject.toml", "w") as f:
-            print(orig_pyproj_lines, file=f, end="")
+            f.write(orig_pyproj_lines)
 
 
-def download_py_packages(
-    branch: str, major: int, minor: int, commit_hash: str, outdir: str
-) -> None:
-    platforms = [
+def download_python_wheels(branch: str, commit_hash: str, outdir: Path) -> None:
+    """Download all Python binary wheels for the specified branch."""
+    full_platforms = [
         "win_amd64",
+        "manylinux2014_x86_64",
+        "manylinux2014_aarch64",
        "manylinux_2_28_x86_64",
         "manylinux_2_28_aarch64",
         "macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64",
         "macosx_12_0_arm64",
     ]
+    minimal_platforms = [
+        "win_amd64",
+        "manylinux2014_x86_64",
+        "manylinux2014_aarch64",
+    ]
 
-    branch = branch.split("_")[1]  # release_x.y.z
-    dir_URL = PREFIX + branch + "/"
-    src_filename_prefix = "xgboost-" + args.release + "%2B" + commit_hash + "-py3-none-"
-    target_filename_prefix = "xgboost-" + args.release + "-py3-none-"
-
-    if not os.path.exists(DIST):
-        os.mkdir(DIST)
-
-    filenames = download_wheels(
-        platforms, dir_URL, src_filename_prefix, target_filename_prefix, outdir
-    )
-    print("List of downloaded wheels:", filenames)
+    dir_url = f"{S3_BUCKET_URL}/{branch}/"
+    wheels = []
+
+    for pkg_name, platforms in [
+        ("xgboost", full_platforms),
+        ("xgboost_cpu", minimal_platforms),
+    ]:
+        src_filename_prefix = f"{pkg_name}-{args.release}%2B{commit_hash}-py3-none-"
+        target_filename_prefix = f"{pkg_name}-{args.release}-py3-none-"
+        wheels.extend(
+            _download_python_wheels(
+                platforms, dir_url, src_filename_prefix, target_filename_prefix, outdir
+            )
+        )
+    print(f"List of downloaded wheels: {wheels}")
     print(
         """
 Following steps should be done manually:
@@ -164,89 +187,99 @@ def download_py_packages(
     )
 
 
-def download_r_packages(
-    release: str, branch: str, rc: str, commit: str, outdir: str
+def download_r_artifacts(
+    release: str, branch: str, rc: str, commit: str, outdir: Path
 ) -> Tuple[Dict[str, str], List[str]]:
+    """Download R package artifacts for the specified release and branch."""
     platforms = ["linux"]
-    dirname = os.path.join(outdir, "r-packages")
-    if not os.path.exists(dirname):
-        os.mkdir(dirname)
+    rpkg_dir = outdir / "r-packages"
+    rpkg_dir.mkdir(exist_ok=True)
 
-    filenames = []
-    branch = branch.split("_")[1]  # release_x.y.z
+    artifacts = []
     urls = {}
     for plat in platforms:
-        url = f"{PREFIX}{branch}/xgboost_r_gpu_{plat}_{commit}.tar.gz"
-
-        if not rc:
-            filename = f"xgboost_r_gpu_{plat}_{release}.tar.gz"
-        else:
-            filename = f"xgboost_r_gpu_{plat}_{release}-{rc}.tar.gz"
-
-        target = os.path.join(dirname, filename)
-        retrieve(url=url, filename=target)
-        filenames.append(target)
+        url = f"{S3_BUCKET_URL}/{branch}/xgboost_r_gpu_{plat}_{commit}.tar.gz"
+        artifact_name = (
+            f"xgboost_r_gpu_{plat}_{release}-{rc}.tar.gz"
+            if rc
+            else f"xgboost_r_gpu_{plat}_{release}.tar.gz"
+        )
+        artifact_path = rpkg_dir / artifact_name
+        retrieve(url=url, filename=artifact_path)
+        artifacts.append(artifact_path)
         urls[plat] = url
 
-    print("Finished downloading R packages:", filenames)
+    print(f"Finished downloading R package artifacts: {artifacts}")
     hashes = []
-    with DirectoryExcursion(os.path.join(outdir, "r-packages")):
-        for f in filenames:
-            ret = subprocess.run(
-                ["sha256sum", os.path.basename(f)], capture_output=True
+    with DirectoryExcursion(rpkg_dir):
+        for f in artifacts:
+            result = subprocess.run(
+                ["sha256sum", f.name],
+                check=True,
+                capture_output=True,
+                text=True,
+                encoding="utf-8",
             )
-            h = ret.stdout.decode().strip()
-            hashes.append(h)
+            hashes.append(result.stdout.strip())
     return urls, hashes
 
 
-def check_path():
-    root = os.path.abspath(os.path.curdir)
-    assert os.path.basename(root) == "xgboost", "Must be run on project root."
+def check_path() -> None:
+    """Ensure the script is run from the project root directory."""
+    current_dir = Path.cwd().resolve()
+    if current_dir.name != "xgboost":
+        raise RuntimeError("Must be run from the project root directory.")
 
 
-def make_src_package(release: str, outdir: str) -> Tuple[str, str]:
-    tarname = f"xgboost-{release}.tar.gz"
-    tarpath = os.path.join(outdir, tarname)
-    if os.path.exists(tarpath):
-        os.remove(tarpath)
+def make_src_tarball(release: str, outdir: Path) -> Tuple[str, str]:
+    tarball_name = f"xgboost-{release}.tar.gz"
+    tarball_path = outdir / tarball_name
+    if tarball_path.exists():
+        tarball_path.unlink()
 
     with tempfile.TemporaryDirectory() as tmpdir_str:
         tmpdir = Path(tmpdir_str)
-        shutil.copytree(os.path.curdir, tmpdir / "xgboost")
+        shutil.copytree(Path.cwd(), tmpdir / "xgboost")
        with DirectoryExcursion(tmpdir / "xgboost"):
-            ret = subprocess.run(
+            result = subprocess.run(
                 ["git", "submodule", "foreach", "--quiet", "echo $sm_path"],
+                check=True,
                 capture_output=True,
+                text=True,
+                encoding="utf-8",
             )
-            submodules = ret.stdout.decode().strip().split()
+            submodules = result.stdout.strip().split()
             for mod in submodules:
-                mod_path = os.path.join(os.path.abspath(os.path.curdir), mod, ".git")
-                os.remove(mod_path)
+                mod_path = Path.cwd().resolve() / mod / ".git"
+                mod_path.unlink()
             shutil.rmtree(".git")
-            with tarfile.open(tarpath, "x:gz") as tar:
-                src = tmpdir / "xgboost"
-                tar.add(src, arcname="xgboost")
+            with tarfile.open(tarball_path, "x:gz") as tar:
+                tar.add(tmpdir / "xgboost", arcname="xgboost")
 
-    with DirectoryExcursion(os.path.dirname(tarpath)):
-        ret = subprocess.run(["sha256sum", tarname], capture_output=True)
-        h = ret.stdout.decode().strip()
-        return tarname, h
+    with DirectoryExcursion(tarball_path.parent):
+        result = subprocess.run(
+            ["sha256sum", tarball_name],
+            check=True,
+            capture_output=True,
+            text=True,
+            encoding="utf-8",
+        )
+        sha256sum = result.stdout.strip()
+        return tarball_name, sha256sum
 
 
 def release_note(
     release: str,
     artifact_hashes: List[str],
     r_urls: Dict[str, str],
-    tarname: str,
-    outdir: str,
+    tarball_name: str,
+    outdir: Path,
 ) -> None:
     """Generate a note for GitHub release description."""
     r_gpu_linux_url = r_urls["linux"]
-    r_gpu_win64_url = r_urls["win64"]
     src_tarball = (
-        f"https://github.com/dmlc/xgboost/releases/download/v{release}/{tarname}"
+        f"https://github.com/dmlc/xgboost/releases/download/v{release}/{tarball_name}"
     )
     hash_note = "\n".join(artifact_hashes)
 
@@ -265,44 +298,41 @@ def release_note(
 **Experimental binary packages for R with CUDA enabled**
 
 * xgboost_r_gpu_linux_{release}.tar.gz: [Download]({r_gpu_linux_url})
-* xgboost_r_gpu_win64_{release}.tar.gz: [Download]({r_gpu_win64_url})
 
 **Source tarball**
 * xgboost.tar.gz: [Download]({src_tarball})"""
     print(end_note)
-    with open(os.path.join(outdir, "end_note.md"), "w") as fd:
-        fd.write(end_note)
+    with open(outdir / "end_note.md", "w") as f:
+        f.write(end_note)
 
 
 def main(args: argparse.Namespace) -> None:
     check_path()
 
-    rel = version.parse(args.release)
-    assert isinstance(rel, version.Version)
-
-    major = rel.major
-    minor = rel.minor
-    patch = rel.micro
+    release_parsed: version.Version = version.parse(args.release)
+    print(f"Release: {release_parsed}")
 
-    print("Release:", rel)
-    if not rel.is_prerelease:
+    major = release_parsed.major
+    minor = release_parsed.minor
+    patch = release_parsed.micro
+    if not release_parsed.is_prerelease:
         # Major release
         rc: Optional[str] = None
         rc_ver: Optional[int] = None
     else:
         # RC release
-        major = rel.major
-        minor = rel.minor
-        patch = rel.micro
-        assert rel.pre is not None
-        rc, rc_ver = rel.pre
-        assert rc == "rc"
-
-    release = str(major) + "." + str(minor) + "." + str(patch)
+        rc, rc_ver = release_parsed.pre
+        if rc != "rc":
+            raise ValueError(
+                "Only supports release candidates with 'rc' in the version string"
+            )
+
+    # Release string with only major, minor, patch components
+    release = f"{major}.{minor}.{patch}"
 
     if args.branch is not None:
         branch = args.branch
     else:
-        branch = "release_" + str(major) + "." + str(minor) + ".0"
+        branch = f"release_{major}.{minor}.0"
 
     git.clean("-xdf")
     git.checkout(branch)
@@ -310,33 +340,35 @@ def main(args: argparse.Namespace) -> None:
     git.submodule("update")
     commit_hash = latest_hash()
 
-    outdir = os.path.abspath(args.outdir)
-    if not os.path.exists(outdir):
-        os.mkdir(outdir)
+    outdir = Path(args.outdir).resolve()
+    if ROOT in outdir.parents:
+        raise ValueError("Output directory must be outside of the source tree.")
+    outdir.mkdir(exist_ok=True)
+
+    artifact_hashes: List[str] = []
 
-    # source tarball
-    hashes: List[str] = []
-    tarname, h = make_src_package(release, outdir)
-    hashes.append(h)
+    # Source tarball
+    tarball_name, tarball_hash = make_src_tarball(release, outdir)
+    artifact_hashes.append(tarball_hash)
 
     # CUDA R packages
-    urls, hr = download_r_packages(
+    urls, hashes = download_r_artifacts(
         release,
         branch,
-        "" if rc is None else rc + str(rc_ver),
+        "" if rc is None else f"rc{rc_ver}",
         commit_hash,
         outdir,
     )
-    hashes.extend(hr)
+    artifact_hashes.extend(hashes)
 
     # Python source wheel
-    make_pysrc_wheel(release, rc, rc_ver, outdir)
+    make_python_sdist(release, rc, rc_ver, outdir)
 
     # Python binary wheels
-    download_py_packages(branch, major, minor, commit_hash, outdir)
+    download_python_wheels(branch, commit_hash, outdir)
 
     # Write end note
-    release_note(release, hashes, urls, tarname, outdir)
+    release_note(release, artifact_hashes, urls, tarball_name, outdir)
 
 
 if __name__ == "__main__":
diff --git a/include/xgboost/collective/socket.h b/include/xgboost/collective/socket.h
index c5dd977f6255..84885cf317f5 100644
--- a/include/xgboost/collective/socket.h
+++ b/include/xgboost/collective/socket.h
@@ -548,13 +548,10 @@ class TCPSocket {
   [[nodiscard]] HandleT const &Handle() const { return handle_; }
   /**
    * @brief Listen to incoming requests. Should be called after bind.
+   *
+   * Both the default and the minimum backlog are set to 256.
    */
-  [[nodiscard]] Result Listen(std::int32_t backlog = 16) {
-    if (listen(handle_, backlog) != 0) {
-      return system::FailWithCode("Failed to listen.");
-    }
-    return Success();
-  }
+  [[nodiscard]] Result Listen(std::int32_t backlog = 256);
   /**
   * @brief Bind socket to INADDR_ANY, return the port selected by the OS.
   */
diff --git a/include/xgboost/version_config.h b/include/xgboost/version_config.h
index ee4c824d7f61..cb5e2bb2845d 100644
--- a/include/xgboost/version_config.h
+++ b/include/xgboost/version_config.h
@@ -6,6 +6,6 @@
 
 #define XGBOOST_VER_MAJOR 2 /* NOLINT */
 #define XGBOOST_VER_MINOR 1 /* NOLINT */
-#define XGBOOST_VER_PATCH 1 /* NOLINT */
+#define XGBOOST_VER_PATCH 2 /* NOLINT */
 
 #endif  // XGBOOST_VERSION_CONFIG_H_
diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml
index 53b7333e8fe3..9f647f0bc60f 100644
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -6,7 +6,7 @@
     <groupId>ml.dmlc</groupId>
     <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>2.1.1</version>
+    <version>2.1.2</version>
     <packaging>pom</packaging>
     <name>XGBoost JVM Package</name>
     <description>JVM Package for XGBoost</description>
diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml
index 554a7bf8f4be..52bb7b6ed1ea 100644
--- a/jvm-packages/xgboost4j-example/pom.xml
+++ b/jvm-packages/xgboost4j-example/pom.xml
@@ -6,11 +6,11 @@
   <parent>
     <groupId>ml.dmlc</groupId>
     <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>2.1.1</version>
+    <version>2.1.2</version>
   </parent>
   <name>xgboost4j-example</name>
   <artifactId>xgboost4j-example_2.12</artifactId>
-  <version>2.1.1</version>
+  <version>2.1.2</version>
   <packaging>jar</packaging>
diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml
index f24e0426c7ef..da9e79d00269 100644
--- a/jvm-packages/xgboost4j-flink/pom.xml
+++ b/jvm-packages/xgboost4j-flink/pom.xml
@@ -6,12 +6,12 @@
   <parent>
     <groupId>ml.dmlc</groupId>
     <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>2.1.1</version>
+    <version>2.1.2</version>
   </parent>
 
   <name>xgboost4j-flink</name>
   <artifactId>xgboost4j-flink_2.12</artifactId>
-  <version>2.1.1</version>
+  <version>2.1.2</version>
 
   <properties>
     <flink-ml.version>2.2.0</flink-ml.version>
diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml
index cbc147b8aeff..3c24b82f69cb 100644
--- a/jvm-packages/xgboost4j-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-gpu/pom.xml
@@ -6,11 +6,11 @@
   <parent>
     <groupId>ml.dmlc</groupId>
     <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>2.1.1</version>
+    <version>2.1.2</version>
   </parent>
   <artifactId>xgboost4j-gpu_2.12</artifactId>
   <name>xgboost4j-gpu</name>
-  <version>2.1.1</version>
+  <version>2.1.2</version>
   <packaging>jar</packaging>
diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml
index dd5e3179c023..934a16a4243d 100644
--- a/jvm-packages/xgboost4j-spark-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>ml.dmlc</groupId>
     <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>2.1.1</version>
+    <version>2.1.2</version>
   </parent>
   <name>xgboost4j-spark-gpu</name>
   <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml
index 1858defd5b02..b95c52afd174 100644
--- a/jvm-packages/xgboost4j-spark/pom.xml
+++ b/jvm-packages/xgboost4j-spark/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>ml.dmlc</groupId>
     <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>2.1.1</version>
+    <version>2.1.2</version>
   </parent>
   <name>xgboost4j-spark</name>
   <artifactId>xgboost4j-spark_2.12</artifactId>
diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml
index a4b8985c3493..2a5fa17c887e 100644
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -6,11 +6,11 @@
   <parent>
     <groupId>ml.dmlc</groupId>
     <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>2.1.1</version>
+    <version>2.1.2</version>
   </parent>
   <name>xgboost4j</name>
   <artifactId>xgboost4j_2.12</artifactId>
-  <version>2.1.1</version>
+  <version>2.1.2</version>
   <packaging>jar</packaging>
diff --git a/python-package/hatch_build.py b/python-package/hatch_build.py
index 925c917b967f..d81a21cd55d6 100644
--- a/python-package/hatch_build.py
+++ b/python-package/hatch_build.py
@@ -3,16 +3,16 @@
 Here, we customize the tag of the generated wheels.
 """
 
-import sysconfig
 from typing import Any, Dict
 
 from hatchling.builders.hooks.plugin.interface import BuildHookInterface
+from packaging.tags import platform_tags
 
 
 def get_tag() -> str:
     """Get appropriate wheel tag according to system"""
-    tag_platform = sysconfig.get_platform().replace("-", "_").replace(".", "_")
-    return f"py3-none-{tag_platform}"
+    platform_tag = next(platform_tags())
+    return f"py3-none-{platform_tag}"
 
 
 class CustomBuildHook(BuildHookInterface):
diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml
index 05bbe998c44a..de67edc53b3c 100644
--- a/python-package/pyproject.toml
+++ b/python-package/pyproject.toml
@@ -1,6 +1,7 @@
 [build-system]
 requires = [
-    "hatchling>=1.12.1"
+    "hatchling>=1.12.1",
+    "packaging>=21.3",
 ]
 backend-path = ["."]
 build-backend = "packager.pep517"
@@ -13,7 +14,7 @@ authors = [
     { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" },
     { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" }
 ]
-version = "2.1.1"
+version = "2.1.2"
 requires-python = ">=3.8"
 license = { text = "Apache-2.0" }
 classifiers = [
@@ -30,7 +31,7 @@ classifiers = [
 dependencies = [
     "numpy",
     "scipy",
-    "nvidia-nccl-cu12 ; platform_system == 'Linux' and platform_machine != 'aarch64'"
+    "nvidia-nccl-cu12 ; platform_system == 'Linux' and platform_machine != 'aarch64'",
 ]
 
 [project.urls]
diff --git a/python-package/xgboost/VERSION b/python-package/xgboost/VERSION
index 3e3c2f1e5edb..eca07e4c1a8c 100644
--- a/python-package/xgboost/VERSION
+++ b/python-package/xgboost/VERSION
@@ -1 +1 @@
-2.1.1
+2.1.2
diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py
index 7e0ae793ba6e..bd196e2e59f9 100644
--- a/python-package/xgboost/data.py
+++ b/python-package/xgboost/data.py
@@ -458,7 +458,7 @@ def pandas_pa_type(ser: Any) -> np.ndarray:
     # combine_chunks takes the most significant amount of time
     chunk: pa.Array = aa.combine_chunks()
     # When there's null value, we have to use copy
-    zero_copy = chunk.null_count == 0
+    zero_copy = chunk.null_count == 0 and not pa.types.is_boolean(chunk.type)
     # Alternately, we can use chunk.buffers(), which returns a list of buffers and
     # we need to concatenate them ourselves.
     # FIXME(jiamingy): Is there a better way to access the arrow buffer along with
@@ -825,37 +825,9 @@ def _arrow_transform(data: DataType) -> Any:
 
     data = cast(pa.Table, data)
 
-    def type_mapper(dtype: pa.DataType) -> Optional[str]:
-        """Maps pyarrow type to pandas arrow extension type."""
-        if pa.types.is_int8(dtype):
-            return pd.ArrowDtype(pa.int8())
-        if pa.types.is_int16(dtype):
-            return pd.ArrowDtype(pa.int16())
-        if pa.types.is_int32(dtype):
-            return pd.ArrowDtype(pa.int32())
-        if pa.types.is_int64(dtype):
-            return pd.ArrowDtype(pa.int64())
-        if pa.types.is_uint8(dtype):
-            return pd.ArrowDtype(pa.uint8())
-        if pa.types.is_uint16(dtype):
-            return pd.ArrowDtype(pa.uint16())
-        if pa.types.is_uint32(dtype):
-            return pd.ArrowDtype(pa.uint32())
-        if pa.types.is_uint64(dtype):
-            return pd.ArrowDtype(pa.uint64())
-        if pa.types.is_float16(dtype):
-            return pd.ArrowDtype(pa.float16())
-        if pa.types.is_float32(dtype):
-            return pd.ArrowDtype(pa.float32())
-        if pa.types.is_float64(dtype):
-            return pd.ArrowDtype(pa.float64())
-        if pa.types.is_boolean(dtype):
-            return pd.ArrowDtype(pa.bool_())
-        return None
-
     # For common cases, this is zero-copy, can check with:
     # pa.total_allocated_bytes()
-    df = data.to_pandas(types_mapper=type_mapper)
+    df = data.to_pandas(types_mapper=pd.ArrowDtype)
 
     return df
diff --git a/python-package/xgboost/testing/data.py b/python-package/xgboost/testing/data.py
index 0f2d65cc0afa..4071219c44ef 100644
--- a/python-package/xgboost/testing/data.py
+++ b/python-package/xgboost/testing/data.py
@@ -165,10 +165,6 @@ def pd_arrow_dtypes() -> Generator:
     # Integer
     dtypes = pandas_pyarrow_mapper
-    Null: Union[float, None, Any] = np.nan
-    orig = pd.DataFrame(
-        {"f0": [1, 2, Null, 3], "f1": [4, 3, Null, 1]}, dtype=np.float32
-    )
     # Create a dictionary-backed dataframe, enable this when the roundtrip is
     # implemented in pandas/pyarrow
     #
@@ -191,24 +187,33 @@ def pd_arrow_dtypes() -> Generator:
     #     pd_catcodes = pd_cat_df["f1"].cat.codes
     #     assert pd_catcodes.equals(pa_catcodes)
 
-    for Null in (None, pd.NA):
+    for Null in (None, pd.NA, 0):
         for dtype in dtypes:
             if dtype.startswith("float16") or dtype.startswith("bool"):
                 continue
+            # Use np.nan as a baseline
+            orig_null = Null if not pd.isna(Null) and Null == 0 else np.nan
+            orig = pd.DataFrame(
+                {"f0": [1, 2, orig_null, 3], "f1": [4, 3, orig_null, 1]},
+                dtype=np.float32,
+            )
+
             df = pd.DataFrame(
                 {"f0": [1, 2, Null, 3], "f1": [4, 3, Null, 1]}, dtype=dtype
             )
             yield orig, df
 
-    orig = pd.DataFrame(
-        {"f0": [True, False, pd.NA, True], "f1": [False, True, pd.NA, True]},
-        dtype=pd.BooleanDtype(),
-    )
-    df = pd.DataFrame(
-        {"f0": [True, False, pd.NA, True], "f1": [False, True, pd.NA, True]},
-        dtype=pd.ArrowDtype(pa.bool_()),
-    )
-    yield orig, df
+    # If Null is `False`, then there's no missing value.
+    for Null in (pd.NA, False):
+        orig = pd.DataFrame(
+            {"f0": [True, False, Null, True], "f1": [False, True, Null, True]},
+            dtype=pd.BooleanDtype(),
+        )
+        df = pd.DataFrame(
+            {"f0": [True, False, Null, True], "f1": [False, True, Null, True]},
+            dtype=pd.ArrowDtype(pa.bool_()),
+        )
+        yield orig, df
 
 
 def check_inf(rng: RNG) -> None:
diff --git a/rabit/include/rabit/internal/socket.h b/rabit/include/rabit/internal/socket.h
index 97fb3d4be497..7eeda3181ff6 100644
--- a/rabit/include/rabit/internal/socket.h
+++ b/rabit/include/rabit/internal/socket.h
@@ -35,7 +35,7 @@
 
 #if !defined(_WIN32)
 
-#include <sys/poll.h>
+#include <poll.h>
 
 using SOCKET = int;
 using sock_size_t = size_t;  // NOLINT
diff --git a/src/collective/socket.cc b/src/collective/socket.cc
index 5145c13a1366..dd6c05e6f964 100644
--- a/src/collective/socket.cc
+++ b/src/collective/socket.cc
@@ -3,6 +3,7 @@
  */
 #include "xgboost/collective/socket.h"
 
+#include <algorithm>  // for max
 #include <array>      // for array
 #include <cstddef>    // std::size_t
 #include <cstdint>    // std::int32_t
@@ -58,6 +59,14 @@ SockAddrV4 SockAddrV4::InaddrAny() { return MakeSockAddress("0.0.0.0", 0).V4();
 SockAddrV6 SockAddrV6::Loopback() { return MakeSockAddress("::1", 0).V6(); }
 SockAddrV6 SockAddrV6::InaddrAny() { return MakeSockAddress("::", 0).V6(); }
 
+[[nodiscard]] Result TCPSocket::Listen(std::int32_t backlog) {
+  backlog = std::max(backlog, 256);
+  if (listen(this->handle_, backlog) != 0) {
+    return system::FailWithCode("Failed to listen.");
+  }
+  return Success();
+}
+
 std::size_t TCPSocket::Send(StringView str) {
   CHECK(!this->IsClosed());
   CHECK_LT(str.size(), std::numeric_limits<std::int32_t>::max());
diff --git a/src/collective/tracker.cc b/src/collective/tracker.cc
index c8776f294690..9bffbc5590e5 100644
--- a/src/collective/tracker.cc
+++ b/src/collective/tracker.cc
@@ -120,7 +120,8 @@ RabitTracker::RabitTracker(Json const& config) : Tracker{config} {
     listener_ = TCPSocket::Create(addr.IsV4() ? SockDomain::kV4 : SockDomain::kV6);
     return listener_.Bind(host_, &this->port_);
   } << [&] {
-    return listener_.Listen();
+    CHECK_GT(this->n_workers_, 0);
+    return listener_.Listen(this->n_workers_);
   };
   SafeColl(rc);
 }
diff --git a/src/common/bitfield.h b/src/common/bitfield.h
index 62107876490f..6ecd7fcdf5a0 100644
--- a/src/common/bitfield.h
+++ b/src/common/bitfield.h
@@ -108,9 +108,11 @@ struct BitFieldContainer {
 #if defined(__CUDA_ARCH__)
   __device__ BitFieldContainer& operator|=(BitFieldContainer const& rhs) {
     auto tid = blockIdx.x * blockDim.x + threadIdx.x;
-    size_t min_size = min(NumValues(), rhs.NumValues());
+    std::size_t min_size = std::min(this->Capacity(), rhs.Capacity());
     if (tid < min_size) {
-      Data()[tid] |= rhs.Data()[tid];
+      if (this->Check(tid) || rhs.Check(tid)) {
+        this->Set(tid);
+      }
     }
     return *this;
   }
@@ -126,16 +128,20 @@ struct BitFieldContainer {
 #if defined(__CUDA_ARCH__)
   __device__ BitFieldContainer& operator&=(BitFieldContainer const& rhs) {
-    size_t min_size = min(NumValues(), rhs.NumValues());
     auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+    std::size_t min_size = std::min(this->Capacity(), rhs.Capacity());
     if (tid < min_size) {
-      Data()[tid] &= rhs.Data()[tid];
+      if (this->Check(tid) && rhs.Check(tid)) {
+        this->Set(tid);
+      } else {
+        this->Clear(tid);
+      }
     }
     return *this;
   }
 #else
   BitFieldContainer& operator&=(BitFieldContainer const& rhs) {
-    size_t min_size = std::min(NumValues(), rhs.NumValues());
+    std::size_t min_size = std::min(NumValues(), rhs.NumValues());
     for (size_t i = 0; i < min_size; ++i) {
       Data()[i] &= rhs.Data()[i];
     }
diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh
index f4fce42f84f8..03b8f2c0a40c 100644
--- a/src/common/device_helpers.cuh
+++ b/src/common/device_helpers.cuh
@@ -224,13 +224,6 @@ __global__ void LaunchNKernel(size_t begin, size_t end, L lambda) {
     lambda(i);
   }
 }
-template <typename L>
-__global__ void LaunchNKernel(int device_idx, size_t begin, size_t end,
-                              L lambda) {
-  for (auto i : GridStrideRange(begin, end)) {
-    lambda(i, device_idx);
-  }
-}
 
 /* \brief A wrapper around kernel launching syntax, used to guard against empty input.
  *
diff --git a/src/common/io.cc b/src/common/io.cc
index 1715669b091a..b472de45ab4b 100644
--- a/src/common/io.cc
+++ b/src/common/io.cc
@@ -15,7 +15,7 @@
 
 #if defined(__unix__) || defined(__APPLE__)
 #include <fcntl.h>     // for open, O_RDONLY
-#include <sys/mman.h>  // for mmap, mmap64, munmap
+#include <sys/mman.h>  // for mmap, munmap, madvise
 #include <unistd.h>    // for close, getpagesize
 #elif defined(xgboost_IS_WIN)
 #define WIN32_LEAN_AND_MEAN
@@ -233,9 +233,9 @@ std::unique_ptr<ResourceHandler> Open(std::string path, std::size_t offset, std::size_t
 
 #if defined(__linux__) || defined(__GLIBC__)
   int prot{PROT_READ};
-  ptr = reinterpret_cast<std::byte*>(mmap64(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start));
-  madvise(ptr, view_size, MADV_WILLNEED);
+  ptr = reinterpret_cast<std::byte*>(mmap(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start));
   CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << SystemErrorMsg();
+  madvise(ptr, view_size, MADV_WILLNEED);
   auto handle = std::make_unique<MMAPFile>(fd, ptr, view_size, offset - view_start, std::move(path));
 #elif defined(xgboost_IS_WIN)
diff --git a/src/common/threading_utils.cc b/src/common/threading_utils.cc
index 1f4d5be2f361..46a007e3c750 100644
--- a/src/common/threading_utils.cc
+++ b/src/common/threading_utils.cc
@@ -3,7 +3,7 @@
  */
 #include "threading_utils.h"
 
-#include <algorithm>   // for max
+#include <algorithm>   // for max, min
 #include <exception>   // for exception
 #include <filesystem>  // for path, exists
 #include <fstream>     // for ifstream
@@ -99,17 +99,18 @@ std::int32_t GetCfsCPUCount() noexcept {
   return -1;
 }
 
-std::int32_t OmpGetNumThreads(std::int32_t n_threads) {
+std::int32_t OmpGetNumThreads(std::int32_t n_threads) noexcept(true) {
   // Don't use parallel if we are in a parallel region.
   if (omp_in_parallel()) {
     return 1;
   }
+  // Honor the openmp thread limit, which can be set via environment variable.
+  auto max_n_threads = std::min({omp_get_num_procs(), omp_get_max_threads(), OmpGetThreadLimit()});
   // If -1 or 0 is specified by the user, we default to maximum number of threads.
   if (n_threads <= 0) {
-    n_threads = std::min(omp_get_num_procs(), omp_get_max_threads());
+    n_threads = max_n_threads;
   }
-  // Honor the openmp thread limit, which can be set via environment variable.
-  n_threads = std::min(n_threads, OmpGetThreadLimit());
+  n_threads = std::min(n_threads, max_n_threads);
   n_threads = std::max(n_threads, 1);
   return n_threads;
 }
diff --git a/src/common/threading_utils.h b/src/common/threading_utils.h
index ac71190353a7..38db8e3a5f99 100644
--- a/src/common/threading_utils.h
+++ b/src/common/threading_utils.h
@@ -257,9 +257,9 @@ inline std::int32_t OmpGetThreadLimit() {
 std::int32_t GetCfsCPUCount() noexcept;
 
 /**
- * \brief Get the number of available threads based on n_threads specified by users.
+ * @brief Get the number of available threads based on n_threads specified by users.
 */
-std::int32_t OmpGetNumThreads(std::int32_t n_threads);
+std::int32_t OmpGetNumThreads(std::int32_t n_threads) noexcept(true);
 
 /*!
  * \brief A C-style array with in-stack allocation. As long as the array is smaller than
diff --git a/src/data/ellpack_page.cu b/src/data/ellpack_page.cu
index d9ea85919bd8..b96db053edf1 100644
--- a/src/data/ellpack_page.cu
+++ b/src/data/ellpack_page.cu
@@ -70,7 +70,7 @@ __global__ void CompressBinEllpackKernel(
   // {feature_cuts, ncuts} forms the array of cuts of `feature'.
   const float* feature_cuts = &cuts[cut_ptrs[feature]];
   int ncuts = cut_ptrs[feature + 1] - cut_ptrs[feature];
-  bool is_cat = common::IsCat(feature_types, ifeature);
+  bool is_cat = common::IsCat(feature_types, feature);
   // Assigning the bin in current entry.
   // S.t.: fvalue < feature_cuts[bin]
   if (is_cat) {
diff --git a/src/objective/lambdarank_obj.cc b/src/objective/lambdarank_obj.cc
index 36495d0caa88..e9a49c6a76b9 100644
--- a/src/objective/lambdarank_obj.cc
+++ b/src/objective/lambdarank_obj.cc
@@ -314,7 +314,7 @@ class LambdaRankObj : public FitIntercept {
       CHECK_EQ(info.weights_.Size(), n_groups) << error::GroupWeight();
     }
 
-    if (ti_plus_.Size() == 0 && param_.lambdarank_unbiased) {
+    if ((ti_plus_.Empty() || li_full_.Empty()) && param_.lambdarank_unbiased) {
       CHECK_EQ(iter, 0);
       ti_plus_ = linalg::Constant(ctx_, 1.0, p_cache_->MaxPositionSize());
       tj_minus_ = linalg::Constant(ctx_, 1.0, p_cache_->MaxPositionSize());
diff --git a/src/tree/constraints.cu b/src/tree/constraints.cu
index ae1d3073c7cc..26ff9f45478d 100644
--- a/src/tree/constraints.cu
+++ b/src/tree/constraints.cu
@@ -6,7 +6,6 @@
 
 #include
 #include
-#include
 
 #include
 #include
@@ -279,10 +278,6 @@ __global__ void InteractionConstraintSplitKernel(LBitField64 feature,
   }
   // enable constraints from feature
   node |= feature;
-  // clear the buffer after use
-  if (tid < feature.Capacity()) {
-    feature.Clear(tid);
-  }
 
   // enable constraints from parent
   left |= node;
@@ -304,7 +299,7 @@ void FeatureInteractionConstraintDevice::Split(
       << " Split node: " << node_id << " and its left child: "
       << left_id << " cannot be the same.";
   CHECK_NE(node_id, right_id)
-      << " Split node: " << node_id << " and its left child: "
+      << " Split node: " << node_id << " and its right child: "
      << right_id << " cannot be the same.";
   CHECK_LT(right_id, s_node_constraints_.size());
   CHECK_NE(s_node_constraints_.size(), 0);
@@ -330,6 +325,8 @@ void FeatureInteractionConstraintDevice::Split(
       feature_buffer_, feature_id, node, left, right);
-}
+  // clear the buffer after use
+  thrust::fill_n(thrust::device, feature_buffer_.Data(), feature_buffer_.NumValues(), 0);
+}
 
 }  // namespace xgboost
diff --git a/src/tree/gpu_hist/row_partitioner.cuh b/src/tree/gpu_hist/row_partitioner.cuh
index fde6c4dd0fa9..7bc5a8338078 100644
--- a/src/tree/gpu_hist/row_partitioner.cuh
+++ b/src/tree/gpu_hist/row_partitioner.cuh
@@ -134,22 +134,23 @@ void SortPositionBatch(common::Span<const PerNodeData<OpDataT>> d_batch_info,
   });
   size_t temp_bytes = 0;
   if (tmp->empty()) {
-    cub::DeviceScan::InclusiveScan(nullptr, temp_bytes, input_iterator, discard_write_iterator,
-                                   IndexFlagOp(), total_rows);
+    dh::safe_cuda(cub::DeviceScan::InclusiveScan(
+        nullptr, temp_bytes, input_iterator, discard_write_iterator, IndexFlagOp(), total_rows));
     tmp->resize(temp_bytes);
   }
   temp_bytes = tmp->size();
-  cub::DeviceScan::InclusiveScan(tmp->data().get(), temp_bytes, input_iterator,
-                                 discard_write_iterator, IndexFlagOp(), total_rows);
+  dh::safe_cuda(cub::DeviceScan::InclusiveScan(tmp->data().get(), temp_bytes, input_iterator,
+                                               discard_write_iterator, IndexFlagOp(), total_rows));
 
   constexpr int kBlockSize = 256;
 
   // Value found by experimentation
   const int kItemsThread = 12;
 
-  const int grid_size = xgboost::common::DivRoundUp(total_rows, kBlockSize * kItemsThread);
-  SortPositionCopyKernel<kBlockSize>
-      <<<grid_size, kBlockSize, 0>>>(batch_info_itr, ridx, ridx_tmp, total_rows);
+  std::uint32_t const kGridSize =
+      xgboost::common::DivRoundUp(total_rows, kBlockSize * kItemsThread);
+  dh::LaunchKernel{kGridSize, kBlockSize, 0}(SortPositionCopyKernel<kBlockSize, RowIndexT, OpDataT>,
+                                             batch_info_itr, ridx, ridx_tmp, total_rows);
 }
 
 struct NodePositionInfo {
@@ -328,11 +329,13 @@ class RowPartitioner {
                                sizeof(NodePositionInfo) * ridx_segments_.size(),
                                cudaMemcpyDefault));
 
-    constexpr int kBlockSize = 512;
+    constexpr std::uint32_t kBlockSize = 512;
     const int kItemsThread = 8;
-    const int grid_size = xgboost::common::DivRoundUp(ridx_.size(), kBlockSize * kItemsThread);
+    const std::uint32_t grid_size =
+        xgboost::common::DivRoundUp(ridx_.size(), kBlockSize * kItemsThread);
     common::Span<RowIndexT const> d_ridx(ridx_.data().get(), ridx_.size());
-    FinalisePositionKernel<kBlockSize><<<grid_size, kBlockSize>>>(
+    dh::LaunchKernel{grid_size, kBlockSize}(
+        FinalisePositionKernel<kBlockSize>,
         dh::ToSpan(d_node_info_storage), d_ridx, d_out_position, op);
   }
 };
diff --git a/tests/buildkite/remove_nccl_dep.patch b/tests/buildkite/remove_nccl_dep.patch
index a2a4a5c88289..c5a8fe3acee1 100644
--- a/tests/buildkite/remove_nccl_dep.patch
+++ b/tests/buildkite/remove_nccl_dep.patch
@@ -1,14 +1,12 @@
 diff --git python-package/pyproject.toml python-package/pyproject.toml
-index 8835def25..46c1451c2 100644
+index 20d3f9974..953087ff4 100644
 --- python-package/pyproject.toml
 +++ python-package/pyproject.toml
-@@ -30,8 +30,7 @@ classifiers = [
- ]
+@@ -30,7 +30,6 @@ classifiers = [
  dependencies = [
      "numpy",
--    "scipy",
--    "nvidia-nccl-cu12 ; platform_system == 'Linux' and platform_machine != 'aarch64'"
-+    "scipy"
+     "scipy",
+-    "nvidia-nccl-cu12 ; platform_system == 'Linux' and platform_machine != 'aarch64'",
  ]
 
  [project.urls]
diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu
index d7b7084e7755..eab55672ec32 100644
--- a/tests/ci_build/Dockerfile.gpu
+++ b/tests/ci_build/Dockerfile.gpu
@@ -27,7 +27,7 @@ RUN \
         "nccl>=${NCCL_SHORT_VER}" \
         dask \
         dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
-        numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
+        numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz "hypothesis<=6.112" \
         "pyspark>=3.4.0" cloudpickle cuda-python && \
     mamba clean --all && \
     conda run --no-capture-output -n gpu_test pip install buildkite-test-collector
diff --git a/tests/ci_build/conda_env/aarch64_test.yml b/tests/ci_build/conda_env/aarch64_test.yml
index 2af0324c9770..207face0187a 100644
--- a/tests/ci_build/conda_env/aarch64_test.yml
+++ b/tests/ci_build/conda_env/aarch64_test.yml
@@ -14,7 +14,7 @@ dependencies:
 - matplotlib
 - dask
 - distributed
-- hypothesis
+- hypothesis<=6.112
 - graphviz
 - python-graphviz
 - codecov
diff --git a/tests/ci_build/conda_env/linux_cpu_test.yml b/tests/ci_build/conda_env/linux_cpu_test.yml
index fd630c85a07f..b2120d981c2e 100644
--- a/tests/ci_build/conda_env/linux_cpu_test.yml
+++ b/tests/ci_build/conda_env/linux_cpu_test.yml
@@ -20,7 +20,7 @@ dependencies:
 - dask
 - distributed
 - python-graphviz
-- hypothesis>=6.46
+- hypothesis<=6.112
 - astroid
 - sh
 - mock
diff --git a/tests/ci_build/conda_env/linux_sycl_test.yml b/tests/ci_build/conda_env/linux_sycl_test.yml
index edac720c34f5..802f32e553c4 100644
--- a/tests/ci_build/conda_env/linux_sycl_test.yml
+++ b/tests/ci_build/conda_env/linux_sycl_test.yml
@@ -13,7 +13,7 @@ dependencies:
 - scipy
 - scikit-learn
 - pandas
-- hypothesis>=6.46
+- hypothesis<=6.112
 - pytest
 - pytest-timeout
 - pytest-cov
diff --git a/tests/ci_build/conda_env/macos_cpu_test.yml b/tests/ci_build/conda_env/macos_cpu_test.yml
index e2e377e2145d..a5eaa253f186 100644
--- a/tests/ci_build/conda_env/macos_cpu_test.yml
+++ b/tests/ci_build/conda_env/macos_cpu_test.yml
@@ -18,7 +18,7 @@ dependencies:
 - distributed
 - graphviz
 - python-graphviz
-- hypothesis
+- hypothesis<=6.112
 - astroid
 - sphinx
 - sh
diff --git a/tests/ci_build/conda_env/python_lint.yml b/tests/ci_build/conda_env/python_lint.yml
index dc5105a84fab..fb3af3aa3cb6 100644
--- a/tests/ci_build/conda_env/python_lint.yml
+++ b/tests/ci_build/conda_env/python_lint.yml
@@ -17,6 +17,6 @@ dependencies:
 - isort
 - cloudpickle
 - pytest
-- hypothesis
+- hypothesis<=6.112
 - hatchling
 - pyspark>=3.4.0
diff --git a/tests/ci_build/conda_env/win64_cpu_test.yml b/tests/ci_build/conda_env/win64_cpu_test.yml
index 7789e94a6fcb..81fb3b1c73f3 100644
--- a/tests/ci_build/conda_env/win64_cpu_test.yml
+++ b/tests/ci_build/conda_env/win64_cpu_test.yml
@@ -14,7 +14,7 @@ dependencies:
 - python-graphviz
 - pytest
 - jsonschema
-- hypothesis
+- hypothesis<=6.112
 - python-graphviz
 - pip
 - py-ubjson
diff --git a/tests/ci_build/conda_env/win64_test.yml b/tests/ci_build/conda_env/win64_test.yml
index 3f62c034c6e0..f131c6585026 100644
--- a/tests/ci_build/conda_env/win64_test.yml
+++ b/tests/ci_build/conda_env/win64_test.yml
@@ -10,7 +10,7 @@ dependencies:
 - pandas
 - pytest
 - boto3
-- hypothesis
+- hypothesis<=6.112
 - jsonschema
 - cupy
 - python-graphviz
diff --git a/tests/cpp/common/test_threading_utils.cc b/tests/cpp/common/test_threading_utils.cc
index 2b1a2580a90a..844adbc56477 100644
--- a/tests/cpp/common/test_threading_utils.cc
+++ b/tests/cpp/common/test_threading_utils.cc
@@ -1,17 +1,16 @@
 /**
- * Copyright 2019-2023 by XGBoost Contributors
+ * Copyright 2019-2024, XGBoost Contributors
 */
 #include <gtest/gtest.h>
 
 #include <cstddef>  // std::size_t
+#include <thread>   // for std::thread
 
 #include "../../../src/common/threading_utils.h"  // BlockedSpace2d,ParallelFor2d,ParallelFor
 #include "dmlc/omp.h"         // omp_in_parallel
 #include "xgboost/context.h"  // Context
 
-namespace xgboost {
-namespace common {
-
+namespace xgboost::common {
 TEST(ParallelFor2d, CreateBlockedSpace2d) {
   constexpr size_t kDim1 = 5;
   constexpr size_t kDim2 = 3;
@@ -102,5 +101,14 @@ TEST(ParallelFor, Basic) {
   });
   ASSERT_FALSE(omp_in_parallel());
 }
-}  // namespace common
-}  // namespace xgboost
+
+TEST(OmpGetNumThreads, Max) {
+#if defined(_OPENMP)
+  auto n_threads = OmpGetNumThreads(1 << 18);
+  ASSERT_LE(n_threads, std::thread::hardware_concurrency());  // le due to container
+  n_threads = OmpGetNumThreads(0);
+  ASSERT_GE(n_threads, 1);
+  ASSERT_LE(n_threads, std::thread::hardware_concurrency());
+#endif
+}
+}  // namespace xgboost::common
diff --git a/tests/cpp/data/test_ellpack_page.cu b/tests/cpp/data/test_ellpack_page.cu
index ab4539fd411d..c3ef7983418f 100644
--- a/tests/cpp/data/test_ellpack_page.cu
+++ b/tests/cpp/data/test_ellpack_page.cu
@@ -5,11 +5,13 @@
 
 #include <gtest/gtest.h>
 
-#include "../../../src/common/categorical.h"
+#include "../../../src/common/categorical.h"           // for AsCat
+#include "../../../src/common/compressed_iterator.h"   // for CompressedByteT
 #include "../../../src/common/hist_util.h"
 #include "../../../src/data/ellpack_page.cuh"
 #include "../../../src/data/ellpack_page.h"
-#include "../../../src/tree/param.h"  // TrainParam
+#include "../../../src/data/gradient_index.h"  // for GHistIndexMatrix
+#include "../../../src/tree/param.h"           // TrainParam
 #include "../helpers.h"
 #include "../histogram_helpers.h"
 #include "gtest/gtest.h"
@@ -91,7 +93,7 @@ TEST(EllpackPage, FromCategoricalBasic) {
   auto& h_ft = m->Info().feature_types.HostVector();
   h_ft.resize(kCols, FeatureType::kCategorical);
 
-  Context ctx{MakeCUDACtx(0)};
+  auto ctx = MakeCUDACtx(0);
   auto p = BatchParam{max_bins, tree::TrainParam::DftSparseThreshold()};
   auto ellpack = EllpackPage(&ctx, m.get(), p);
   auto accessor = ellpack.Impl()->GetDeviceAccessor(FstCU());
@@ -122,6 +124,37 @@ TEST(EllpackPage, FromCategoricalBasic) {
   }
 }
 
+TEST(EllpackPage, FromCategoricalMissing) {
+  auto ctx = MakeCUDACtx(0);
+
+  std::shared_ptr<common::HistogramCuts> cuts;
+  auto nan = std::numeric_limits<float>::quiet_NaN();
+  // 2 rows and 3 columns. The second column is nan, row_stride is 2.
+  std::vector<float> data{{0.1, nan, 1, 0.2, nan, 0}};
+  auto p_fmat = GetDMatrixFromData(data, 2, 3);
+  p_fmat->Info().feature_types.HostVector() = {FeatureType::kNumerical, FeatureType::kNumerical,
+                                               FeatureType::kCategorical};
+  p_fmat->Info().feature_types.SetDevice(ctx.Device());
+
+  auto p = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
+  for (auto const& page : p_fmat->GetBatches<GHistIndexMatrix>(&ctx, p)) {
+    cuts = std::make_shared<common::HistogramCuts>(page.Cuts());
+  }
+  cuts->cut_ptrs_.SetDevice(ctx.Device());
+  cuts->cut_values_.SetDevice(ctx.Device());
+  cuts->min_vals_.SetDevice(ctx.Device());
+  for (auto const& page : p_fmat->GetBatches<EllpackPage>(&ctx, p)) {
+    std::vector<common::CompressedByteT> h_buffer;
+    auto h_acc = page.Impl()->GetHostAccessor(p_fmat->Info().feature_types.ConstDeviceSpan());
+    ASSERT_EQ(h_acc.n_rows, 2);
+    ASSERT_EQ(h_acc.row_stride, 2);
+    ASSERT_EQ(h_acc.gidx_iter[0], 0);
+    ASSERT_EQ(h_acc.gidx_iter[1], 4);  // cat 1
+    ASSERT_EQ(h_acc.gidx_iter[2], 1);
+    ASSERT_EQ(h_acc.gidx_iter[3], 3);  // cat 0
+  }
+}
+
 struct ReadRowFunction {
   EllpackDeviceAccessor matrix;
   int row;
diff --git a/tests/cpp/tree/gpu_hist/test_row_partitioner.cu b/tests/cpp/tree/gpu_hist/test_row_partitioner.cu
index 14ea6fd70a4e..54bf17247432 100644
--- a/tests/cpp/tree/gpu_hist/test_row_partitioner.cu
+++ b/tests/cpp/tree/gpu_hist/test_row_partitioner.cu
@@ -6,15 +6,12 @@
 #include <gtest/gtest.h>
 #include <thrust/device_vector.h>
-#include
 #include
 #include
 
 #include "../../../../src/tree/gpu_hist/row_partitioner.cuh"
 #include "../../helpers.h"
 #include "xgboost/base.h"
-#include "xgboost/context.h"
-#include "xgboost/task.h"
-#include "xgboost/tree_model.h"
+#include "../../helpers.h"  // for RandomDataGenerator
 
 namespace xgboost::tree {
 void TestUpdatePositionBatch() {
@@ -55,7 +52,9 @@ void TestSortPositionBatch(const std::vector<int>& ridx_in, const std::vector<int>& segments) {
   thrust::device_vector<RowIndexT> ridx_tmp(ridx_in.size());
   thrust::device_vector<int> counts(segments.size());
-  auto op = [=] __device__(auto ridx, int split_index, int data) { return ridx % 2 == 0; };
+  auto op = [=] __device__(auto ridx, int split_index, int data) {
+    return ridx % 2 == 0;
+  };
   std::vector<int> op_data(segments.size());
   std::vector<PerNodeData<int>> h_batch_info(segments.size());
   dh::TemporaryArray<PerNodeData<int>> d_batch_info(segments.size());
@@ -73,7 +72,9 @@ void TestSortPositionBatch(const std::vector<int>& ridx_in, const std::vector<int>& segments) {
diff --git a/tests/cpp/tree/test_constraints.cu b/tests/cpp/tree/test_constraints.cu
--- a/tests/cpp/tree/test_constraints.cu
+++ b/tests/cpp/tree/test_constraints.cu
 #include <gtest/gtest.h>
 #include <thrust/device_vector.h>
 #include <thrust/host_vector.h>
-#include
-#include
-#include
+
+#include
 #include
+
+#include "../../../src/common/device_helpers.cuh"
 #include "../../../src/tree/constraints.cuh"
 #include "../../../src/tree/param.h"
-#include "../../../src/common/device_helpers.cuh"
 
 namespace xgboost {
 namespace {
@@ -36,9 +37,7 @@ std::string GetConstraintsStr() {
 }
 
 tree::TrainParam GetParameter() {
-  std::vector<std::pair<std::string, std::string>> args{
-      {"interaction_constraints", GetConstraintsStr()}
-  };
+  Args args{{"interaction_constraints", GetConstraintsStr()}};
   tree::TrainParam param;
   param.Init(args);
   return param;
assert df["ti+"].iloc[-1] < df["ti+"].iloc[0] + # Training continuation + ltr.fit(x, c, qid=q, eval_set=[(x, c)], eval_qid=[q], xgb_model=ltr) + # normalized + np.testing.assert_allclose(df["ti+"].iloc[0], 1.0) + np.testing.assert_allclose(df["tj-"].iloc[0], 1.0) + def test_normalization() -> None: run_normalization("cpu")