Merge branch 'vertical-federated-learning' into SecureBoostP2
ZiyueXu77 authored Apr 29, 2024
2 parents 38e9d3d + 9ecfc84 commit 82ad9a8
Showing 9 changed files with 155 additions and 45 deletions.
26 changes: 18 additions & 8 deletions .github/workflows/python_wheels.yml
@@ -5,37 +5,47 @@ on: [push, pull_request]
permissions:
contents: read # to fetch code (actions/checkout)

defaults:
run:
shell: bash -l {0}

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
python-wheels:
name: Build wheel for ${{ matrix.platform_id }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
include:
- os: macos-latest
- os: macos-13
platform_id: macosx_x86_64
- os: macos-latest
- os: macos-14
platform_id: macosx_arm64
steps:
- uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
with:
submodules: 'true'
- name: Setup Python
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
- uses: conda-incubator/setup-miniconda@v3.0.4
with:
python-version: "3.8"
miniforge-variant: Mambaforge
miniforge-version: latest
python-version: 3.9
use-mamba: true
- name: Build wheels
run: bash tests/ci_build/build_python_wheels.sh ${{ matrix.platform_id }} ${{ github.sha }}
- name: Extract branch name
shell: bash
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
run: |
echo "branch=${GITHUB_REF#refs/heads/}" >> "$GITHUB_OUTPUT"
id: extract_branch
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
- name: Upload Python wheel
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
run: |
python -m pip install awscli
python -m awscli s3 cp wheelhouse/*.whl s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/ --acl public-read
python -m awscli s3 cp wheelhouse/*.whl s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/ --acl public-read --region us-west-2
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
4 changes: 2 additions & 2 deletions .github/workflows/r_tests.yml
@@ -25,7 +25,7 @@ jobs:
with:
submodules: 'true'

- uses: r-lib/actions/setup-r@e40ad904310fc92e96951c1b0d64f3de6cbe9e14 # v2.6.5
- uses: r-lib/actions/setup-r@b7e68d63e51bdf225997973e2add36d551f60f02 # v2.8.7
with:
r-version: ${{ matrix.config.r }}

@@ -63,7 +63,7 @@ jobs:
with:
submodules: 'true'

- uses: r-lib/actions/setup-r@e40ad904310fc92e96951c1b0d64f3de6cbe9e14 # v2.6.5
- uses: r-lib/actions/setup-r@b7e68d63e51bdf225997973e2add36d551f60f02 # v2.8.7
with:
r-version: ${{ matrix.config.r }}

6 changes: 0 additions & 6 deletions python-package/packager/nativelib.py
@@ -53,12 +53,6 @@ def _build(*, generator: str) -> None:
]
cmake_cmd.extend(build_config.get_cmake_args())

# Flag for cross-compiling for Apple Silicon
# We use environment variable because it's the only way to pass down custom
# flags through the cibuildwheel package, which calls `pip wheel` command.
if "CIBW_TARGET_OSX_ARM64" in os.environ:
cmake_cmd.append("-DCMAKE_OSX_ARCHITECTURES=arm64")

logger.info("CMake args: %s", str(cmake_cmd))
subprocess.check_call(cmake_cmd, cwd=build_dir)

106 changes: 106 additions & 0 deletions tests/buildkite/infrastructure/README.md
@@ -0,0 +1,106 @@
BuildKite CI Infrastructure
===========================

# Worker image builder (`worker-image-pipeline/`)

Use EC2 Image Builder to build machine images in a deterministic fashion.
The machine images are used to initialize workers in the CI/CD pipelines.

## Editing bootstrap scripts

Currently, we create two pipelines for machine images: one for Linux workers and another
for Windows workers.
You can edit the bootstrap scripts to change how the worker machines are initialized.

* `linux-amd64-gpu-bootstrap.yml`: Bootstrap script for Linux worker machines
* `windows-gpu-bootstrap.yml`: Bootstrap script for Windows worker machines

## Creating and running Image Builder pipelines

Run the following commands to create and run pipelines in EC2 Image Builder service:
```bash
python worker-image-pipeline/create_worker_image_pipelines.py --aws-region us-west-2
python worker-image-pipeline/run_pipelines.py --aws-region us-west-2
```
Go to the AWS CloudFormation console and verify the existence of two CloudFormation stacks:
* `buildkite-windows-gpu-worker`
* `buildkite-linux-amd64-gpu-worker`

Then go to the EC2 Image Builder console to check the status of the image builds. You may
want to inspect the log output should a build fail.
Once the new machine images are done building, see the next section to deploy the new
images to the worker machines.

# Elastic CI Stack for AWS (`aws-stack-creator/`)

Use EC2 Autoscaling groups to launch worker machines in EC2. BuildKite periodically sends
messages to the Autoscaling groups to increase or decrease the number of workers according
to the number of outstanding testing jobs.

## Deploy an updated CI stack with new machine images

First, edit `aws-stack-creator/metadata.py` to update the `AMI_ID` fields:
```python
AMI_ID = {
# Managed by XGBoost team
"linux-amd64-gpu": {
"us-west-2": "...",
},
"linux-amd64-mgpu": {
"us-west-2": "...",
},
"windows-gpu": {
"us-west-2": "...",
},
"windows-cpu": {
"us-west-2": "...",
},
# Managed by BuildKite
# from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml
"linux-amd64-cpu": {
"us-west-2": "...",
},
"pipeline-loader": {
"us-west-2": "...",
},
"linux-arm64-cpu": {
"us-west-2": "...",
},
}
```
AMI IDs uniquely identify the machine images in the EC2 service.
Go to the EC2 Image Builder console to find the AMI IDs for the new machine images
(see the previous section), and update the following fields:

* `AMI_ID["linux-amd64-gpu"]["us-west-2"]`:
Use the latest output from the `buildkite-linux-amd64-gpu-worker` pipeline
* `AMI_ID["linux-amd64-mgpu"]["us-west-2"]`:
Should be identical to `AMI_ID["linux-amd64-gpu"]["us-west-2"]`
* `AMI_ID["windows-gpu"]["us-west-2"]`:
Use the latest output from the `buildkite-windows-gpu-worker` pipeline
* `AMI_ID["windows-cpu"]["us-west-2"]`:
Should be identical to `AMI_ID["windows-gpu"]["us-west-2"]`

Next, visit https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml
to look up the AMI IDs for the following fields:

* `AMI_ID["linux-amd64-cpu"]["us-west-2"]`: Copy and paste the AMI ID from the field
`Mappings/AWSRegion2AMI/us-west-2/linuxamd64`
* `AMI_ID["pipeline-loader"]["us-west-2"]`:
Should be identical to `AMI_ID["linux-amd64-cpu"]["us-west-2"]`
* `AMI_ID["linux-arm64-cpu"]["us-west-2"]`: Copy and paste the AMI ID from the field
`Mappings/AWSRegion2AMI/us-west-2/linuxarm64`
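
For repeated updates, the `aws-stack.yml` lookup above can be scripted. The sketch below is illustrative, not part of this repository: the `extract_ami` helper and the inline `SAMPLE` document are assumptions (the sample reuses AMI IDs from this commit's `metadata.py`), it uses only stdlib regex parsing, and a real YAML parser such as PyYAML would be more robust against formatting changes:

```python
import re

def extract_ami(stack_yaml: str, region: str, key: str) -> str:
    """Pull one AMI ID out of the Mappings/AWSRegion2AMI section of an
    aws-stack.yml document (hypothetical helper, stdlib-only)."""
    # Locate the indented block under the requested region, e.g. "us-west-2:".
    region_block = re.search(
        rf"^\s+{re.escape(region)}:\n((?:^\s+\w+:.*\n?)+)",
        stack_yaml,
        flags=re.MULTILINE,
    )
    if region_block is None:
        raise KeyError(region)
    # Within that block, find the requested mapping key, e.g. "linuxamd64:".
    match = re.search(
        rf"^\s+{re.escape(key)}:\s*(ami-[0-9a-f]+)",
        region_block.group(1),
        flags=re.MULTILINE,
    )
    if match is None:
        raise KeyError(key)
    return match.group(1)

# Tiny inline sample mirroring the aws-stack.yml Mappings layout.
SAMPLE = """\
Mappings:
  AWSRegion2AMI:
    us-west-2:
      linuxamd64: ami-0180f7fb0f07eb0bc
      linuxarm64: ami-00686bdc2043a5505
"""

print(extract_ami(SAMPLE, "us-west-2", "linuxamd64"))  # → ami-0180f7fb0f07eb0bc
```

In practice you would feed it the file downloaded from the S3 URL above instead of the inline sample.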

Finally, run the following command to deploy the new machine images:
```bash
python aws-stack-creator/create_stack.py --aws-region us-west-2 --agent-token AGENT_TOKEN
```
Go to the AWS CloudFormation console and verify the existence of the following
CloudFormation stacks:
* `buildkite-pipeline-loader-autoscaling-group`
* `buildkite-linux-amd64-cpu-autoscaling-group`
* `buildkite-linux-amd64-gpu-autoscaling-group`
* `buildkite-linux-amd64-mgpu-autoscaling-group`
* `buildkite-linux-arm64-cpu-autoscaling-group`
* `buildkite-windows-cpu-autoscaling-group`
* `buildkite-windows-gpu-autoscaling-group`
14 changes: 7 additions & 7 deletions tests/buildkite/infrastructure/aws-stack-creator/metadata.py
@@ -1,27 +1,27 @@
AMI_ID = {
# Managed by XGBoost team
"linux-amd64-gpu": {
"us-west-2": "ami-08c3bc1dd5ec8bc5c",
"us-west-2": "ami-070080d04e81c5e39",
},
"linux-amd64-mgpu": {
"us-west-2": "ami-08c3bc1dd5ec8bc5c",
"us-west-2": "ami-070080d04e81c5e39",
},
"windows-gpu": {
"us-west-2": "ami-03c7f2156f93b22a7",
"us-west-2": "ami-07c14abcf529d816a",
},
"windows-cpu": {
"us-west-2": "ami-03c7f2156f93b22a7",
"us-west-2": "ami-07c14abcf529d816a",
},
# Managed by BuildKite
# from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml
"linux-amd64-cpu": {
"us-west-2": "ami-015e64acb52b3e595",
"us-west-2": "ami-0180f7fb0f07eb0bc",
},
"pipeline-loader": {
"us-west-2": "ami-015e64acb52b3e595",
"us-west-2": "ami-0180f7fb0f07eb0bc",
},
"linux-arm64-cpu": {
"us-west-2": "ami-0884e9c23a2fa98d0",
"us-west-2": "ami-00686bdc2043a5505",
},
}

@@ -15,9 +15,9 @@ phases:
choco --version
choco feature enable -n=allowGlobalConfirmation
# CMake 3.27
Write-Host '>>> Installing CMake 3.27...'
choco install cmake --version 3.27.9 --installargs "ADD_CMAKE_TO_PATH=System"
# CMake 3.29.2
Write-Host '>>> Installing CMake 3.29.2...'
choco install cmake --version 3.29.2 --installargs "ADD_CMAKE_TO_PATH=System"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Notepad++
@@ -53,9 +53,9 @@ phases:
"--wait --passive --norestart --includeOptional"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install CUDA 11.8
Write-Host '>>> Installing CUDA 11.8...'
choco install cuda --version=11.8.0.52206
# Install CUDA 12.4
Write-Host '>>> Installing CUDA 12.4...'
choco install cuda --version=12.4.1.551
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install R
10 changes: 5 additions & 5 deletions tests/ci_build/Dockerfile.gpu
@@ -21,14 +21,14 @@ ENV PATH=/opt/mambaforge/bin:$PATH

# Create new Conda environment with cuDF, Dask, and cuPy
RUN \
conda install -c conda-forge mamba && \
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \
mamba create -y -n gpu_test -c rapidsai -c nvidia -c conda-forge \
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
nccl>=$(cut -d "-" -f 1 << $NCCL_VERSION_ARG) \
dask \
"nccl>=${NCCL_SHORT_VER}" \
dask=2024.1.1 \
dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
pyspark>=3.4.0 cloudpickle cuda-python && \
"pyspark>=3.4.0" cloudpickle cuda-python && \
mamba clean --all && \
conda run --no-capture-output -n gpu_test pip install buildkite-test-collector

14 changes: 7 additions & 7 deletions tests/ci_build/build_python_wheels.sh
@@ -25,16 +25,16 @@ if [[ "$platform_id" == macosx_* ]]; then
# arm64 builds must cross compile because CI is on x64
# cibuildwheel will take care of cross-compilation.
wheel_tag=macosx_12_0_arm64
cpython_ver=38
setup_env_var='CIBW_TARGET_OSX_ARM64=1' # extra flag to be passed to xgboost.packager backend
export PYTHON_CROSSENV=1
cpython_ver=39
cibw_archs=arm64
export MACOSX_DEPLOYMENT_TARGET=12.0
#OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-arm64/llvm-openmp-11.1.0-hf3c4609_1.tar.bz2"
OPENMP_URL="https://xgboost-ci-jenkins-artifacts.s3.us-west-2.amazonaws.com/llvm-openmp-11.1.0-hf3c4609_1-osx-arm64.tar.bz2"
elif [[ "$platform_id" == macosx_x86_64 ]]; then
# MacOS, Intel
wheel_tag=macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64
cpython_ver=38
cpython_ver=39
cibw_archs=x86_64
export MACOSX_DEPLOYMENT_TARGET=10.15
#OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-64/llvm-openmp-11.1.0-hda6cdc1_1.tar.bz2"
OPENMP_URL="https://xgboost-ci-jenkins-artifacts.s3.us-west-2.amazonaws.com/llvm-openmp-11.1.0-hda6cdc1_1-osx-64.tar.bz2"
@@ -44,13 +44,13 @@ if [[ "$platform_id" == macosx_* ]]; then
fi
# Set up environment variables to configure cibuildwheel
export CIBW_BUILD=cp${cpython_ver}-${platform_id}
export CIBW_ARCHS=all
export CIBW_ARCHS=${cibw_archs}
export CIBW_ENVIRONMENT=${setup_env_var}
export CIBW_TEST_SKIP='*-macosx_arm64'
export CIBW_BUILD_VERBOSITY=3

sudo conda create -n build $OPENMP_URL
PREFIX="/usr/local/miniconda/envs/build"
mamba create -n build $OPENMP_URL
PREFIX="$HOME/miniconda3/envs/build"

# Set up build flags for cibuildwheel
# This is needed to bundle libomp lib we downloaded earlier
@@ -113,8 +113,8 @@ def run_with_dask_array(DMatrixT: Type, client: Client) -> None:
cp.cuda.runtime.setDevice(0)
X, y, _ = generate_array()

X = X.map_blocks(cp.asarray)
y = y.map_blocks(cp.asarray)
X = X.map_blocks(cp.asarray) # type: ignore
y = y.map_blocks(cp.asarray) # type: ignore
dtrain = DMatrixT(client, X, y)
out = dxgb.train(
client,
@@ -648,8 +648,8 @@ async def run_from_dask_array_asyncio(scheduler_address: str) -> dxgb.TrainRetur
import cupy as cp

X, y, _ = generate_array()
X = X.map_blocks(cp.array)
y = y.map_blocks(cp.array)
X = X.map_blocks(cp.array) # type: ignore
y = y.map_blocks(cp.array) # type: ignore

m = await xgb.dask.DaskQuantileDMatrix(client, X, y)
output = await xgb.dask.train(
