From 3402a5373325f68bfdecfcac77548540930fba67 Mon Sep 17 00:00:00 2001 From: Sergey Lebedev Date: Mon, 5 Dec 2022 08:32:23 +0000 Subject: [PATCH] TL/SHARP: fix assert check --- .github/workflows/clang-tidy-nvidia.yaml | 11 ++++++++++- src/components/tl/sharp/tl_sharp_context.c | 6 +++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/.github/workflows/clang-tidy-nvidia.yaml b/.github/workflows/clang-tidy-nvidia.yaml index bef88a5b3e..f44c7c6563 100644 --- a/.github/workflows/clang-tidy-nvidia.yaml +++ b/.github/workflows/clang-tidy-nvidia.yaml @@ -4,6 +4,8 @@ on: [push, pull_request] env: OPEN_UCX_LINK: https://github.com/openucx/ucx + HPCX_LINK: http://content.mellanox.com/hpc/hpc-x/v2.13/hpcx-v2.13-gcc-MLNX_OFED_LINUX-5-ubuntu20.04-cuda11-gdrcopy2-nccl2.12-x86_64.tbz + SHARP_DIR: /tmp/ OPEN_UCX_BRANCH: master CLANG_VER: 12 CUDA_VER: 11-4 @@ -15,6 +17,7 @@ jobs: run: | sudo apt-get update sudo apt-get install -y --no-install-recommends wget lsb-core software-properties-common + sudo apt-get install -y --no-install-recommends libibverbs-dev rdma-core rdmacm-utils libibumad-dev wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key| sudo apt-key add - sudo apt-add-repository "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-${CLANG_VER} main" sudo apt-get install -y --no-install-recommends doxygen doxygen-latex clang-tidy-${CLANG_VER} bear @@ -31,11 +34,17 @@ jobs: cd /tmp/ucx && ./autogen.sh CC=clang-${CLANG_VER} CXX=clang++-${CLANG_VER} ./contrib/configure-release --without-java --without-go --disable-numa --prefix $PWD/install make -j install + - name: Download HPCX + run: | + cd /tmp + wget ${HPCX_LINK} + tar xjf hpcx-v2.13-gcc-MLNX_OFED_LINUX-5-ubuntu20.04-cuda11-gdrcopy2-nccl2.12-x86_64.tbz + mv hpcx-v2.13-gcc-MLNX_OFED_LINUX-5-ubuntu20.04-cuda11-gdrcopy2-nccl2.12-x86_64 hpcx - uses: actions/checkout@v1 - name: Build UCC run: | ./autogen.sh - CC=clang-${CLANG_VER} CXX=clang++-${CLANG_VER} ./configure --prefix=/tmp/ucc/install --with-ucx=/tmp/ucx/install --with-cuda=/usr/local/cuda --with-nvcc-gencode="-gencode=arch=compute_80,code=sm_80" --enable-assert + LDFLAGS="-L/tmp/ucx/install/lib/ -lucp -luct -lucs -lucm" CC=clang-${CLANG_VER} CXX=clang++-${CLANG_VER} ./configure --with-sharp=/tmp/hpcx/sharp --prefix=/tmp/ucc/install --with-ucx=/tmp/ucx/install --with-cuda=/usr/local/cuda --with-nvcc-gencode="-gencode=arch=compute_80,code=sm_80" --enable-assert bear --exclude ${GITHUB_WORKSPACE}/src/components/ec/cuda/kernel/ --exclude ${GITHUB_WORKSPACE}/src/components/mc/cuda/kernel/ --cdb /tmp/compile_commands.json make - name: Run clang-tidy run: | diff --git a/src/components/tl/sharp/tl_sharp_context.c b/src/components/tl/sharp/tl_sharp_context.c index 589a9441a4..6def07ea46 100644 --- a/src/components/tl/sharp/tl_sharp_context.c +++ b/src/components/tl/sharp/tl_sharp_context.c @@ -28,7 +28,7 @@ static int ucc_tl_sharp_oob_barrier(void *arg) status = oob_coll->allgather(&sbuf, rbuf, sizeof(char), oob_coll->coll_info, &req); if (UCC_OK == status) { - ucc_assert(req); + ucc_assert(req != NULL); while (UCC_OK != (status = oob_coll->req_test(req))) { if (status < 0) { tl_error(ctx->super.super.lib, "failed to test oob req"); @@ -66,7 +66,7 @@ static int ucc_tl_sharp_oob_gather(void *arg, int root, void *sbuf, status = oob_coll->allgather(sbuf, rbuf, msg_size, oob_coll->coll_info, &req); if (UCC_OK == status) { - ucc_assert(req); + ucc_assert(req != NULL); while (UCC_OK != (status = oob_coll->req_test(req))) { if (status < 0) { tl_error(ctx->super.super.lib, "failed to test oob req"); @@ -103,7 +103,7 @@ static int ucc_tl_sharp_oob_bcast(void *arg, void *buf, int size, int root) status = oob_coll->allgather(buf, tmp_rbuf, msg_size, oob_coll ->coll_info, &req); if (UCC_OK == status) { - ucc_assert(req); + ucc_assert(req != NULL); while (UCC_OK != (status = oob_coll ->req_test(req))) { if (status < 0) { tl_error(ctx->super.super.lib, "failed to test oob req");