diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..2e45df5 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,767 @@ +name: Test + +on: [pull_request, merge_group] + +jobs: + test-ubuntu-24-04: + name: Ubuntu 24.04 + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + + - uses: conda-incubator/setup-miniconda@v3 + with: + auto-update-conda: true + + - name: Install Toolchain + run: | + sudo apt-get update + sudo apt-get -y install {gcc,g++}-{9,10,11,12,13,14,mingw-w64-x86-64} clang-{14,15,16,17,18} ninja-build + conda create -y --name cuda-env + conda install -y --name cuda-env cuda-minimal-build + rm -f "$CONDA/envs/cuda-env/bin/ld" + + - name: Build (GCC 9) + run: | + cd tests + mkdir build-gcc-9 + cd build-gcc-9 + cmake -G Ninja -DCMAKE_C_COMPILER=gcc-9 -DCMAKE_CXX_COMPILER=g++-9 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (GCC 10) + run: | + cd tests + mkdir build-gcc-10 + cd build-gcc-10 + cmake -G Ninja -DCMAKE_C_COMPILER=gcc-10 -DCMAKE_CXX_COMPILER=g++-10 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (GCC 11) + run: | + cd tests + mkdir build-gcc-11 + cd build-gcc-11 + cmake -G Ninja -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (GCC 12) + run: | + cd tests + mkdir build-gcc-12 + cd build-gcc-12 + cmake -G Ninja -DCMAKE_C_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (GCC 13) + run: | + cd tests + mkdir build-gcc-13 + cd build-gcc-13 + cmake -G Ninja -DCMAKE_C_COMPILER=gcc-13 -DCMAKE_CXX_COMPILER=g++-13 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. 
+ ninja + + - name: Build (Clang 14) + run: | + cd tests + mkdir build-clang-14 + cd build-clang-14 + cmake -G Ninja -DCMAKE_C_COMPILER=clang-14 -DCMAKE_CXX_COMPILER=clang++-14 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (Clang 15) + run: | + cd tests + mkdir build-clang-15 + cd build-clang-15 + cmake -G Ninja -DCMAKE_C_COMPILER=clang-15 -DCMAKE_CXX_COMPILER=clang++-15 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (Clang 16) + run: | + cd tests + mkdir build-clang-16 + cd build-clang-16 + cmake -G Ninja -DCMAKE_C_COMPILER=clang-16 -DCMAKE_CXX_COMPILER=clang++-16 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (Clang 17) + run: | + cd tests + mkdir build-clang-17 + cd build-clang-17 + cmake -G Ninja -DCMAKE_C_COMPILER=clang-17 -DCMAKE_CXX_COMPILER=clang++-17 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + + - name: Build (MinGW-w64 GCC) + run: | + cd tests + mkdir build-mingw + cd build-mingw + cmake -G Ninja -DCMAKE_TOOLCHAIN_FILE=../mingw-w64-x86_64.cmake -DENABLE_CUDA=False .. 
+ ninja + + - name: Run (GCC 9) + run: | + cd tests/build-gcc-9 + ctest -V + - name: Run (GCC 10) + run: | + cd tests/build-gcc-10 + ctest -V + - name: Run (GCC 11) + run: | + cd tests/build-gcc-11 + ctest -V + - name: Run (GCC 12) + run: | + cd tests/build-gcc-12 + ctest -V + - name: Run (GCC 13) + run: | + cd tests/build-gcc-13 + ctest -V + + - name: Run (Clang 14) + run: | + cd tests/build-clang-14 + ctest -V + - name: Run (Clang 15) + run: | + cd tests/build-clang-15 + ctest -V + - name: Run (Clang 16) + run: | + cd tests/build-clang-16 + ctest -V + - name: Run (Clang 17) + run: | + cd tests/build-clang-17 + ctest -V + + test-ubuntu-22-04: + name: Ubuntu 22.04 + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + + - uses: conda-incubator/setup-miniconda@v3 + with: + auto-update-conda: true + + - name: Install Toolchain + run: | + sudo apt-get update + sudo apt-get -y install {gcc,g++}-{9,10,11,12,mingw-w64-x86-64} clang-{11,12,13,14,15} ninja-build + conda create -y --name cuda-env + conda install -y --name cuda-env cuda-minimal-build + rm -f "$CONDA/envs/cuda-env/bin/ld" + + - name: Build (GCC 9) + run: | + cd tests + mkdir build-gcc-9 + cd build-gcc-9 + cmake -G Ninja -DCMAKE_C_COMPILER=gcc-9 -DCMAKE_CXX_COMPILER=g++-9 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (GCC 10) + run: | + cd tests + mkdir build-gcc-10 + cd build-gcc-10 + cmake -G Ninja -DCMAKE_C_COMPILER=gcc-10 -DCMAKE_CXX_COMPILER=g++-10 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (GCC 11) + run: | + cd tests + mkdir build-gcc-11 + cd build-gcc-11 + cmake -G Ninja -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (GCC 12) + run: | + cd tests + mkdir build-gcc-12 + cd build-gcc-12 + cmake -G Ninja -DCMAKE_C_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. 
+ ninja + + - name: Build (Clang 11) + run: | + cd tests + mkdir build-clang-11 + cd build-clang-11 + cmake -G Ninja -DCMAKE_C_COMPILER=clang-11 -DCMAKE_CXX_COMPILER=clang++-11 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (Clang 12) + run: | + cd tests + mkdir build-clang-12 + cd build-clang-12 + cmake -G Ninja -DCMAKE_C_COMPILER=clang-12 -DCMAKE_CXX_COMPILER=clang++-12 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (Clang 13) + run: | + cd tests + mkdir build-clang-13 + cd build-clang-13 + cmake -G Ninja -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang++-13 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (Clang 14) + run: | + cd tests + mkdir build-clang-14 + cd build-clang-14 + cmake -G Ninja -DCMAKE_C_COMPILER=clang-14 -DCMAKE_CXX_COMPILER=clang++-14 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (Clang 15) + run: | + cd tests + mkdir build-clang-15 + cd build-clang-15 + cmake -G Ninja -DCMAKE_C_COMPILER=clang-15 -DCMAKE_CXX_COMPILER=clang++-15 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + + - name: Build (MinGW-w64 GCC) + run: | + cd tests + mkdir build-mingw + cd build-mingw + cmake -G Ninja -DCMAKE_TOOLCHAIN_FILE=../mingw-w64-x86_64.cmake -DENABLE_CUDA=False .. 
+ ninja + + - name: Run (GCC 9) + run: | + cd tests/build-gcc-9 + ctest -V + - name: Run (GCC 10) + run: | + cd tests/build-gcc-10 + ctest -V + - name: Run (GCC 11) + run: | + cd tests/build-gcc-11 + ctest -V + - name: Run (GCC 12) + run: | + cd tests/build-gcc-12 + ctest -V + + - name: Run (Clang 11) + run: | + cd tests/build-clang-11 + ctest -V + - name: Run (Clang 12) + run: | + cd tests/build-clang-12 + ctest -V + - name: Run (Clang 13) + run: | + cd tests/build-clang-13 + ctest -V + - name: Run (Clang 14) + run: | + cd tests/build-clang-14 + ctest -V + - name: Run (Clang 15) + run: | + cd tests/build-clang-15 + ctest -V + + test-ubuntu-20-04: + name: Ubuntu 20.04 + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v4 + + - uses: conda-incubator/setup-miniconda@v3 + with: + auto-update-conda: true + + - name: Install Toolchain + run: | + sudo apt-get update + sudo apt-get -y install {gcc,g++}-{7,8,9,10,mingw-w64-x86-64} clang-{7,8,9,10,11,12} ninja-build + conda create -y --name cuda-env + conda install -y --name cuda-env cuda-minimal-build + rm -f "$CONDA/envs/cuda-env/bin/ld" + + - name: Build (GCC 7) + run: | + cd tests + mkdir build-gcc-7 + cd build-gcc-7 + cmake -G Ninja -DCMAKE_C_COMPILER=gcc-7 -DCMAKE_CXX_COMPILER=g++-7 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (GCC 8) + run: | + cd tests + mkdir build-gcc-8 + cd build-gcc-8 + cmake -G Ninja -DCMAKE_C_COMPILER=gcc-8 -DCMAKE_CXX_COMPILER=g++-8 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (GCC 9) + run: | + cd tests + mkdir build-gcc-9 + cd build-gcc-9 + cmake -G Ninja -DCMAKE_C_COMPILER=gcc-9 -DCMAKE_CXX_COMPILER=g++-9 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (GCC 10) + run: | + cd tests + mkdir build-gcc-10 + cd build-gcc-10 + cmake -G Ninja -DCMAKE_C_COMPILER=gcc-10 -DCMAKE_CXX_COMPILER=g++-10 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. 
+ ninja + + - name: Build (Clang 7) + run: | + cd tests + mkdir build-clang-7 + cd build-clang-7 + cmake -G Ninja -DCMAKE_C_COMPILER=clang-7 -DCMAKE_CXX_COMPILER=clang++-7 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (Clang 8) + run: | + cd tests + mkdir build-clang-8 + cd build-clang-8 + cmake -G Ninja -DCMAKE_C_COMPILER=clang-8 -DCMAKE_CXX_COMPILER=clang++-8 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (Clang 9) + run: | + cd tests + mkdir build-clang-9 + cd build-clang-9 + cmake -G Ninja -DCMAKE_C_COMPILER=clang-9 -DCMAKE_CXX_COMPILER=clang++-9 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (Clang 10) + run: | + cd tests + mkdir build-clang-10 + cd build-clang-10 + cmake -G Ninja -DCMAKE_C_COMPILER=clang-10 -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (Clang 11) + run: | + cd tests + mkdir build-clang-11 + cd build-clang-11 + cmake -G Ninja -DCMAKE_C_COMPILER=clang-11 -DCMAKE_CXX_COMPILER=clang++-11 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + - name: Build (Clang 12) + run: | + cd tests + mkdir build-clang-12 + cd build-clang-12 + cmake -G Ninja -DCMAKE_C_COMPILER=clang-12 -DCMAKE_CXX_COMPILER=clang++-12 -DCMAKE_CUDA_COMPILER="$CONDA/envs/cuda-env/bin/nvcc" .. + ninja + + - name: Build (MinGW-w64 GCC) + run: | + cd tests + mkdir build-mingw + cd build-mingw + cmake -G Ninja -DCMAKE_TOOLCHAIN_FILE=../mingw-w64-x86_64.cmake -DENABLE_CUDA=False .. 
+ ninja + + - name: Run (GCC 7) + run: | + cd tests/build-gcc-7 + ctest -V + - name: Run (GCC 8) + run: | + cd tests/build-gcc-8 + ctest -V + - name: Run (GCC 9) + run: | + cd tests/build-gcc-9 + ctest -V + - name: Run (GCC 10) + run: | + cd tests/build-gcc-10 + ctest -V + + - name: Run (Clang 7) + run: | + cd tests/build-clang-7 + ctest -V + - name: Run (Clang 8) + run: | + cd tests/build-clang-8 + ctest -V + - name: Run (Clang 9) + run: | + cd tests/build-clang-9 + ctest -V + - name: Run (Clang 10) + run: | + cd tests/build-clang-10 + ctest -V + - name: Run (Clang 11) + run: | + cd tests/build-clang-11 + ctest -V + - name: Run (Clang 12) + run: | + cd tests/build-clang-12 + ctest -V + + test-windows-2022: + name: Windows Server 2022 + runs-on: windows-2022 + steps: + - uses: actions/checkout@v4 + + - uses: conda-incubator/setup-miniconda@v3 + with: + auto-update-conda: true + + - name: Conda - Create Environment + shell: cmd + run: | + conda create -y --name cuda-env + + - name: Conda - Install CUDA Toolchain + shell: cmd + run: | + conda install -y --name cuda-env cuda-minimal-build + + - name: Build (VS 2022) + shell: cmd + run: | + cd tests + mkdir build-vs + cd build-vs + "%ProgramFiles%\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" && "%ProgramFiles%\Microsoft Visual Studio\2022\Enterprise\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\cmake.exe" -G Ninja -DCMAKE_CUDA_COMPILER="%CONDA:\=/%/envs/cuda-env/Library/bin/nvcc.exe" .. 
&& ninja + + - name: Build (Clang-CL) + shell: cmd + run: | + cd tests + mkdir build-clang-cl + cd build-clang-cl + "%ProgramFiles%\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" && "%ProgramFiles%\Microsoft Visual Studio\2022\Enterprise\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\cmake.exe" -G Ninja -DCMAKE_C_COMPILER="%ProgramFiles:\=/%/Microsoft Visual Studio/2022/Enterprise/VC/Tools/Llvm/x64/bin/clang-cl.exe" -DCMAKE_CXX_COMPILER="%ProgramFiles:\=/%/Microsoft Visual Studio/2022/Enterprise/VC/Tools/Llvm/x64/bin/clang-cl.exe" -DCMAKE_LINKER="%ProgramFiles:\=/%/Microsoft Visual Studio/2022/Enterprise/VC/Tools/Llvm/x64/bin/lld-link.exe" -DENABLE_CUDA=False .. && ninja + + - name: Run (VS 2022) + shell: cmd + run: | + cd tests/build-vs + "%ProgramFiles%\Microsoft Visual Studio\2022\Enterprise\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\ctest.exe" -V + + - name: Run (Clang-CL) + shell: cmd + run: | + cd tests/build-clang-cl + "%ProgramFiles%\Microsoft Visual Studio\2022\Enterprise\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\ctest.exe" -V + + test-windows-2022-parallel: + name: Windows Server 2022 (parallel) + runs-on: windows-2022 + steps: + - uses: actions/checkout@v4 + + - name: Build (VS 2022 x86) + shell: cmd + run: | + cd tests + mkdir build-vs-x86 + cd build-vs-x86 + "%ProgramFiles%\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsamd64_x86.bat" && "%ProgramFiles%\Microsoft Visual Studio\2022\Enterprise\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\cmake.exe" -G Ninja -DENABLE_CUDA=False .. 
&& ninja + + - name: Build (LLVM Clang) + shell: cmd + run: | + cd tests + mkdir build-llvm-clang + cd build-llvm-clang + "%ProgramFiles%\CMake\bin\cmake.exe" -G Ninja -DCMAKE_MAKE_PROGRAM="%ProgramFiles%\Microsoft Visual Studio\2022\Enterprise\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja\ninja.exe" -DCMAKE_C_COMPILER="%ProgramFiles%/LLVM/bin/clang.exe" -DCMAKE_CXX_COMPILER="%ProgramFiles%/LLVM/bin/clang++.exe" -DCMAKE_LINKER="%ProgramFiles%/LLVM/bin/lld.exe" -DENABLE_CUDA=False .. + "%ProgramFiles%\Microsoft Visual Studio\2022\Enterprise\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja\ninja.exe" + + - name: Build (VS 2022 Arm64) + shell: cmd + run: | + cd tests + mkdir build-vs-arm64 + cd build-vs-arm64 + "%ProgramFiles%\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsamd64_arm64.bat" && "%ProgramFiles%\Microsoft Visual Studio\2022\Enterprise\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\cmake.exe" -G Ninja -DENABLE_CUDA=False .. && ninja + + - name: Run (VS 2022 x86) + shell: cmd + run: | + cd tests/build-vs-x86 + "%ProgramFiles%\Microsoft Visual Studio\2022\Enterprise\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\ctest.exe" -V + + - name: Run (LLVM Clang) + shell: cmd + run: | + cd tests/build-llvm-clang + "%ProgramFiles%\CMake\bin\ctest.exe" -V + + test-windows-2019: + name: Windows Server 2019 + runs-on: windows-2019 + steps: + - uses: actions/checkout@v4 + + - uses: conda-incubator/setup-miniconda@v3 + with: + auto-update-conda: true + + - name: Conda - Create Environment + shell: cmd + run: | + conda create -y --name cuda-env + + - name: Conda - Install CUDA Toolchain + shell: cmd + run: | + conda install -y --name cuda-env cuda-minimal-build + + - name: Build (VS 2019) + shell: cmd + run: | + cd tests + mkdir build-vs + cd build-vs + "%ProgramFiles(x86)%\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat" && "%ProgramFiles(x86)%\Microsoft Visual 
Studio\2019\Enterprise\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\cmake.exe" -G Ninja -DCMAKE_CUDA_COMPILER="%CONDA:\=/%/envs/cuda-env/Library/bin/nvcc.exe" .. && ninja + + - name: Build (Clang-CL) + shell: cmd + run: | + cd tests + mkdir build-clang-cl + cd build-clang-cl + "%ProgramFiles(x86)%\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat" && "%ProgramFiles(x86)%\Microsoft Visual Studio\2019\Enterprise\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\cmake.exe" -G Ninja -DCMAKE_C_COMPILER="%ProgramFiles(x86):\=/%/Microsoft Visual Studio/2019/Enterprise/VC/Tools/Llvm/x64/bin/clang-cl.exe" -DCMAKE_CXX_COMPILER="%ProgramFiles(x86):\=/%/Microsoft Visual Studio/2019/Enterprise/VC/Tools/Llvm/x64/bin/clang-cl.exe" -DCMAKE_LINKER="%ProgramFiles(x86):\=/%/Microsoft Visual Studio/2019/Enterprise/VC/Tools/Llvm/x64/bin/lld-link.exe" -DENABLE_CUDA=False .. && ninja + + - name: Run (VS 2019) + shell: cmd + run: | + cd tests/build-vs + "%ProgramFiles(x86)%\Microsoft Visual Studio\2019\Enterprise\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\ctest.exe" -V + + - name: Run (Clang-CL) + shell: cmd + run: | + cd tests/build-clang-cl + "%ProgramFiles(x86)%\Microsoft Visual Studio\2019\Enterprise\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\ctest.exe" -V + + test-windows-2019-parallel: + name: Windows Server 2019 (parallel) + runs-on: windows-2019 + steps: + - uses: actions/checkout@v4 + + - name: Build (VS 2019 x86) + shell: cmd + run: | + cd tests + mkdir build-vs-x86 + cd build-vs-x86 + "%ProgramFiles(x86)%\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsamd64_x86.bat" && "%ProgramFiles(x86)%\Microsoft Visual Studio\2019\Enterprise\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\cmake.exe" -G Ninja -DENABLE_CUDA=False .. 
&& ninja + + - name: Build (LLVM Clang) + shell: cmd + run: | + cd tests + mkdir build-llvm-clang + cd build-llvm-clang + "%ProgramFiles%\CMake\bin\cmake.exe" -G Ninja -DCMAKE_MAKE_PROGRAM="%ProgramFiles(x86)%\Microsoft Visual Studio\2019\Enterprise\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja\ninja.exe" -DCMAKE_C_COMPILER="%ProgramFiles%/LLVM/bin/clang.exe" -DCMAKE_CXX_COMPILER="%ProgramFiles%/LLVM/bin/clang++.exe" -DCMAKE_LINKER="%ProgramFiles%/LLVM/bin/lld.exe" -DENABLE_CUDA=False .. + "%ProgramFiles(x86)%\Microsoft Visual Studio\2019\Enterprise\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja\ninja.exe" + + - name: Build (VS 2019 Arm64) + shell: cmd + run: | + cd tests + mkdir build-vs-arm64 + cd build-vs-arm64 + "%ProgramFiles(x86)%\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsamd64_arm64.bat" && "%ProgramFiles(x86)%\Microsoft Visual Studio\2019\Enterprise\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\cmake.exe" -G Ninja -DENABLE_CUDA=False .. && ninja + + - name: Run (VS 2019 x86) + shell: cmd + run: | + cd tests/build-vs-x86 + "%ProgramFiles(x86)%\Microsoft Visual Studio\2019\Enterprise\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\ctest.exe" -V + + - name: Run (LLVM Clang) + shell: cmd + run: | + cd tests/build-llvm-clang + "%ProgramFiles%\CMake\bin\ctest.exe" -V + + test-macos-15: + name: macOS 15 + runs-on: macos-15 + steps: + - uses: actions/checkout@v4 + + - name: Install Toolchain + run: | + brew install --force-bottle ninja llvm lld mingw-w64 + + - name: Build (Apple Clang) + run: | + cd tests + mkdir build-appleclang + cd build-appleclang + cmake -G Ninja .. + ninja + + - name: Build (GCC) + run: | + cd tests + mkdir build-gcc + cd build-gcc + GCC_VERSION="$(brew list --versions gcc | cut '-d ' -f2 | cut '-d.' -f1)" + cmake -G Ninja -DCMAKE_C_COMPILER="gcc-$GCC_VERSION" -DCMAKE_CXX_COMPILER="g++-$GCC_VERSION" .. 
+ ninja + + - name: Build (Clang) + run: | + cd tests + mkdir build-clang + cd build-clang + export PATH="/opt/homebrew/opt/llvm/bin:$PATH" + export LDFLAGS="-L/opt/homebrew/opt/llvm/lib" + export CPPFLAGS="-I/opt/homebrew/opt/llvm/include" + cmake -G Ninja -DCMAKE_C_COMPILER="/opt/homebrew/opt/llvm/bin/clang" -DCMAKE_CXX_COMPILER="/opt/homebrew/opt/llvm/bin/clang++" -DCMAKE_LINKER="ld64.lld" .. + ninja + + - name: Build (MinGW-w64 GCC) + run: | + cd tests + mkdir build-mingw + cd build-mingw + cmake -G Ninja -DCMAKE_TOOLCHAIN_FILE=../mingw-w64-x86_64.cmake -DENABLE_CUDA=False .. + ninja + + - name: Run (Apple Clang) + run: | + cd tests/build-appleclang + ctest -V + + - name: Run (GCC) + run: | + cd tests/build-gcc + ctest -V + + - name: Run (Clang) + run: | + cd tests/build-clang + ctest -V + + test-macos-14: + name: macOS 14 + runs-on: macos-14 + steps: + - uses: actions/checkout@v4 + + - name: Install Toolchain + run: | + brew install --force-bottle ninja llvm lld mingw-w64 + + - name: Build (Apple Clang) + run: | + cd tests + mkdir build-appleclang + cd build-appleclang + cmake -G Ninja .. + ninja + + - name: Build (GCC) + run: | + cd tests + mkdir build-gcc + cd build-gcc + GCC_VERSION="$(brew list --versions gcc | cut '-d ' -f2 | cut '-d.' -f1)" + cmake -G Ninja -DCMAKE_C_COMPILER="gcc-$GCC_VERSION" -DCMAKE_CXX_COMPILER="g++-$GCC_VERSION" .. + ninja + + - name: Build (Clang) + run: | + cd tests + mkdir build-clang + cd build-clang + export PATH="/opt/homebrew/opt/llvm/bin:$PATH" + export LDFLAGS="-L/opt/homebrew/opt/llvm/lib" + export CPPFLAGS="-I/opt/homebrew/opt/llvm/include" + cmake -G Ninja -DCMAKE_C_COMPILER="/opt/homebrew/opt/llvm/bin/clang" -DCMAKE_CXX_COMPILER="/opt/homebrew/opt/llvm/bin/clang++" -DCMAKE_LINKER="ld64.lld" .. + ninja + + - name: Build (MinGW-w64 GCC) + run: | + cd tests + mkdir build-mingw + cd build-mingw + cmake -G Ninja -DCMAKE_TOOLCHAIN_FILE=../mingw-w64-x86_64.cmake -DENABLE_CUDA=False .. 
+ ninja + + - name: Run (Apple Clang) + run: | + cd tests/build-appleclang + ctest -V + + - name: Run (GCC) + run: | + cd tests/build-gcc + ctest -V + + - name: Run (Clang) + run: | + cd tests/build-clang + ctest -V + + test-macos-13: + name: macOS 13 + runs-on: macos-13 + steps: + - uses: actions/checkout@v4 + + - name: Install Toolchain + run: | + brew install --force-bottle ninja llvm lld mingw-w64 + + - name: Build (Apple Clang) + run: | + cd tests + mkdir build-appleclang + cd build-appleclang + cmake -G Ninja .. + ninja + + - name: Build (GCC) + run: | + cd tests + mkdir build-gcc + cd build-gcc + GCC_VERSION="$(brew list --versions gcc | cut '-d ' -f2 | cut '-d.' -f1)" + cmake -G Ninja -DCMAKE_C_COMPILER="gcc-$GCC_VERSION" -DCMAKE_CXX_COMPILER="g++-$GCC_VERSION" .. + ninja + + - name: Build (Clang) + run: | + cd tests + mkdir build-clang + cd build-clang + export PATH="/usr/local/opt/llvm/bin:$PATH" + export LDFLAGS="-L/usr/local/opt/llvm/lib" + export CPPFLAGS="-I/usr/local/opt/llvm/include" + cmake -G Ninja -DCMAKE_C_COMPILER="/usr/local/opt/llvm/bin/clang" -DCMAKE_CXX_COMPILER="/usr/local/opt/llvm/bin/clang++" -DCMAKE_LINKER="ld64.lld" .. + ninja + + - name: Build (MinGW-w64 GCC) + run: | + cd tests + mkdir build-mingw + cd build-mingw + cmake -G Ninja -DCMAKE_TOOLCHAIN_FILE=../mingw-w64-x86_64.cmake -DENABLE_CUDA=False .. 
+ ninja + + - name: Run (Apple Clang) + run: | + cd tests/build-appleclang + ctest -V + + - name: Run (GCC) + run: | + cd tests/build-gcc + ctest -V + + - name: Run (Clang) + run: | + cd tests/build-clang + ctest -V diff --git a/c/include/nvtx3/nvtxDetail/nvtxImpl.h b/c/include/nvtx3/nvtxDetail/nvtxImpl.h index a63138a..7548aa7 100644 --- a/c/include/nvtx3/nvtxDetail/nvtxImpl.h +++ b/c/include/nvtx3/nvtxDetail/nvtxImpl.h @@ -145,8 +145,8 @@ typedef struct nvtxGlobals_t nvtxNameClEventA_fakeimpl_fntype nvtxNameClEventA_impl_fnptr; nvtxNameClEventW_fakeimpl_fntype nvtxNameClEventW_impl_fnptr; - nvtxNameCudaDeviceA_impl_fntype nvtxNameCudaDeviceA_impl_fnptr; - nvtxNameCudaDeviceW_impl_fntype nvtxNameCudaDeviceW_impl_fnptr; + nvtxNameCudaDeviceA_fakeimpl_fntype nvtxNameCudaDeviceA_impl_fnptr; + nvtxNameCudaDeviceW_fakeimpl_fntype nvtxNameCudaDeviceW_impl_fnptr; nvtxNameCudaStreamA_fakeimpl_fntype nvtxNameCudaStreamA_impl_fnptr; nvtxNameCudaStreamW_fakeimpl_fntype nvtxNameCudaStreamW_impl_fnptr; nvtxNameCudaEventA_fakeimpl_fntype nvtxNameCudaEventA_impl_fnptr; @@ -168,12 +168,12 @@ typedef struct nvtxGlobals_t nvtxDomainDestroy_impl_fntype nvtxDomainDestroy_impl_fnptr; nvtxInitialize_impl_fntype nvtxInitialize_impl_fnptr; - nvtxDomainSyncUserCreate_impl_fntype nvtxDomainSyncUserCreate_impl_fnptr; - nvtxDomainSyncUserDestroy_impl_fntype nvtxDomainSyncUserDestroy_impl_fnptr; - nvtxDomainSyncUserAcquireStart_impl_fntype nvtxDomainSyncUserAcquireStart_impl_fnptr; - nvtxDomainSyncUserAcquireFailed_impl_fntype nvtxDomainSyncUserAcquireFailed_impl_fnptr; - nvtxDomainSyncUserAcquireSuccess_impl_fntype nvtxDomainSyncUserAcquireSuccess_impl_fnptr; - nvtxDomainSyncUserReleasing_impl_fntype nvtxDomainSyncUserReleasing_impl_fnptr; + nvtxDomainSyncUserCreate_fakeimpl_fntype nvtxDomainSyncUserCreate_impl_fnptr; + nvtxDomainSyncUserDestroy_fakeimpl_fntype nvtxDomainSyncUserDestroy_impl_fnptr; + nvtxDomainSyncUserAcquireStart_fakeimpl_fntype nvtxDomainSyncUserAcquireStart_impl_fnptr; + 
nvtxDomainSyncUserAcquireFailed_fakeimpl_fntype nvtxDomainSyncUserAcquireFailed_impl_fnptr; + nvtxDomainSyncUserAcquireSuccess_fakeimpl_fntype nvtxDomainSyncUserAcquireSuccess_impl_fnptr; + nvtxDomainSyncUserReleasing_fakeimpl_fntype nvtxDomainSyncUserReleasing_impl_fnptr; /* Tables of function pointers -- Extra null added to the end to ensure * a crash instead of silent corruption if a tool reads off the end. */ diff --git a/c/include/nvtx3/nvtxDetail/nvtxInitDecls.h b/c/include/nvtx3/nvtxDetail/nvtxInitDecls.h index 8e906e2..40fc1af 100644 --- a/c/include/nvtx3/nvtxDetail/nvtxInitDecls.h +++ b/c/include/nvtx3/nvtxDetail/nvtxInitDecls.h @@ -85,9 +85,9 @@ NVTX_LINKONCE_FWDDECL_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTI NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init)(nvtxDomainHandle_t domain); NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init)(const void* reserved); -NVTX_LINKONCE_FWDDECL_FUNCTION nvtxSyncUser_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs); -NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init)(nvtxSyncUser_t handle); -NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init)(nvtxSyncUser_t handle); -NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init)(nvtxSyncUser_t handle); -NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init)(nvtxSyncUser_t handle); -NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init)(nvtxSyncUser_t handle); +NVTX_LINKONCE_FWDDECL_FUNCTION nvtx_nvtxSyncUser_t NVTX_API 
NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init)(nvtxDomainHandle_t domain, const nvtx_nvtxSyncUserAttributes_t* attribs); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init)(nvtx_nvtxSyncUser_t handle); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init)(nvtx_nvtxSyncUser_t handle); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init)(nvtx_nvtxSyncUser_t handle); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init)(nvtx_nvtxSyncUser_t handle); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init)(nvtx_nvtxSyncUser_t handle); diff --git a/c/include/nvtx3/nvtxDetail/nvtxInitDefs.h b/c/include/nvtx3/nvtxDetail/nvtxInitDefs.h index 1d8b814..ef156d6 100644 --- a/c/include/nvtx3/nvtxDetail/nvtxInitDefs.h +++ b/c/include/nvtx3/nvtxDetail/nvtxInitDefs.h @@ -237,7 +237,7 @@ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCu } NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init)(int device, const char* name){ - nvtxNameCudaDeviceA_impl_fntype local; + nvtxNameCudaDeviceA_fakeimpl_fntype local; NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr; if (local) @@ -245,7 +245,7 @@ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCu } NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init)(int device, const wchar_t* name){ - nvtxNameCudaDeviceW_impl_fntype local; + nvtxNameCudaDeviceW_fakeimpl_fntype local; NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr; if (local) @@ -396,50 
+396,50 @@ NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCl local(evnt, name); } -NVTX_LINKONCE_DEFINE_FUNCTION nvtxSyncUser_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs){ - nvtxDomainSyncUserCreate_impl_fntype local; +NVTX_LINKONCE_DEFINE_FUNCTION nvtx_nvtxSyncUser_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init)(nvtxDomainHandle_t domain, const nvtx_nvtxSyncUserAttributes_t* attribs){ + nvtxDomainSyncUserCreate_fakeimpl_fntype local; NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr; if (local) { return local(domain, attribs); } - return (nvtxSyncUser_t)0; + return (nvtx_nvtxSyncUser_t)0; } -NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init)(nvtxSyncUser_t handle){ - nvtxDomainSyncUserDestroy_impl_fntype local; +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init)(nvtx_nvtxSyncUser_t handle){ + nvtxDomainSyncUserDestroy_fakeimpl_fntype local; NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr; if (local) local(handle); } -NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init)(nvtxSyncUser_t handle){ - nvtxDomainSyncUserAcquireStart_impl_fntype local; +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init)(nvtx_nvtxSyncUser_t handle){ + nvtxDomainSyncUserAcquireStart_fakeimpl_fntype local; NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr; if (local) local(handle); } -NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API 
NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init)(nvtxSyncUser_t handle){ - nvtxDomainSyncUserAcquireFailed_impl_fntype local; +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init)(nvtx_nvtxSyncUser_t handle){ + nvtxDomainSyncUserAcquireFailed_fakeimpl_fntype local; NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr; if (local) local(handle); } -NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init)(nvtxSyncUser_t handle){ - nvtxDomainSyncUserAcquireSuccess_impl_fntype local; +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init)(nvtx_nvtxSyncUser_t handle){ + nvtxDomainSyncUserAcquireSuccess_fakeimpl_fntype local; NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr; if (local) local(handle); } -NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init)(nvtxSyncUser_t handle){ - nvtxDomainSyncUserReleasing_impl_fntype local; +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init)(nvtx_nvtxSyncUser_t handle){ + nvtxDomainSyncUserReleasing_fakeimpl_fntype local; NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr; if (local) diff --git a/c/include/nvtx3/nvtxDetail/nvtxTypes.h b/c/include/nvtx3/nvtxDetail/nvtxTypes.h index 94be056..0242a7e 100644 --- a/c/include/nvtx3/nvtxDetail/nvtxTypes.h +++ b/c/include/nvtx3/nvtxDetail/nvtxTypes.h @@ -52,9 +52,8 @@ typedef void* nvtx_cl_kernel; typedef void* nvtx_cl_event; typedef void* nvtx_cl_sampler; -typedef struct nvtxSyncUser* nvtxSyncUser_t; -struct nvtxSyncUserAttributes_v0; -typedef struct 
nvtxSyncUserAttributes_v0 nvtxSyncUserAttributes_t; +typedef void* nvtx_nvtxSyncUser_t; +typedef void nvtx_nvtxSyncUserAttributes_t; /* --------- Types for function pointers (with fake API types) ---------- */ @@ -101,8 +100,8 @@ typedef void (NVTX_API * nvtxNameClEventA_fakeimpl_fntype)(nvtx_cl_event evnt, c typedef void (NVTX_API * nvtxNameClEventW_fakeimpl_fntype)(nvtx_cl_event evnt, const wchar_t* name); /* Real impl types are defined in nvtxImplCudaRt_v3.h, where CUDART headers are included */ -typedef void (NVTX_API * nvtxNameCudaDeviceA_impl_fntype)(int device, const char* name); -typedef void (NVTX_API * nvtxNameCudaDeviceW_impl_fntype)(int device, const wchar_t* name); +typedef void (NVTX_API * nvtxNameCudaDeviceA_fakeimpl_fntype)(int device, const char* name); +typedef void (NVTX_API * nvtxNameCudaDeviceW_fakeimpl_fntype)(int device, const wchar_t* name); typedef void (NVTX_API * nvtxNameCudaStreamA_fakeimpl_fntype)(nvtx_cudaStream_t stream, const char* name); typedef void (NVTX_API * nvtxNameCudaStreamW_fakeimpl_fntype)(nvtx_cudaStream_t stream, const wchar_t* name); typedef void (NVTX_API * nvtxNameCudaEventA_fakeimpl_fntype)(nvtx_cudaEvent_t event, const char* name); @@ -124,12 +123,12 @@ typedef nvtxDomainHandle_t (NVTX_API * nvtxDomainCreateW_impl_fntype)(const wcha typedef void (NVTX_API * nvtxDomainDestroy_impl_fntype)(nvtxDomainHandle_t domain); typedef void (NVTX_API * nvtxInitialize_impl_fntype)(const void* reserved); -typedef nvtxSyncUser_t (NVTX_API * nvtxDomainSyncUserCreate_impl_fntype)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs); -typedef void (NVTX_API * nvtxDomainSyncUserDestroy_impl_fntype)(nvtxSyncUser_t handle); -typedef void (NVTX_API * nvtxDomainSyncUserAcquireStart_impl_fntype)(nvtxSyncUser_t handle); -typedef void (NVTX_API * nvtxDomainSyncUserAcquireFailed_impl_fntype)(nvtxSyncUser_t handle); -typedef void (NVTX_API * nvtxDomainSyncUserAcquireSuccess_impl_fntype)(nvtxSyncUser_t handle); -typedef void 
(NVTX_API * nvtxDomainSyncUserReleasing_impl_fntype)(nvtxSyncUser_t handle); +typedef nvtx_nvtxSyncUser_t (NVTX_API * nvtxDomainSyncUserCreate_fakeimpl_fntype)(nvtxDomainHandle_t domain, const nvtx_nvtxSyncUserAttributes_t* attribs); +typedef void (NVTX_API * nvtxDomainSyncUserDestroy_fakeimpl_fntype)(nvtx_nvtxSyncUser_t handle); +typedef void (NVTX_API * nvtxDomainSyncUserAcquireStart_fakeimpl_fntype)(nvtx_nvtxSyncUser_t handle); +typedef void (NVTX_API * nvtxDomainSyncUserAcquireFailed_fakeimpl_fntype)(nvtx_nvtxSyncUser_t handle); +typedef void (NVTX_API * nvtxDomainSyncUserAcquireSuccess_fakeimpl_fntype)(nvtx_nvtxSyncUser_t handle); +typedef void (NVTX_API * nvtxDomainSyncUserReleasing_fakeimpl_fntype)(nvtx_nvtxSyncUser_t handle); /* ---------------- Types for callback subscription --------------------- */ diff --git a/tests/.gitignore b/tests/.gitignore new file mode 100644 index 0000000..0ec9e7f --- /dev/null +++ b/tests/.gitignore @@ -0,0 +1 @@ +/build* diff --git a/tests/Attributes.cpp b/tests/Attributes.cpp new file mode 100644 index 0000000..582088b --- /dev/null +++ b/tests/Attributes.cpp @@ -0,0 +1,224 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. 
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#include +// Include again to catch bad guards +#include + +#include +#include + +#include "PrettyPrintersNvtxCpp.h" + +struct a_lib +{ + static constexpr const char* name{"Library A"}; +}; + +extern "C" NVTX_DYNAMIC_EXPORT +int RunTest(int argc, const char** argv) +{ + NVTX_EXPORT_UNMANGLED_FUNCTION_NAME + + (void)argc; + (void)argv; + + { + std::cout << "Default attributes:\n"; + nvtx3::event_attributes attr; + std::cout << attr; + } + std::cout << "-------------------------------------\n"; + + { + std::cout << "Set a payload:\n"; + nvtx3::event_attributes attr{nvtx3::payload{5.0f}}; + std::cout << attr; + } + std::cout << "-------------------------------------\n"; + + { + std::cout << "Set a color with RGB hex code 0xFF7F00:\n"; + nvtx3::event_attributes attr{nvtx3::color{0xFFFF7F00}}; + std::cout << attr; + } + std::cout << "-------------------------------------\n"; + + + { + std::cout << "Set a color with RGB=255,127,0:\n"; + nvtx3::event_attributes attr{nvtx3::rgb{255,127,0}}; + std::cout << attr; + } + std::cout << "-------------------------------------\n"; + + + { + std::cout << "Set a color & payload:\n"; + nvtx3::event_attributes attr{nvtx3::rgb{255,127,0}, nvtx3::payload{5.0f}}; + std::cout << attr; + } + std::cout << "-------------------------------------\n"; + + { + std::cout << "Set a color (red), payload, color again (green)... 
first color wins:\n"; + + nvtx3::event_attributes attr{ + nvtx3::rgb{255,0,0}, + nvtx3::payload{5.0f}, + nvtx3::rgb{0, 255, 0}}; + + std::cout << attr; + } + std::cout << "-------------------------------------\n"; + + { + std::cout << "Set a message (ascii), payload, color, and category:\n"; + + nvtx3::event_attributes attr{ + nvtx3::message{"Hello"}, + nvtx3::category{11}, + nvtx3::payload{5.0f}, + nvtx3::rgb{0,255,0}}; + + std::cout << attr; + } + std::cout << "-------------------------------------\n"; + + { + std::cout << "Set a message with different string types:\n"; + + nvtx3::event_attributes a{nvtx3::message{"Hello"}}; + std::cout << a; + + nvtx3::event_attributes wa{nvtx3::message{L"Hello"}}; + std::cout << wa; + + std::string hello{"Hello"}; + nvtx3::event_attributes b{nvtx3::message{hello}}; + std::cout << b; + + std::wstring whello{L"Hello"}; + nvtx3::event_attributes wb{nvtx3::message{whello}}; + std::cout << wb; + + // Important! Neither of the following will compile: + // + // nvtx3::event_attributes c{nvtx3::message{std::string{"foo"}}}; + // std::cout << c; + // + // std::string foo{"foo"}; + // nvtx3::event_attributes d{nvtx3::message{foo + "bar"}}; + // std::cout << d; + // + // Both of those usages fail with: + // "error C2280: 'nvtx3::message::message(std::string &&)': + // attempting to reference a deleted function" + // + // nvtx3::message is a "view" class, not an owning class. + // It cannot take ownership of a temporary string and + // destroy it when it goes out of scope. Similarly, + // nvtx3::event_attributes is not an owning class, so it cannot take + // ownership of an nvtx3::message either. + // + // TODO: Could we add implicit support for this? 
+ } + std::cout << "-------------------------------------\n"; + + { + std::cout << "Set a message (registered):\n"; + auto hTacobell = reinterpret_cast(0x7ac0be11); + nvtx3::event_attributes attr{nvtx3::message{hTacobell}}; + std::cout << attr; + } + std::cout << "-------------------------------------\n"; + + { + std::cout << "Set category/message/payload/color, with \"using\":\n"; + + using namespace nvtx3; + + event_attributes a{ + category{11}, + message{"Hello"}, + payload{5.0f}, + rgb{1,2,3}}; + + std::cout << a; + } + std::cout << "-------------------------------------\n"; + + { + std::cout << "Convenience: Set a message without the helper type:\n"; + + nvtx3::event_attributes a{"Hello"}; + std::cout << a; + + std::string hello{"Hello"}; + nvtx3::event_attributes b{hello}; + std::cout << b; + } + std::cout << "-------------------------------------\n"; + + { + std::cout << "Examples: \"using\", skip helper type for msg, set other fields:\n"; + + using namespace nvtx3; + + event_attributes a{"Hello", payload{7.0}}; + std::cout << a; + + event_attributes b{"Hello", rgb{255,255,0}}; + std::cout << b; + + event_attributes c{"Hello", category{4}}; + std::cout << c; + + // Order doesn't matter + event_attributes d{"Hello", rgb{255,255,0}, payload{7.0}, category{4}}; + std::cout << d; + + event_attributes e{payload{7.0}, "Hello", category{4}, rgb{255,255,0}}; + std::cout << e; + + event_attributes f{category{4}, rgb{255,255,0}, payload{7.0}, "Hello"}; + std::cout << f; + + // Vertical formatting is nice too: + event_attributes g{ + "Hello", + category{4}, + rgb{255,255,0}, + payload{7.0}}; + std::cout << g; + + event_attributes h + { + "Hello", + category{4}, + rgb{255,255,0}, + payload{7.0} + }; + std::cout << h; + } + std::cout << "-------------------------------------\n"; + + return 0; +} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..8f09410 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,227 @@ +# 
SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + +cmake_minimum_required (VERSION 3.19) + +if(NOT DEFINED ENABLE_CUDA) + set(ENABLE_CUDA True) +endif() +if(APPLE) + set(ENABLE_CUDA False) +endif() + +set(NVTX_LANGUAGES C CXX) +if(ENABLE_CUDA) + set(NVTX_LANGUAGES ${NVTX_LANGUAGES} CUDA) +endif() + +project ("NvtxTests" LANGUAGES ${NVTX_LANGUAGES}) + +# Enforce standard C/C++ with sensible warnings and minimal compiler output on all platforms +set(CMAKE_C_STANDARD 90) +set(CMAKE_C_EXTENSIONS OFF) +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}") +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -ccbin \"${CMAKE_CXX_COMPILER}\"") +if(MSVC) + # Must use - instead of / for option prefix when using NVCC, because it forwards args + # it doesn't know to the host compiler, but being a UNIX-heritage program, it thinks + # an argument like "/nologo" is an input file path. Luckily, MSVC accepts - prefixes. 
+ if(CMAKE_C_COMPILER_VERSION VERSION_LESS "19.14.0.0") + # Enable options to behave closer to standard + else() + add_compile_options(-permissive-) + endif() + # The following line can be uncommented to test with WIN32_LEAN_AND_MEAN + #add_compile_definitions(WIN32_LEAN_AND_MEAN) +endif() + +# Build with minimal or no dependencies on installed C/C++ runtime libraries +if(MSVC) + # For Non-debug, change /MD (MultiThreadedDLL) to /MT (MultiThreaded) + # For Debug, change /MDd (MultiThreadedDebugDLL) to /MTd (MultiThreadedDebug) + set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>") +else() + # Statically link libstdc++ and libgcc. Do not statically link libc, though. + # Use an old sysroot if compatibility with old GLIBC versions is required. + # In non-DEBUG builds, use `-s` (or `-x -S`) to strip unneeded symbols + add_link_options( + $<$:-static-libstdc++> + $<$:-static-libgcc> + $<$,$>:-Wl,-x,-S> + $<$,$>>:-Wl,-s> + ) +endif() + +# Compiler-specific idiosyncrasies +if(MSVC) + # Must use - instead of / for option prefix when using NVCC, because it forwards args + # it doesn't know to the host compiler, but being a UNIX-heritage program, it thinks + # an argument like "/nologo" is an input file path. Luckily, MSVC accepts - prefixes. + add_compile_options(-nologo) + #add_compile_options(-wd26812) # Disable warning: prefer enum class over unscoped enum + add_link_options(-NOLOGO -INCREMENTAL:NO) + # On some platforms, CMake doesn't automatically add C++ flags to enable RTTI (/GR) or + # configure C++ exceptions to the commonly preferred value (/EHsc or /GX). Add these + # if they are missing. 
+ if(NOT CMAKE_CXX_FLAGS MATCHES "(/|-)GR( |$)") + string(APPEND CMAKE_CXX_FLAGS " -GR") + endif() + if(NOT CMAKE_CXX_FLAGS MATCHES "(/|-)(EHsc|GX)( |$)") + string(APPEND CMAKE_CXX_FLAGS " -EHsc") + endif() + # Improve debugging + if (CMAKE_BUILD_TYPE STREQUAL "Debug") + # This for some reason also adds "MDd" even though above we asked for MTd, + # so add the /JMC option manually + #set(CMAKE_VS_JUST_MY_CODE_DEBUGGING ON) + add_compile_options(-JMC) + endif() +else() + # Stop compiling immediately after first error + add_compile_options(-Wfatal-errors) + # Check for initializing unions without required braces + add_compile_options(-Wmissing-braces) +endif() + + +add_subdirectory("../c" "ImportNvtx") + +#if(DOMAINS_ERROR_TEST_NAME_IS_MISSING) +# target_compile_definitions(domains PRIVATE ERROR_TEST_NAME_IS_MISSING) +#endif() + +add_executable(runtest "RunTest.cpp") +target_link_libraries(runtest PRIVATE nvtx3-cpp) + +add_library(inj SHARED "PrintInjection.cpp") +target_link_libraries(inj PRIVATE nvtx3-cpp) +set_target_properties(inj PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET hidden) + +add_library(self SHARED "TestSelfInjection.cpp" "SelfInjection.cpp") +target_link_libraries(self PRIVATE nvtx3-cpp) +set_target_properties(self PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET hidden) + +add_library(calls SHARED "Calls.cpp" "SelfInjection.cpp") +target_link_libraries(calls PRIVATE nvtx3-cpp) +set_target_properties(calls PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET hidden) + +add_library(coverage SHARED "Coverage.cpp") +target_link_libraries(coverage PRIVATE nvtx3-cpp) +set_target_properties(coverage PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET hidden) + +add_library(coveragec SHARED "CoverageC.c") +target_link_libraries(coveragec PRIVATE nvtx3-c) +set_target_properties(coveragec PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET hidden) + +if(ENABLE_CUDA) + add_library(coverage-cu SHARED 
"CoverageCuda.cu") + target_link_libraries(coverage-cu PRIVATE nvtx3-cpp) + set_target_properties(coverage-cu PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET hidden) +endif() + +add_library(coverage-mem SHARED "CoverageMem.c") +target_link_libraries(coverage-mem PRIVATE nvtx3-c) +set_target_properties(coverage-mem PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET hidden) + +if(ENABLE_CUDA) + add_library(coverage-memcudart SHARED "CoverageMemCudaRt.cu") + target_link_libraries(coverage-memcudart PRIVATE nvtx3-c) + set_target_properties(coverage-memcudart PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET hidden) +endif() + +add_library(coverage-payload SHARED "CoveragePayload.c") +target_link_libraries(coverage-payload PRIVATE nvtx3-c) +set_target_properties(coverage-payload PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET hidden) + +add_library(coverage-counter SHARED "CoverageCounter.c") +target_link_libraries(coverage-counter PRIVATE nvtx3-c) +set_target_properties(coverage-counter PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET hidden) + +add_library(attributes SHARED "Attributes.cpp") +target_link_libraries(attributes PRIVATE nvtx3-cpp) +set_target_properties(attributes PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET hidden) + +add_library(domains SHARED "Domains.cpp") +target_link_libraries(domains PRIVATE nvtx3-cpp) +set_target_properties(domains PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET hidden) + +add_library(categories SHARED "NamedCategories.cpp") +target_link_libraries(categories PRIVATE nvtx3-cpp) +set_target_properties(categories PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET hidden) + +add_library(regstrings SHARED "RegisteredStrings.cpp") +target_link_libraries(regstrings PRIVATE nvtx3-cpp) +set_target_properties(regstrings PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET hidden) + +add_library(linkerdupes SHARED "LinkerDupesMain.cpp" 
"LinkerDupesFileA.cpp" "LinkerDupesFileB.cpp") +target_link_libraries(linkerdupes PRIVATE nvtx3-cpp) +set_target_properties(linkerdupes PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET hidden) + +add_library(export-api SHARED "ExportApi.c") +target_link_libraries(export-api PRIVATE nvtx3-c) +target_include_directories(export-api PRIVATE "Imports/cuda_lite" "Imports/opencl_lite") +set_target_properties(export-api PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET hidden) + +target_link_options(export-api PRIVATE + $<$>,$>:-Wl,--kill-at> +) + +add_library(use-exported-api SHARED "UseExportedApi.cpp") +target_link_libraries(use-exported-api PRIVATE nvtx3-cpp) +target_include_directories(use-exported-api PRIVATE "Imports/cuda_lite" "Imports/opencl_lite") +set_target_properties(use-exported-api PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET hidden) + +enable_testing() +add_test(NAME "Self" COMMAND runtest -t self) +add_test(NAME "Self with SelfInjection" COMMAND runtest -t self -i self) +add_test(NAME "Self with PrintInjection" COMMAND runtest -t self -i inj) +add_test(NAME "Calls" COMMAND runtest -t calls) +add_test(NAME "Calls with CallsInjection" COMMAND runtest -t calls -i calls) +add_test(NAME "Calls with PrintInjection" COMMAND runtest -t calls -i inj) +add_test(NAME "Coverage" COMMAND runtest -t coverage) +add_test(NAME "Coverage with PrintInjection" COMMAND runtest -t coverage -i inj) +add_test(NAME "CoverageC" COMMAND runtest -t coveragec) +add_test(NAME "CoverageC with PrintInjection" COMMAND runtest -t coveragec -i inj) +if(ENABLE_CUDA) + add_test(NAME "CoverageCuda" COMMAND runtest -t coverage-cu) + add_test(NAME "CoverageCuda with PrintInjection" COMMAND runtest -t coverage-cu -i inj) +endif() +add_test(NAME "CoverageMem" COMMAND runtest -t coverage-mem) +add_test(NAME "CoverageMem with PrintInjection" COMMAND runtest -t coverage-mem -i inj) +if(ENABLE_CUDA) + add_test(NAME "CoverageMemCudaRt" COMMAND runtest -t 
coverage-memcudart) + add_test(NAME "CoverageMemCudaRt with PrintInjection" COMMAND runtest -t coverage-memcudart -i inj) +endif() +add_test(NAME "CoveragePayload" COMMAND runtest -t coverage-payload) +add_test(NAME "CoveragePayload with PrintInjection" COMMAND runtest -t coverage-payload -i inj) +add_test(NAME "CoverageCounter" COMMAND runtest -t coverage-counter) +add_test(NAME "CoverageCounter with PrintInjection" COMMAND runtest -t coverage-counter -i inj) +add_test(NAME "Attributes" COMMAND runtest -t attributes) +add_test(NAME "Attributes with PrintInjection" COMMAND runtest -t attributes -i inj) +add_test(NAME "Domains" COMMAND runtest -t domains) +add_test(NAME "Domains with PrintInjection" COMMAND runtest -t domains -i inj) +add_test(NAME "NamedCategories" COMMAND runtest -t categories) +add_test(NAME "NamedCategories with PrintInjection" COMMAND runtest -t categories -i inj) +add_test(NAME "RegisteredStrings" COMMAND runtest -t regstrings) +add_test(NAME "RegisteredStrings with PrintInjection" COMMAND runtest -t regstrings -i inj) +add_test(NAME "LinkerDupes" COMMAND runtest -t linkerdupes) +add_test(NAME "LinkerDupes with PrintInjection" COMMAND runtest -t linkerdupes -i inj) +add_test(NAME "UseExportedApi" COMMAND runtest -t use-exported-api) +add_test(NAME "UseExportedApi with PrintInjection" COMMAND runtest -t use-exported-api -i inj) diff --git a/tests/Calls.cpp b/tests/Calls.cpp new file mode 100644 index 0000000..46d7b3a --- /dev/null +++ b/tests/Calls.cpp @@ -0,0 +1,433 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#include + +#include +#include + +#include +#include +#include + +#include "SelfInjection.h" +#include "PrettyPrintersNvtxC.h" + +class CallbackTester +{ + Callbacks stored; + std::vector calls; +public: + +public: + void Record(Call const& call) { calls.push_back(call); } + + CallbackTester() : stored(g_callbacks) + { + g_callbacks.Default = [&](Call const& call) { Record(call); }; + } + ~CallbackTester() { g_callbacks = stored; } + + bool CallsMatch(std::vector expCalls, bool verbose = false) const + { + auto cmp = [&](Call const& lhs, Call const& rhs) + { + return Same(lhs, rhs, true, verbose, "NVTX call"); + }; + + bool match = std::equal(calls.begin(), calls.end(), expCalls.begin(), cmp); + if (verbose && !match) + { + auto printCall = [](Call const& c) { std::cout << " " << *c << "\n"; }; + std::cout << "Did not get expected NVTX C API call sequence! 
Expected:\n"; + for (auto& c : expCalls) printCall(c); + std::cout << "Recorded:\n"; + for (auto& c : calls) printCall(c); + } + + return match; + } +}; + +template struct a_lib { static constexpr const char* name = "LibA"; }; +template struct b_lib { static constexpr const char* name = "LibB"; }; +template struct c_lib { static constexpr const char* name = "LibC"; }; + +template struct cat1 { static constexpr const char* name = "Cat1"; static constexpr const uint32_t id = 1; }; +template struct cat2 { static constexpr const char* name = "Cat2"; static constexpr const uint32_t id = 2; }; +template struct cat3 { static constexpr const char* name = "Cat3"; static constexpr const uint32_t id = 3; }; + +template struct reg1 { static constexpr const char* message = "Reg1"; }; +template struct reg2 { static constexpr const char* message = "Reg2"; }; +template struct reg3 { static constexpr const char* message = "Reg3"; }; + +extern "C" NVTX_DYNAMIC_EXPORT +int RunTest(int argc, const char** argv) +{ + NVTX_EXPORT_UNMANGLED_FUNCTION_NAME + + bool verbose = false; + const std::string verboseArg = "-v"; + for (; *argv; ++argv) + { + if (*argv == verboseArg) verbose = true; + } + + using namespace nvtx3; + + //---------------------------- Tests -------------------------------------- + + if (verbose) std::cout << "--------- Testing injection loader\n"; + + { + CallbackTester t; + + nvtxInitialize(nullptr); + nvtxInitialize(nullptr); + + if (!t.CallsMatch({ + CALL_LOAD(1), + CALL(CORE2, Initialize, nullptr), + CALL(CORE2, Initialize, nullptr) + }, verbose)) return 1; + } + + if (verbose) std::cout << "--------- Testing C API\n"; + + { + CallbackTester t; + + const char* teststr = "Testing 1 2 3!"; + nvtxMarkA(teststr); + + if (!t.CallsMatch({ + CALL(CORE, MarkA, teststr) + }, verbose)) return 1; + } + + { + CallbackTester t; + + char teststr[] = "Testing 1 2 3!"; + nvtxMarkA(teststr); + memcpy(teststr, "Overwritten!!!", sizeof(teststr)); + + if (!t.CallsMatch({ + CALL(CORE, 
MarkA, "Testing 1 2 3!") + }, verbose)) return 1; + } + + { + CallbackTester t; + + wchar_t teststr[] = L"Testing 1 2 3!"; + nvtxMarkW(teststr); + memcpy(teststr, L"Overwritten!!!", sizeof(teststr)); + + if (!t.CallsMatch({ + CALL(CORE, MarkW, L"Testing 1 2 3!") + }, verbose)) return 1; + } + + { + CallbackTester t; + + nvtxEventAttributes_t attr{NVTX_VERSION, sizeof(nvtxEventAttributes_t)}; + attr.category = 123; + attr.colorType = NVTX_COLOR_ARGB; + attr.color = 0xFF4466BB; + attr.messageType = NVTX_MESSAGE_TYPE_ASCII; + attr.message = MakeMessage("Test MarkEX"); + attr.category = 123; + attr.payloadType = NVTX_PAYLOAD_TYPE_DOUBLE; + attr.payload = MakePayload(3.14159); + nvtxMarkEx(&attr); + + nvtxEventAttributes_t attr2 = attr; + memset(&attr, 0, sizeof(attr)); + + if (!t.CallsMatch({ + CALL(CORE, MarkEx, &attr2) + }, verbose)) return 1; + } + + if (verbose) std::cout << "--------- Testing C++ API\n"; + + { + CallbackTester t; + + mark("Testing 1 2 3!"); + mark(L"Testing 1 2 3!"); + + if (!t.CallsMatch({ + CALL(CORE2, DomainMarkEx, nullptr, event_attributes{"Testing 1 2 3!"}.get()), + CALL(CORE2, DomainMarkEx, nullptr, event_attributes{L"Testing 1 2 3!"}.get()) + }, verbose)) return 1; + } + + { + CallbackTester t; + + nvtxEventAttributes_t attrExpected{NVTX_VERSION, sizeof(nvtxEventAttributes_t), + 123, // category + NVTX_COLOR_ARGB, 0xFF4466BB, + NVTX_PAYLOAD_TYPE_DOUBLE, 0, MakePayload(3.14159), + NVTX_MESSAGE_TYPE_ASCII, MakeMessage("Test msg") + }; + + // Same args, different order + mark("Test msg", rgb(0x44, 0x66, 0xBB), category(123), payload(3.14159)); + mark(payload(3.14159), "Test msg", rgb(0x44, 0x66, 0xBB), category(123)); + mark(category(123), payload(3.14159), "Test msg", rgb(0x44, 0x66, 0xBB)); + mark(rgb(0x44, 0x66, 0xBB), category(123), payload(3.14159), "Test msg"); + + // Same args with duplicates, test first-one-wins behavior (including union type changes) + mark("Test msg", rgb(0x44, 0x66, 0xBB), category(123), payload(3.14159), + "Bad 
msg", rgb(0x10, 0x20, 0x30), category(321), payload(3.0)); + mark("Test msg", rgb(0x44, 0x66, 0xBB), category(123), payload(3.14159), + L"Bad message"); + mark("Test msg", rgb(0x44, 0x66, 0xBB), category(123), payload(3.14159), + payload(3.14159f)); + + if (!t.CallsMatch({ + 7, CALL(CORE2, DomainMarkEx, nullptr, &attrExpected) + }, verbose)) return 1; + } + + { + CallbackTester t; + constexpr int N = 1; + auto hA = (nvtxDomainHandle_t)1; + + mark_in>("First call"); + mark_in>("Second call"); + mark_in>("Third call"); + + if (!t.CallsMatch({ + CALL(CORE2, DomainCreateA, "LibA"), + CALL(CORE2, DomainMarkEx, hA, event_attributes{"First call"}.get()), + CALL(CORE2, DomainMarkEx, hA, event_attributes{"Second call"}.get()), + CALL(CORE2, DomainMarkEx, hA, event_attributes{"Third call"}.get()) + }, verbose)) return 1; + } + + { + CallbackTester t; + constexpr int N = 2; + auto hA = (nvtxDomainHandle_t)1; + auto hB = (nvtxDomainHandle_t)2; + + mark_in>("First call"); + mark_in>("Second call"); + mark_in>("First call"); + mark_in>("Second call"); + + if (!t.CallsMatch({ + CALL(CORE2, DomainCreateA, "LibA"), + CALL(CORE2, DomainMarkEx, hA, event_attributes{"First call"}.get()), + CALL(CORE2, DomainMarkEx, hA, event_attributes{"Second call"}.get()), + CALL(CORE2, DomainCreateA, "LibB"), + CALL(CORE2, DomainMarkEx, hB, event_attributes{"First call"}.get()), + CALL(CORE2, DomainMarkEx, hB, event_attributes{"Second call"}.get()) + }, verbose)) return 1; + } + + { + CallbackTester t; + constexpr int N = 3; + auto hA = (nvtxDomainHandle_t)1; + auto hB = (nvtxDomainHandle_t)2; + + mark_in>("DA, Cat 1, call 1", named_category_in>::get>()); + mark_in>("DA, Cat 1, call 2", named_category_in>::get>()); + mark_in>("DA, Cat 2, call 1", named_category_in>::get>()); + mark_in>("DA, Cat 2, call 2", named_category_in>::get>()); + mark_in>("DB, Cat 1, call 1", named_category_in>::get>()); + mark_in>("DB, Cat 1, call 2", named_category_in>::get>()); + mark_in>("DB, Cat 2, call 1", 
named_category_in>::get>()); + mark_in>("DB, Cat 2, call 2", named_category_in>::get>()); + + if (!t.CallsMatch({ + CALL(CORE2, DomainCreateA, "LibA"), + CALL(CORE2, DomainNameCategoryA, hA, 1, "Cat1"), + CALL(CORE2, DomainMarkEx, hA, event_attributes{"DA, Cat 1, call 1", category(1)}.get()), + CALL(CORE2, DomainMarkEx, hA, event_attributes{"DA, Cat 1, call 2", category(1)}.get()), + CALL(CORE2, DomainNameCategoryA, hA, 2, "Cat2"), + CALL(CORE2, DomainMarkEx, hA, event_attributes{"DA, Cat 2, call 1", category(2)}.get()), + CALL(CORE2, DomainMarkEx, hA, event_attributes{"DA, Cat 2, call 2", category(2)}.get()), + CALL(CORE2, DomainCreateA, "LibB"), + CALL(CORE2, DomainNameCategoryA, hB, 1, "Cat1"), + CALL(CORE2, DomainMarkEx, hB, event_attributes{"DB, Cat 1, call 1", category(1)}.get()), + CALL(CORE2, DomainMarkEx, hB, event_attributes{"DB, Cat 1, call 2", category(1)}.get()), + CALL(CORE2, DomainNameCategoryA, hB, 2, "Cat2"), + CALL(CORE2, DomainMarkEx, hB, event_attributes{"DB, Cat 2, call 1", category(2)}.get()), + CALL(CORE2, DomainMarkEx, hB, event_attributes{"DB, Cat 2, call 2", category(2)}.get()), + }, verbose)) return 1; + } + + { + CallbackTester t; + constexpr int N = 4; + auto hA = (nvtxDomainHandle_t)1; + auto hB = (nvtxDomainHandle_t)2; + auto hReg1 = (nvtxStringHandle_t)1; + auto hReg2 = (nvtxStringHandle_t)2; + + mark_in>(registered_string_in>::get>()); + mark_in>(registered_string_in>::get>()); + mark_in>(registered_string_in>::get>()); + mark_in>(registered_string_in>::get>()); + mark_in>(registered_string_in>::get>()); + mark_in>(registered_string_in>::get>()); + mark_in>(registered_string_in>::get>()); + mark_in>(registered_string_in>::get>()); + + if (!t.CallsMatch({ + CALL(CORE2, DomainCreateA, "LibA"), + CALL(CORE2, DomainRegisterStringA, hA, "Reg1"), + CALL(CORE2, DomainMarkEx, hA, event_attributes{hReg1}.get()), + CALL(CORE2, DomainMarkEx, hA, event_attributes{hReg1}.get()), + CALL(CORE2, DomainRegisterStringA, hA, "Reg2"), + CALL(CORE2, 
DomainMarkEx, hA, event_attributes{hReg2}.get()), + CALL(CORE2, DomainMarkEx, hA, event_attributes{hReg2}.get()), + CALL(CORE2, DomainCreateA, "LibB"), + CALL(CORE2, DomainRegisterStringA, hB, "Reg1"), + CALL(CORE2, DomainMarkEx, hB, event_attributes{hReg1}.get()), + CALL(CORE2, DomainMarkEx, hB, event_attributes{hReg1}.get()), + CALL(CORE2, DomainRegisterStringA, hB, "Reg2"), + CALL(CORE2, DomainMarkEx, hB, event_attributes{hReg2}.get()), + CALL(CORE2, DomainMarkEx, hB, event_attributes{hReg2}.get()), + }, verbose)) return 1; + } + + { + CallbackTester t; + constexpr int N = 5; + auto hA = (nvtxDomainHandle_t)1; + auto hB = (nvtxDomainHandle_t)2; + auto hReg1 = (nvtxStringHandle_t)1; + auto hReg2 = (nvtxStringHandle_t)2; + + auto& a_regstr1 = registered_string_in>::get>(); + auto& a_regstr2 = registered_string_in>::get>(); + auto& b_regstr1 = registered_string_in>::get>(); + auto& b_regstr2 = registered_string_in>::get>(); + + auto& a_cat1 = named_category_in>::get>(); + auto& a_cat2 = named_category_in>::get>(); + auto& b_cat1 = named_category_in>::get>(); + auto& b_cat2 = named_category_in>::get>(); + + mark_in>(a_cat1, a_regstr1); + mark_in>(a_cat1, a_regstr1); + mark_in>(a_cat2, a_regstr2); + mark_in>(a_cat2, a_regstr2); + mark_in>(b_cat1, b_regstr1); + mark_in>(b_cat1, b_regstr1); + mark_in>(b_cat2, b_regstr2); + mark_in>(b_cat2, b_regstr2); + + if (!t.CallsMatch({ + CALL(CORE2, DomainCreateA, "LibA"), + CALL(CORE2, DomainRegisterStringA, hA, "Reg1"), + CALL(CORE2, DomainRegisterStringA, hA, "Reg2"), + CALL(CORE2, DomainCreateA, "LibB"), + CALL(CORE2, DomainRegisterStringA, hB, "Reg1"), + CALL(CORE2, DomainRegisterStringA, hB, "Reg2"), + CALL(CORE2, DomainNameCategoryA, hA, 1, "Cat1"), + CALL(CORE2, DomainNameCategoryA, hA, 2, "Cat2"), + CALL(CORE2, DomainNameCategoryA, hB, 1, "Cat1"), + CALL(CORE2, DomainNameCategoryA, hB, 2, "Cat2"), + CALL(CORE2, DomainMarkEx, hA, event_attributes{hReg1, category(1)}.get()), + CALL(CORE2, DomainMarkEx, hA, 
event_attributes{hReg1, category(1)}.get()), + CALL(CORE2, DomainMarkEx, hA, event_attributes{hReg2, category(2)}.get()), + CALL(CORE2, DomainMarkEx, hA, event_attributes{hReg2, category(2)}.get()), + CALL(CORE2, DomainMarkEx, hB, event_attributes{hReg1, category(1)}.get()), + CALL(CORE2, DomainMarkEx, hB, event_attributes{hReg1, category(1)}.get()), + CALL(CORE2, DomainMarkEx, hB, event_attributes{hReg2, category(2)}.get()), + CALL(CORE2, DomainMarkEx, hB, event_attributes{hReg2, category(2)}.get()), + }, verbose)) return 1; + } + + { + CallbackTester t; + constexpr int N = 6; + auto hA = (nvtxDomainHandle_t)1; + auto hB = (nvtxDomainHandle_t)2; + + { + scoped_range_in> r1("Sequential range 1"); + mark_in>("Mark in range"); + } + { + scoped_range_in> r2("Sequential range 2"); + mark_in>("Mark in range"); + } + { + scoped_range_in> r1("Nested range 1"); + scoped_range_in> r2("Nested range 2"); + mark_in>("Mark in range"); + } + + { + scoped_range_in> r1("Sequential range 1"); + mark_in>("Mark in range"); + } + { + scoped_range_in> r2("Sequential range 2"); + mark_in>("Mark in range"); + } + { + scoped_range_in> r1("Nested range 1"); + scoped_range_in> r2("Nested range 2"); + mark_in>("Mark in range"); + } + + if (!t.CallsMatch({ + CALL(CORE2, DomainCreateA, "LibA"), + CALL(CORE2, DomainRangePushEx, hA, event_attributes{"Sequential range 1"}.get()), + CALL(CORE2, DomainMarkEx, hA, event_attributes{"Mark in range"}.get()), + CALL(CORE2, DomainRangePop, hA), + CALL(CORE2, DomainRangePushEx, hA, event_attributes{"Sequential range 2"}.get()), + CALL(CORE2, DomainMarkEx, hA, event_attributes{"Mark in range"}.get()), + CALL(CORE2, DomainRangePop, hA), + CALL(CORE2, DomainRangePushEx, hA, event_attributes{"Nested range 1"}.get()), + CALL(CORE2, DomainRangePushEx, hA, event_attributes{"Nested range 2"}.get()), + CALL(CORE2, DomainMarkEx, hA, event_attributes{"Mark in range"}.get()), + CALL(CORE2, DomainRangePop, hA), + CALL(CORE2, DomainRangePop, hA), + CALL(CORE2, 
DomainCreateA, "LibB"), + CALL(CORE2, DomainRangePushEx, hB, event_attributes{"Sequential range 1"}.get()), + CALL(CORE2, DomainMarkEx, hB, event_attributes{"Mark in range"}.get()), + CALL(CORE2, DomainRangePop, hB), + CALL(CORE2, DomainRangePushEx, hB, event_attributes{"Sequential range 2"}.get()), + CALL(CORE2, DomainMarkEx, hB, event_attributes{"Mark in range"}.get()), + CALL(CORE2, DomainRangePop, hB), + CALL(CORE2, DomainRangePushEx, hB, event_attributes{"Nested range 1"}.get()), + CALL(CORE2, DomainRangePushEx, hB, event_attributes{"Nested range 2"}.get()), + CALL(CORE2, DomainMarkEx, hB, event_attributes{"Mark in range"}.get()), + CALL(CORE2, DomainRangePop, hB), + CALL(CORE2, DomainRangePop, hB), + }, verbose)) return 1; + } + + if (verbose) std::cout << "--------- Success!\n"; + return 0; +} diff --git a/tests/Coverage.cpp b/tests/Coverage.cpp new file mode 100644 index 0000000..43cf69b --- /dev/null +++ b/tests/Coverage.cpp @@ -0,0 +1,29 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. 
+ */ + +#include "TestCoverage.h" + +extern "C" NVTX_DYNAMIC_EXPORT +int RunTest(int argc, const char** argv) +{ + NVTX_EXPORT_UNMANGLED_FUNCTION_NAME + + return RunTestCommon(argc, argv); +} diff --git a/tests/CoverageC.c b/tests/CoverageC.c new file mode 100644 index 0000000..06c94f6 --- /dev/null +++ b/tests/CoverageC.c @@ -0,0 +1,93 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. 
+ */ + +#include + +static void TestCore(void) +{ + nvtxEventAttributes_t attributes; + nvtxRangeId_t rangeId; + + attributes.version = NVTX_VERSION; + attributes.size = sizeof(attributes); + attributes.category = 0; + attributes.colorType = NVTX_COLOR_ARGB; + attributes.color = 0xFF1133FF; + attributes.payloadType = NVTX_PAYLOAD_UNKNOWN; + attributes.payload.llValue = 0; + attributes.messageType = NVTX_MESSAGE_TYPE_ASCII; + attributes.message.ascii = "Test message"; + + nvtxMarkEx(&attributes); + nvtxMarkA("MarkA"); + nvtxMarkW(L"MarkW"); + rangeId = nvtxRangeStartEx(&attributes); + nvtxRangeEnd(rangeId); + rangeId = nvtxRangeStartA("RangeStartA"); + nvtxRangeEnd(rangeId); + rangeId = nvtxRangeStartW(L"RangeStartW"); + nvtxRangeEnd(rangeId); + nvtxRangePushEx(&attributes); + nvtxRangePop(); + nvtxRangePushA("RangePushA"); + nvtxRangePop(); + nvtxRangePushW(L"RangePushW"); + nvtxRangePop(); +} + +static void TestCore2(void) +{ + nvtxEventAttributes_t attributes; + nvtxRangeId_t rangeId; + nvtxDomainHandle_t domain, domainW; + + attributes.version = NVTX_VERSION; + attributes.size = sizeof(attributes); + attributes.category = 0; + attributes.colorType = NVTX_COLOR_ARGB; + attributes.color = 0xFF1133FF; + attributes.payloadType = NVTX_PAYLOAD_UNKNOWN; + attributes.payload.llValue = 0; + attributes.messageType = NVTX_MESSAGE_TYPE_ASCII; + attributes.message.ascii = "Test message"; + + domain = nvtxDomainCreateA("DomainA"); + domainW = nvtxDomainCreateW(L"DomainW"); + + nvtxDomainMarkEx(domain, &attributes); + rangeId = nvtxDomainRangeStartEx(domain, &attributes); + nvtxDomainRangeEnd(domain, rangeId); + nvtxDomainRangePushEx(domain, &attributes); + nvtxDomainRangePop(domain); +} + +NVTX_DYNAMIC_EXPORT +int RunTest(int argc, const char** argv) +{ + NVTX_EXPORT_UNMANGLED_FUNCTION_NAME + + (void)argc; + (void)argv; + + TestCore(); + TestCore2(); + + return 0; +} diff --git a/tests/CoverageCounter.c b/tests/CoverageCounter.c new file mode 100644 index 0000000..904ec9e --- 
/dev/null +++ b/tests/CoverageCounter.c @@ -0,0 +1,50 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#include + +static void TestMem(void) +{ + nvtxDomainHandle_t domain; + uint64_t counter; + nvtxCounterAttr_t attr; + int64_t i64 = 0; + double f64 = 0.0; + + domain = nvtxDomainCreateA("Domain"); + + counter = nvtxCounterRegister(domain, &attr); + nvtxCounterSampleInt64(domain, counter, i64); + nvtxCounterSampleFloat64(domain, counter, f64); + nvtxCounterSampleNoValue(domain, counter, NVTX_COUNTER_SAMPLE_UNCHANGED); +} + +NVTX_DYNAMIC_EXPORT +int RunTest(int argc, const char** argv) +{ + NVTX_EXPORT_UNMANGLED_FUNCTION_NAME + + (void)argc; + (void)argv; + + TestMem(); + + return 0; +} diff --git a/tests/CoverageCuda.cu b/tests/CoverageCuda.cu new file mode 100644 index 0000000..43cf69b --- /dev/null +++ b/tests/CoverageCuda.cu @@ -0,0 +1,29 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#include "TestCoverage.h" + +extern "C" NVTX_DYNAMIC_EXPORT +int RunTest(int argc, const char** argv) +{ + NVTX_EXPORT_UNMANGLED_FUNCTION_NAME + + return RunTestCommon(argc, argv); +} diff --git a/tests/CoverageMem.c b/tests/CoverageMem.c new file mode 100644 index 0000000..f7d000a --- /dev/null +++ b/tests/CoverageMem.c @@ -0,0 +1,46 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. 
+ */ + +#include + +static void TestMem(void) +{ + nvtxDomainHandle_t domain; + nvtxMemHeapHandle_t heap; + nvtxMemHeapDesc_t heapDesc; + + domain = nvtxDomainCreateA("Domain"); + + heap = nvtxMemHeapRegister(domain, &heapDesc); + nvtxMemPermissionsUnbind(domain, 0); +} + +NVTX_DYNAMIC_EXPORT +int RunTest(int argc, const char** argv) +{ + NVTX_EXPORT_UNMANGLED_FUNCTION_NAME + + (void)argc; + (void)argv; + + TestMem(); + + return 0; +} diff --git a/tests/CoverageMemCudaRt.cu b/tests/CoverageMemCudaRt.cu new file mode 100644 index 0000000..4e2519b --- /dev/null +++ b/tests/CoverageMemCudaRt.cu @@ -0,0 +1,45 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. 
+ */ + +#include + +static void TestMemCudaRt(void) +{ + nvtxDomainHandle_t domain; + nvtxMemPermissionsHandle_t perm; + + domain = nvtxDomainCreateA("Domain"); + + perm = nvtxMemCudaGetProcessWidePermissions(domain); + nvtxMemCudaSetPeerAccess(domain, perm, 0, 0); +} + +extern "C" NVTX_DYNAMIC_EXPORT +int RunTest(int argc, const char** argv) +{ + NVTX_EXPORT_UNMANGLED_FUNCTION_NAME + + (void)argc; + (void)argv; + + TestMemCudaRt(); + + return 0; +} diff --git a/tests/CoveragePayload.c b/tests/CoveragePayload.c new file mode 100644 index 0000000..9e2aa23 --- /dev/null +++ b/tests/CoveragePayload.c @@ -0,0 +1,46 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. 
+ */ + +#include + +static void TestMem(void) +{ + nvtxDomainHandle_t domain; + uint8_t enabled; + uint64_t handle; + nvtxPayloadSchemaAttr_t attr; + + domain = nvtxDomainCreateA("Domain"); + enabled = nvtxDomainIsEnabled(domain); + handle = nvtxPayloadSchemaRegister(domain, &attr); +} + +NVTX_DYNAMIC_EXPORT +int RunTest(int argc, const char** argv) +{ + NVTX_EXPORT_UNMANGLED_FUNCTION_NAME + + (void)argc; + (void)argv; + + TestMem(); + + return 0; +} diff --git a/tests/DllHelper.h b/tests/DllHelper.h new file mode 100644 index 0000000..7da525c --- /dev/null +++ b/tests/DllHelper.h @@ -0,0 +1,73 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#pragma once + +/* To export a function from a DLL, include nvtx3/nvToolsExt.h and use: + * - Use extern "C" (if C++) and NVTX_DYNAMIC_EXPORT in front of the function declaration/definition + * - Use NVTX_EXPORT_UNMANGLED_FUNCTION_NAME inside the function body to prevent name-mangling + * + * On GCC and similar compilers, it's best to build with -fvisibility=hidden. This ensures normal + * functions will not be dynamic exports. 
In CMake, that can be done with: + * set_target_properties(MyTarget PROPERTIES C_VISIBILITY_PRESET hidden CXX_VISIBILITY_PRESET hidden) + * + * If you can't build with that flag, then push visibility=hidden and never pop it: + * #ifdef __GNUC__ + * #pragma GCC visibility push(hidden) + * #endif + * + * Note that NVTX_DYNAMIC_EXPORT will export a function even if the default visibility is hidden. + * NVTX_EXPORT_UNMANGLED_FUNCTION_NAME isn't necessary on many platforms, but using it will ensure + * success when loading function pointers via GET_DLL_FUNC (see below) on any platform, and from + * other languages' C bindings. + */ + +#if defined(_WIN32) + +#include + +/* Don't try to use wide chars here -- stick with char* for simpler cross-plat coding */ +#define DLL_HANDLE HMODULE +#define DLL_OPEN(x) LoadLibraryA(x) +#define DLL_CLOSE(x) FreeLibrary(x) /* FreeLibrary takes an HMODULE, so it has no A/W variants */ +#define GET_DLL_FUNC GetProcAddress +#if defined(_MSC_VER) +#define DLL_PREFIX "" +#else +#define DLL_PREFIX "lib" +#endif +#define DLL_SUFFIX ".dll" + +#else /* Assume GCC-like compiler, but don't require defined(__GNUC__) */ + +#include + +#define DLL_HANDLE void* +#define DLL_OPEN(lib) dlopen(lib, RTLD_LAZY) +#define DLL_CLOSE(h) dlclose(h) +#define GET_DLL_FUNC dlsym +#define DLL_PREFIX "lib" +#if defined(__APPLE__) +#define DLL_SUFFIX ".dylib" +#else +#define DLL_SUFFIX ".so" +#endif + +#endif diff --git a/tests/Domains.cpp b/tests/Domains.cpp new file mode 100644 index 0000000..0497aa1 --- /dev/null +++ b/tests/Domains.cpp @@ -0,0 +1,117 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#if defined(_MSC_VER) && _MSC_VER < 1914 +#define STATIC_ASSERT_TESTING 0 +#else +#define STATIC_ASSERT_TESTING 1 +#endif + +#if STATIC_ASSERT_TESTING /* test the 0/1 value: the macro is always defined above, so defined() could never be false */ +#include +#define NVTX3_STATIC_ASSERT(c, m) do { if (!(c)) printf("static_assert would fail: %s\n", m); } while (0) +#endif + +#include + +#include + +// Domain description types +struct char_test { static constexpr const char* name{"Test char"}; }; +struct wchar_test { static constexpr const wchar_t* name{L"Test wchar_t"}; }; +struct error_name_missing { static constexpr const char* not_name{"Test name is missing"}; }; +struct error_name_is_bad_type { static constexpr const int name{5}; }; + +extern "C" NVTX_DYNAMIC_EXPORT +int RunTest(int argc, const char** argv) +{ + NVTX_EXPORT_UNMANGLED_FUNCTION_NAME + + (void)argc; + (void)argv; + + using namespace nvtx3; + + if (0) + { + std::cout << std::boolalpha; + std::cout << "is_c_string = " << detail::is_c_string::value << '\n'; + std::cout << "is_c_string = " << detail::is_c_string::value << '\n'; + std::cout << "is_c_string = " << detail::is_c_string::value << '\n'; + std::cout << "is_c_string = " << detail::is_c_string::value << '\n'; + std::cout << "is_c_string = " << detail::is_c_string::value << '\n'; + std::cout << "is_c_string= " << detail::is_c_string::value << '\n'; + std::cout << "is_c_string = " << detail::is_c_string::value << '\n'; + std::cout << "is_c_string = " << 
detail::is_c_string::value << '\n'; + std::cout << "is_c_string = " << detail::is_c_string::value << '\n'; + std::cout << "is_c_string = " << detail::is_c_string::value << '\n'; + std::cout << "is_c_string = " << detail::is_c_string::value << '\n'; + std::cout << "is_c_string= " << detail::is_c_string::value << '\n'; + + std::cout << "is_c_string = " << detail::is_c_string::value << '\n'; + std::cout << "is_c_string= " << detail::is_c_string::value << '\n'; + std::cout << "is_c_string = " << detail::is_c_string::value << '\n'; + + std::cout << "-------------\n"; + } + + std::cout << "- Global domain (mark alias):\n"; + mark("Mark in global domain (implicit)"); + + std::cout << "- Global domain implicit:\n"; + auto& gi = domain::get<>(); + mark_in<>("Mark in global domain (implicit)"); + + std::cout << "- Global domain explicit:\n"; + auto& ge = domain::get(); + mark_in("Mark in global domain (explicit)"); + + std::cout << "- Test domain (char):\n"; + auto& d1 = domain::get(); + mark_in("Mark in char_test domain"); + + std::cout << "- Test domain (wchar_t):\n"; + auto& d2 = domain::get(); + mark_in("Mark in wchar_test domain"); + +#if STATIC_ASSERT_TESTING + +#if 1 // defined(ERROR_TEST_NAME_IS_MISSING) + { + std::cout << "- Error test - domain is missing name member:\n"; + auto& d3 = domain::get(); + mark_in("Mark in error_name_missing domain"); + scoped_range_in r3("Mark in error_name_missing domain"); + } +#endif + +#if 1 // defined(ERROR_TEST_NAME_IS_BAD_TYPE) + { + std::cout << "- Error test - domain name member isn't narrow or wide char array:\n"; + auto& d4 = domain::get(); + mark_in("Mark in error_name_is_bad_type domain"); + scoped_range_in r4("Mark in error_name_is_bad_type domain"); + } +#endif + +#endif // STATIC_ASSERT_TESTING + + return 0; +} diff --git a/tests/ExportApi.c b/tests/ExportApi.c new file mode 100644 index 0000000..68f316f --- /dev/null +++ b/tests/ExportApi.c @@ -0,0 +1,26 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA 
CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#define NVTX_EXPORT_API +#include +#include +#include +#include +#include diff --git a/tests/Imports/cuda_lite/cuda.h b/tests/Imports/cuda_lite/cuda.h new file mode 100644 index 0000000..7c2ad51 --- /dev/null +++ b/tests/Imports/cuda_lite/cuda.h @@ -0,0 +1,9 @@ +#ifndef CUDA_H_ +#define CUDA_H_ + +typedef int CUdevice; +typedef struct CUctx_st *CUcontext; +typedef struct CUstream_st *CUstream; +typedef struct CUevent_st *CUevent; + +#endif /* CUDA_H_ */ diff --git a/tests/Imports/cuda_lite/driver_types.h b/tests/Imports/cuda_lite/driver_types.h new file mode 100644 index 0000000..3e41c50 --- /dev/null +++ b/tests/Imports/cuda_lite/driver_types.h @@ -0,0 +1,7 @@ +#ifndef DRIVER_TYPES_H_ +#define DRIVER_TYPES_H_ + +typedef struct CUstream_st *cudaStream_t; +typedef struct CUevent_st *cudaEvent_t; + +#endif /*DRIVER_TYPES_H_*/ diff --git a/tests/Imports/opencl_lite/CL/cl.h b/tests/Imports/opencl_lite/CL/cl.h new file mode 100644 index 0000000..1c333fa --- /dev/null +++ b/tests/Imports/opencl_lite/CL/cl.h @@ -0,0 +1,14 @@ +#ifndef CL_H_ +#define CL_H_ + +typedef struct _cl_platform_id * cl_platform_id; +typedef struct _cl_device_id * cl_device_id; +typedef 
struct _cl_context * cl_context; +typedef struct _cl_command_queue * cl_command_queue; +typedef struct _cl_mem * cl_mem; +typedef struct _cl_program * cl_program; +typedef struct _cl_kernel * cl_kernel; +typedef struct _cl_event * cl_event; +typedef struct _cl_sampler * cl_sampler; + +#endif /* CL_H_ */ diff --git a/tests/InjectionHelper.h b/tests/InjectionHelper.h new file mode 100644 index 0000000..82acb31 --- /dev/null +++ b/tests/InjectionHelper.h @@ -0,0 +1,580 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#pragma once + +// [Best practices for injection implementations] +// Set NVTX_NO_IMPL to make the NVTX headers define the API types and function +// prototypes only, not the inline impls. Be sure on GCC to use -Wno-unused-function +// to avoid warnings for undefined static prototypes. +#define NVTX_NO_IMPL + +// [Best practices for injection implementations] +// Microsoft's compiler issues warning 26812 when compiling a C-style enum in C++ +// instead of using the new "enum class" style. Since the NVTX headers are written in +// C, the enums defined there will trigger this warning. Use this code to disable it. 
+#if defined(_MSC_VER) +#pragma warning (disable : 26812) +#endif +#include + +#include +#include +#include +#include + +namespace NvtxInjectionHelper { + +//============ Generic utility functions ====================================== + +inline namespace detail_generic { + +//--- maxVal --- + +// Variadic alternative to std::max that doesn't need an initializer list, +// doesn't conflict with MSVC's #define for max, and has no trouble with +// constexpr usage. Handles having zero parameters passed, returning +// std::numeric_limits::min in that case, as long as the template +// parameter T is explicitly specified. Takes arguments by value, which +// avoids the issue of returning a reference to something when called +// with no parameters. Example uses: +// + +template +constexpr inline T maxVal() { return std::numeric_limits::min(); } + +template +constexpr inline T maxVal(T first, Rest... rest) +{ + T restMax = maxVal(rest...); + return (first > restMax) ? first : restMax; +} + +//--- tuple size helper --- + +// Generic utility for getting the size of a std::tuple, using its value +// as opposed to std::tuple_size<> which takes the tuple's type. In a +// generic lambda where the parameter's type is "auto", it's extra work +// to figure out the type +template +constexpr inline size_t size_of_tuple(std::tuple const&) +{ + return sizeof...(Ts); +} + +//--- tuple helpers to loop over items --- + +// We need a way to call a function f on each element of a tuple, like this: +// +// f(std::get<0>(t)); +// f(std::get<1>(t)); +// f(std::get<2>(t)); etc. +// +// We want something like this, where "Is" is a parameter pack of 0,1,2,etc.: +// +// f(std::get(t))...; +// +// ...but parameter pack expansion is only allowed within the context of args +// to a function call or a braced init list. We also must handle the case +// where the tuple is empty, we should discard the results of all the calls +// to f, even if it returns different types for each call. 
Easiest way to +// do this is by forwarding the elements of the tuple as args to a helper +// function that calls f on each arg, like this: +// +// for_each_in_parameter_pack(f, std::get(t)...); +// +// But we also want perfect forwarding of the function and the tuple. +// The following utilities "for_each_in_tuple", "for_each_in_tuple_helper", +// and "for_each_in_parameter_pack" are provided to allow code such as this +// "loop" over tuple elements. Note that "thing" in each iteration can be +// a different type, because a tuple's elements may be different types, so +// generic lambdas are very convenient here: +// +// for_each_in_tuple(tuple_of_things, +// [](auto const& thing) +// { +// std::cout << thing << std::endl; +// } +// ); + +template +inline void for_each_in_parameter_pack(F&& f) {} + +template +inline void for_each_in_parameter_pack(F&& f, First const& first, Rest const&... rest) +{ + // Call f on the first argument, and explicitly discard the result by casting to void + static_cast(std::forward(f)(first)); + + // Recurse to call f on the rest of the arguments + for_each_in_parameter_pack(std::forward(f), rest...); +} + +// Generic utility for calling a function f for each element of a tuple t +template +inline void for_each_in_tuple_helper(T const& t, F&& f, std::index_sequence) +{ + for_each_in_parameter_pack( + std::forward(f), + std::get(t)... + ); +} +template +inline void for_each_in_tuple(std::tuple const& t, F&& f) +{ + for_each_in_tuple_helper(t, std::forward(f), std::make_index_sequence()); +} + +} // namespace detail_generic + +//============ NVTX injection helper internal utilities ======================= + +inline namespace detail_nvtx { + +//--- id_t --- +// Define generic integer type for holding all modules' callback id enum values. +// These are used as indexes into the handler arrays for each module. +using id_t = unsigned int; + +//--- id_v --- +// Nickname for std::integral_constant, which is used for all callback enum values. 
+// Using an integral constant allows performing correctness checks at compile time, +// which is not possible in C++ with function parameter values, only their types. +// Including the value in the type works around this problem. +template +using id_v = std::integral_constant; + +//--- NVTX_CBID --- +// Macro to succinctly turn an NVTX_CBID_* enum value into a compile-time constant, +// using std::integral_constant. This makes it possible to perform correctness +// checks at compile time, for example ensuring a handler's signature is compatible +// with the NVTX API call it is being installed to handle. Syntax is meant to look +// familiar. For example, replace: +// NVTX_CBID_CORE_MarkA +// with: +// NVTX_CBID(CORE_MarkA) +// when passing CBID values to NvtxInjectionHelper::MakeHandlerTable. +#define NVTX_CBID(func) NvtxInjectionHelper::id_v{} + +//--- EnumTypeToModuleId --- +// Template variable to map from call id enum types to module id values (see nvtxTypes.h) +// For example, EnumTypeToModuleId == NVTX_CB_MODULE_CORE. +template +constexpr static NvtxCallbackModule EnumTypeToModuleId = NVTX_CB_MODULE_INVALID; + +template<> constexpr NvtxCallbackModule EnumTypeToModuleId = NVTX_CB_MODULE_CORE; +template<> constexpr NvtxCallbackModule EnumTypeToModuleId = NVTX_CB_MODULE_CUDA; +template<> constexpr NvtxCallbackModule EnumTypeToModuleId = NVTX_CB_MODULE_OPENCL; +template<> constexpr NvtxCallbackModule EnumTypeToModuleId = NVTX_CB_MODULE_CUDART; +template<> constexpr NvtxCallbackModule EnumTypeToModuleId = NVTX_CB_MODULE_CORE2; +template<> constexpr NvtxCallbackModule EnumTypeToModuleId = NVTX_CB_MODULE_SYNC; + +//--- IdToModuleId --- +// Helper for EnumTypeToModuleId to convert directly from an integral_constant of a call id enum +// to its module id. For example, since NVTX_CBID(CORE_MarkA) is an integral_constant, it cannot +// be used directly as in EnumTypeToModuleId, since NVTX_CBID(CORE_MarkA)'s +// type is std::integral_constant. 
This helper extracts +// the enum's type from the integral_constant, allowing EnumConstToModuleId. +template +constexpr static NvtxCallbackModule IdToModuleId = EnumTypeToModuleId; + + +//--- IdToHandlerType +// Template using to map from call id values to matching function pointer types. +template struct IdToHandlerType { using type = nullptr_t; }; + +// Macro for defining IdToHandlerType specializations for each id. +// mod = module, i.e. CORE, CORE2 +// func = prefixless function name, i.e. MarkEx, DomainCreateA +// impl = impl or fakeimpl, depending on whether or not to use real types or the +// nvtxTypes.h "fakeimpl" types, which don't depend on CUDA/OpenCL headers. +#define NVTX_ID_TO_TYPE(mod, func, impl) \ +template <> struct IdToHandlerType { using type = nvtx##func##_##impl##_fntype; } + +NVTX_ID_TO_TYPE(CORE, MarkEx , impl); +NVTX_ID_TO_TYPE(CORE, MarkA , impl); +NVTX_ID_TO_TYPE(CORE, MarkW , impl); +NVTX_ID_TO_TYPE(CORE, RangeStartEx , impl); +NVTX_ID_TO_TYPE(CORE, RangeStartA , impl); +NVTX_ID_TO_TYPE(CORE, RangeStartW , impl); +NVTX_ID_TO_TYPE(CORE, RangeEnd , impl); +NVTX_ID_TO_TYPE(CORE, RangePushEx , impl); +NVTX_ID_TO_TYPE(CORE, RangePushA , impl); +NVTX_ID_TO_TYPE(CORE, RangePushW , impl); +NVTX_ID_TO_TYPE(CORE, RangePop , impl); +NVTX_ID_TO_TYPE(CORE, NameCategoryA, impl); +NVTX_ID_TO_TYPE(CORE, NameCategoryW, impl); +NVTX_ID_TO_TYPE(CORE, NameOsThreadA, impl); +NVTX_ID_TO_TYPE(CORE, NameOsThreadW, impl); + +NVTX_ID_TO_TYPE(CORE2, DomainMarkEx , impl); +NVTX_ID_TO_TYPE(CORE2, DomainRangeStartEx , impl); +NVTX_ID_TO_TYPE(CORE2, DomainRangeEnd , impl); +NVTX_ID_TO_TYPE(CORE2, DomainRangePushEx , impl); +NVTX_ID_TO_TYPE(CORE2, DomainRangePop , impl); +NVTX_ID_TO_TYPE(CORE2, DomainResourceCreate , impl); +NVTX_ID_TO_TYPE(CORE2, DomainResourceDestroy, impl); +NVTX_ID_TO_TYPE(CORE2, DomainNameCategoryA , impl); +NVTX_ID_TO_TYPE(CORE2, DomainNameCategoryW , impl); +NVTX_ID_TO_TYPE(CORE2, DomainRegisterStringA, impl); +NVTX_ID_TO_TYPE(CORE2, 
DomainRegisterStringW, impl); +NVTX_ID_TO_TYPE(CORE2, DomainCreateA , impl); +NVTX_ID_TO_TYPE(CORE2, DomainCreateW , impl); +NVTX_ID_TO_TYPE(CORE2, DomainDestroy , impl); +NVTX_ID_TO_TYPE(CORE2, Initialize , impl); + +#undef NVTX_ID_TO_TYPE + +//--- CheckHandlerTypeMatchesId --- +// Compile-time check provides easy-to-read error if FuncT isn't compatible with EnumT +template +constexpr inline void CheckHandlerTypeMatchesId() +{ + using ExpectedFuncT = typename IdToHandlerType::type; + + static_assert(std::is_same(), + "NVTX Injection Helper: The provided handler function's signature does not match the NVTX API for the given call id."); +} + +//--- Handler --- +// Represents id/handler pair for an NVTX call. Provides: +// - the call's id (NVTX_CBID_* enum values) +// - handler function pointer +// Preserves the type of the function as a template parameter. +// Erases the type of the enum, so it's not module-specific anymore. +// Allows being constructed and placed into a container at compile time, then +// later at run time doing the run-time-only cast of the function pointer. +// This enables processing of ids to occur at compile time. +template +class Handler +{ +public: + id_t id; + FuncT pfn; + + template + constexpr Handler(id_v e, FuncT pfn_) + : id(static_cast(EnumVal)) // Erase enum's type + , pfn(pfn_) + {} + + NvtxFunctionPointer Address() const noexcept + { + return reinterpret_cast(pfn); + } +}; + +//--- MakeHandler --- +// "Make" function for Handler to automatically deduce types from parameters +template +constexpr inline Handler MakeHandler(IdT id_, FuncT func) +{ + CheckHandlerTypeMatchesId(); + return Handler(id_, func); +} + +//--- ModuleHandlerTable --- +// Represents the set of Handlers for one module. 
Provides: +// - the module's id (NVTX_CB_MODULE_* enum values) +// - iterable container of id/handler pairs (empty means skip getting etbl for module) +// - highest call id value of handler in module (to confirm client has sufficient size) +// - a method to assign all the stored handlers into a client's handler table +// These objects can be constructed at compile time, including the highest call id used. +template +class ModuleHandlerTable +{ +public: + using tuple_t = std::tuple...>; + + static constexpr NvtxCallbackModule moduleId = mod; + tuple_t handlers; + id_t highestIdUsed; + + constexpr ModuleHandlerTable(tuple_t t) + : handlers(t) + , highestIdUsed(FindHighestId(t)) + {} + + void AssignToClient(NvtxFunctionTable clientTable) const noexcept + { + for_each_in_tuple(handlers, + [clientTable](auto const& handler) + { + if (handler.id != 0 && handler.pfn != nullptr) + { + *clientTable[handler.id] = handler.Address(); + } + } + ); + } + +private: + template + static constexpr id_t FindHighestIdHelper(tuple_t t, std::index_sequence) + { + return maxVal(std::get(t).id...); + } + + static constexpr id_t FindHighestId(tuple_t t) + { + return FindHighestIdHelper(t, std::make_index_sequence()); + } +}; + + +//--- MakeModuleHandlerTuple --- +// MakeModuleHandlerTuple takes NvtxCallbackModule "mod" as a template parameter, +// and loops over pairs of arguments (an enum and a handler function), building a +// tuple of Handler objects for the enums that are in module "mod", and ignoring +// ones that aren't. This lets the user pass in handlers for all modules in +// one simple call, and we can build up separate handler tables for each module. +// MakeModuleHandlerTuple is recursive, peeling off two arguments in each recursive +// case, and having no args be the base case. The recursive case has a pair of +// overloads for whether or not the enum's type matches "mod" or not. 
Since these +// overloads are separate functions, it's mutual recursion, so both are declared +// first before the definitions. + +// Base case: no more arguments +template +constexpr inline auto MakeModuleHandlerTuple() +{ + return std::tuple<>{}; +} + +// Prototypes of recursive cases -- needed since they can call each other +template == mod, int> = 0, + typename... Args> +constexpr inline auto MakeModuleHandlerTuple(IdT, FuncT, Args...); + +template != mod, int> = 0, + typename... Args> +constexpr inline auto MakeModuleHandlerTuple(IdT, FuncT, Args...); + +// Recursive case 1: enum's type matches mod, so add it to the tuple +template == mod, int>, + typename... Args> +constexpr inline auto MakeModuleHandlerTuple(IdT id, FuncT f, Args... rest) +{ + // Verify types of id and function, using static_assert to provide a + // clear compile error if the types don't meet the requirements. + static_assert(IdToModuleId != NVTX_CB_MODULE_INVALID, + "MakeHandlerTable arguments must be pairs of IDs and handler functions. IDs must be enums starting with NVTX_CBID_. An invalid ID value was provided."); + + // TODO: Before adding this id/handler pair to the tuple, check to make sure + // there's not already an entry in the tuple with the same id. If so, + // provide a clear compile-time error message. NOTE(review): no such check is visible in this function; a repeated id appears to be concatenated as-is -- confirm. + auto restTuple = MakeModuleHandlerTuple(rest...); + + return std::tuple_cat( + std::make_tuple(MakeHandler(id, f)), + restTuple); +} + +// Recursive case 2: id is not in module, so fwd result from remaining args +template != mod, int>, + typename... Args> +constexpr inline auto MakeModuleHandlerTuple(IdT id, FuncT f, Args... rest) +{ + return MakeModuleHandlerTuple(rest...); +} + +//--- MakeModuleHandlerFromTuple --- +// Helper function for MakeModuleHandlerTable. Converts a tuple of Handlers into +// a ModuleHandlerTable object. This approach was simpler than building up the +// ModuleHandlerTable incrementally, since std::tuple_cat makes it so easy to +// build up a tuple.
+template +constexpr inline auto MakeModuleHandlerFromTuple(std::tuple...> t) +{ + return ModuleHandlerTable(t); +} + +//--- "Make" function for ModuleHandlerTable to automatically deduce type --- +// First, create a tuple of just the handlers in the argument list in module "mod". +// Uses the mutually-recursive MakeModuleHandlerTuple overloads, which only add +// handlers into the tuple if the module matches. Then, MakeModuleHandlerFromTuple +// converts the tuple into a properly-typed ModuleHandlerTable object. +template +constexpr inline auto MakeModuleHandlerTable(Args... args) +{ + const auto handlerTuple = MakeModuleHandlerTuple(args...); + return MakeModuleHandlerFromTuple(handlerTuple); +} + +} // namespace detail_nvtx + +//============ NVTX injection helper public interface ========================= + +// Define sentinel-value constants for use in handler implementations +namespace ReturnCodes { + constexpr auto NVTX_TOOL_ATTACHED_UNUSED_RANGE_ID = static_cast(-1LL); + constexpr int NVTX_TOOL_ATTACHED_UNUSED_PUSH_POP_ID = -1; + const auto NVTX_TOOL_ATTACHED_UNUSED_DOMAIN_HANDLE = reinterpret_cast(-1LL); + const auto NVTX_TOOL_ATTACHED_UNUSED_STRING_HANDLE = reinterpret_cast(-1LL); + // Note: In C++20, use bit_cast instead of reinterpret_cast, so the handles + // (which are pointer types) can also be made constexpr. +} + +template +constexpr inline auto MakeHandlerTable(Args... args) +{ + return std::make_tuple( + MakeModuleHandlerTable(args...), + MakeModuleHandlerTable(args...), + MakeModuleHandlerTable(args...), + MakeModuleHandlerTable(args...), + MakeModuleHandlerTable(args...), + MakeModuleHandlerTable(args...) 
+ ); +} + +enum class InstallResult +{ + Success, + ExportTableVersionInfoMissing, + ExportTableVersionInfoTooSmall, + ClientVersionTooOld, + ExportTableCallbacksMissing, + ExportTableCallbacksTooSmall, + ModuleNotSupported, + ModuleTableTooSmall +}; + +template +inline InstallResult InstallHandlers( + NvtxGetExportTableFunc_t getExportTable, + HandlerTableT const& injectionHandlerTable, + std::ostringstream* errStream = nullptr, + uint32_t* pVersion = nullptr) +{ + uint32_t version = 0; + auto pVersionInfo = + reinterpret_cast(getExportTable(NVTX_ETID_VERSIONINFO)); + if (!pVersionInfo) + { + if (errStream) *errStream + << "Client NVTX instance doesn't support NVTX_ETID_VERSIONINFO"; + return InstallResult::ExportTableVersionInfoMissing; + } + + if (pVersionInfo->struct_size < sizeof(*pVersionInfo)) + { + if (errStream) *errStream + << "NvtxExportTableVersionInfo structure size is " << pVersionInfo->struct_size + << ", expected " << sizeof(*pVersionInfo) << "!"; + return InstallResult::ExportTableVersionInfoTooSmall; + } + + version = pVersionInfo->version; + if (version < 2) + { + if (errStream) *errStream + << "client's NVTX version is " << version << ", expected 2+"; + return InstallResult::ClientVersionTooOld; + } + + if (pVersion) *pVersion = version; + + auto pCallbacks = + reinterpret_cast(getExportTable(NVTX_ETID_CALLBACKS)); + if (!pCallbacks) + { + if (errStream) *errStream + << "Client NVTX instance doesn't support NVTX_ETID_CALLBACKS"; + return InstallResult::ExportTableCallbacksMissing; + } + + if (pCallbacks->struct_size < sizeof(*pCallbacks)) + { + if (errStream) *errStream + << "NvtxExportTableCallbacks structure size is " << pCallbacks->struct_size + << ", expected " << sizeof(*pCallbacks) << "!"; + return InstallResult::ExportTableCallbacksTooSmall; + } + +#if defined(DEBUG) || true + // Simple loop to print handler table internal details + for_each_in_tuple(injectionHandlerTable, + [](auto const& handlerModule) + { + auto count = 
size_of_tuple(handlerModule.handlers); + printf("Module: %d Count: %d Highest: %d\n", + (int)handlerModule.moduleId, (int)count, (int)handlerModule.highestIdUsed); + + if (count > 0) + { + for_each_in_tuple(handlerModule.handlers, + [](auto const& handler) + { + auto addr = (long long)handler.Address(); + printf(" Id: %d Address: 0x%llx\n", + (int)handler.id, addr); + } + ); + } + } + ); +#endif + + // Loop over module handler tables and install handlers into client + bool errors = false; + for_each_in_tuple(injectionHandlerTable, + [&](auto const& handlerModule) + { + NvtxFunctionTable clientTable = 0; + unsigned int clientTableSize = 0; + int success; + + if (handlerModule.moduleId == NVTX_CB_MODULE_INVALID) return; + + success = pCallbacks->GetModuleFunctionTable(handlerModule.moduleId, &clientTable, &clientTableSize); + if (!success || !clientTable) + { + if (errStream) *errStream + << "Client NVTX instance doesn't support callback module with id " << handlerModule.moduleId; + // TODO: return InstallResult::ModuleNotSupported; + errors = true; + } + + // Ensure client's table is new enough to support the function pointers we want to register + if (clientTableSize <= handlerModule.highestIdUsed) + { + if (errStream) *errStream + << "Size of client NVTX instance's handler table with module id " << handlerModule.moduleId + << " too small. 
Size is " << clientTableSize + << ", but injection needs to assign table[" << handlerModule.highestIdUsed << "]"; + // TODO: return InstallResult::ModuleTableTooSmall; + errors = true; + } + + handlerModule.AssignToClient(clientTable); + } + ); + + if (errors) return InstallResult::ModuleNotSupported; + + return InstallResult::Success; +} + +} // namespace NvtxInjectionHelper diff --git a/tests/LinkerDupesFileA.cpp b/tests/LinkerDupesFileA.cpp new file mode 100644 index 0000000..669c870 --- /dev/null +++ b/tests/LinkerDupesFileA.cpp @@ -0,0 +1,26 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#include "TestCoverage.h" + +void FileA(int argc, const char** argv) +{ + RunTestCommon(argc, argv); +} diff --git a/tests/LinkerDupesFileB.cpp b/tests/LinkerDupesFileB.cpp new file mode 100644 index 0000000..5235579 --- /dev/null +++ b/tests/LinkerDupesFileB.cpp @@ -0,0 +1,26 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#include "TestCoverage.h" + +void FileB(int argc, const char** argv) +{ + RunTestCommon(argc, argv); +} diff --git a/tests/LinkerDupesMain.cpp b/tests/LinkerDupesMain.cpp new file mode 100644 index 0000000..3907084 --- /dev/null +++ b/tests/LinkerDupesMain.cpp @@ -0,0 +1,35 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. 
+ */ + +#include // Just for export macros + +void FileA(int argc, const char** argv); +void FileB(int argc, const char** argv); + +extern "C" NVTX_DYNAMIC_EXPORT +int RunTest(int argc, const char** argv) +{ + NVTX_EXPORT_UNMANGLED_FUNCTION_NAME + + FileA(argc, argv); + FileB(argc, argv); + + return 0; +} diff --git a/tests/NamedCategories.cpp b/tests/NamedCategories.cpp new file mode 100644 index 0000000..d39c7ec --- /dev/null +++ b/tests/NamedCategories.cpp @@ -0,0 +1,148 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. 
+ */ + +#if defined(_MSC_VER) && _MSC_VER < 1914 +#define STATIC_ASSERT_TESTING 0 +#else +#define STATIC_ASSERT_TESTING 1 +#endif + +#if defined(STATIC_ASSERT_TESTING) +#include +#define NVTX3_STATIC_ASSERT(c, m) do { if (!(c)) printf("static_assert would fail: %s\n", m); } while (0) +#endif + +#include + +#include + +// Domain description types +struct d { static constexpr const char* name{"Test domain"}; }; + +// Named category types +struct cat_char_test { static constexpr const char* name{"Cat char"}; static constexpr uint32_t id{1}; }; +struct cat_wchar_test { static constexpr const wchar_t* name{L"Cat wchar_t"}; static constexpr uint32_t id{2}; }; +struct error_name_missing { static constexpr const char* x {"Name"}; static constexpr uint32_t id{3}; }; +struct error_name_is_bad_type { static constexpr const int name{5}; static constexpr uint32_t id{4}; }; +struct error_id_missing { static constexpr const char* name{"Name"}; static constexpr uint32_t y {5}; }; +struct error_id_is_bad_type { static constexpr const char* name{"Name"}; static constexpr float id{6}; }; +struct error_both_missing { static constexpr const char* x {"Name"}; static constexpr uint32_t y {7}; }; +struct error_both_bad_type { static constexpr const int name{5}; static constexpr float id{8}; }; +struct error_no_name_bad_id { static constexpr const char* x {"Name"}; static constexpr float id{9}; }; +struct error_bad_name_no_id { static constexpr const int name{5}; static constexpr uint32_t y {10}; }; +struct cat_global_domain1 { static constexpr const char* name{"Global1"}; static constexpr uint32_t id{11}; }; +struct cat_global_domain2 { static constexpr const char* name{"Global2"}; static constexpr uint32_t id{12}; }; +struct cat_global_domain3 { static constexpr const char* name{"Global3"}; static constexpr uint32_t id{13}; }; + +extern "C" NVTX_DYNAMIC_EXPORT +int RunTest(int argc, const char** argv) +{ + NVTX_EXPORT_UNMANGLED_FUNCTION_NAME + + (void)argc; + (void)argv; + + using 
namespace nvtx3; + + auto& d1 = domain::get(); + +#if 1 + std::cout << "- Named category (char):\n"; + auto& c1 = named_category_in::get(); + mark_in("Mark in cat_char_test category", named_category_in::get()); + + std::cout << "- Named category (wchar_t):\n"; + auto& c2 = named_category_in::get(); + mark_in("Mark in cat_wchar_test category", named_category_in::get()); +#endif + +#if 1 + std::cout << "- Named category in global domain (alias):\n"; + auto& cd1 = named_category::get(); + + std::cout << "- Named category in global domain (implicit):\n"; + auto& cd2 = named_category_in<>::get(); + + std::cout << "- Named category in global domain (explicit):\n"; + auto& cd3 = named_category_in::get(); +#endif + +#if STATIC_ASSERT_TESTING + +#if 1 // defined(ERROR_TEST_NAME_IS_MISSING) + { + std::cout << "- Error test - category is missing name member:\n"; + auto& c3 = named_category_in::get(); + } +#endif + +#if 1 // defined(ERROR_TEST_NAME_IS_BAD_TYPE) + { + std::cout << "- Error test - category name member isn't narrow or wide char array:\n"; + auto& c4 = named_category_in::get(); + } +#endif + +#if 1 // defined(ERROR_TEST_ID_IS_MISSING) + { + std::cout << "- Error test - category is missing id member:\n"; + auto& c5 = named_category_in::get(); + } +#endif + +#if 1 // defined(ERROR_TEST_ID_IS_BAD_TYPE) + { + std::cout << "- Error test - category id member isn't uint32_t:\n"; + auto& c6 = named_category_in::get(); + } +#endif + +#if 1 // defined(ERROR_TEST_BOTH_MISSING) + { + std::cout << "- Error test - category is missing both members:\n"; + auto& c7 = named_category_in::get(); + } +#endif + +#if 1 // defined(ERROR_TEST_BOTH_BAD_TYPE) + { + std::cout << "- Error test - category members are both bad types:\n"; + auto& c8 = named_category_in::get(); + } +#endif + +#if 1 // defined(ERROR_TEST_NO_NAME_BAD_ID) + { + std::cout << "- Error test - category has no name and bad id type:\n"; + auto& c9 = named_category_in::get(); + } +#endif + +#if 1 // 
defined(ERROR_TEST_BAD_NAME_NO_ID) + { + std::cout << "- Error test - category has bad name type and no id:\n"; + auto& c10 = named_category_in::get(); + } +#endif + +#endif // STATIC_ASSERT_TESTING + + return 0; +} diff --git a/tests/PathHelper.h b/tests/PathHelper.h new file mode 100644 index 0000000..f12bb0c --- /dev/null +++ b/tests/PathHelper.h @@ -0,0 +1,214 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#pragma once + +/* Dynamic libraries should be loaded with absolute paths to avoid + * problems not finding things in the search paths. Construct the + * absolute path to a dynamic library in the same directory as the + * process's executable, or some subdirectory of it, using these + * utility functions. C++17's std::filesystem makes this much + * easier, but these utilities should work in C++11. + */ + +#if defined(_WIN32) + +#include + +#else + +#if defined(__CYGWIN__) +#if defined(__POSIX_VISIBLE) +#if __POSIX_VISIBLE < 200112L +#error On Cygwin, you must `#define _POSIX_C_SOURCE 200112L` or greater before including any headers so that readlink() is available. You can achieve this by including this header before any others. 
+#endif +#endif +#if defined(_POSIX_C_SOURCE) +#undef _POSIX_C_SOURCE +#endif +#define _POSIX_C_SOURCE 200809L +#endif + +#include + +#endif + +#if defined(__APPLE__) +#include +#endif + +#include +#include +#include +#include +#include + +#include "DllHelper.h" + +#if defined(_WIN32) +constexpr char pathSep = '\\'; +#else +constexpr char pathSep = '/'; +#endif + +// Adapted from C functions in NVTXW implementation +static std::string GetCurrentProcessPath() +{ + char* buf; +#if defined(_WIN32) + { + DWORD size = MAX_PATH; + DWORD newSize; + buf = NULL; + while (1) + { + buf = (char*)realloc(buf, size); + if (!buf) + { + return NULL; + } + newSize = GetModuleFileNameA(NULL, buf, size); + if (newSize < size) + { + break; + } + size *= 2; + } + } +#elif defined(__APPLE__) + { + int ret; + pid_t pid = getpid(); + buf = (char*)malloc(PROC_PIDPATHINFO_MAXSIZE); + if (!buf) + { + return NULL; + } + ret = proc_pidpath(pid, buf, PROC_PIDPATHINFO_MAXSIZE); + if (ret == 0) + { + free(buf); + return NULL; + } + } +#elif defined(__QNX__) + { + size_t size = fpathconf(0, _PC_MAX_INPUT); + if (size <= 0) + { + size = 4096; + } + ++size; + buf = (char*)malloc(size); + if (!buf) + { + return NULL; + } + _cmdname(buf); + } +#else + { + size_t size = 1024; + ssize_t bytesReadSigned; + size_t bytesRead; + static const char linkName[] = "/proc/self/exe"; + buf = NULL; + while (1) + { + buf = (char*)realloc(buf, size); + if (!buf) + { + return NULL; + } + bytesReadSigned = readlink(linkName, buf, size); + if (bytesReadSigned < 0) + { + free(buf); + return NULL; + } + bytesRead = (size_t)bytesReadSigned; + if (bytesRead < size) break; + size *= 2; + } + buf[bytesRead] = '\0'; + } +#endif + + std::string result; + if (buf) + { + result = buf; + free(buf); + } + return result; +} + +// We know the absolute path must have at least one slash in it, +// right before the exe filename. 
So we can truncate the string +// to end just after the last slash, and append other file or +// directory names. Yields an empty string when no separator is found. Defined inline because this header may be included by several translation units. Examples: +// C:\path\to\foo.exe -> C:\path\to\ +// C:\foo.exe -> C:\ +// /path/to/foo -> /path/to/ +// /foo -> / +inline std::string GetCurrentProcessDirWithSep() +{ + std::string exeAbsPath = GetCurrentProcessPath(); + exeAbsPath.resize(exeAbsPath.find_last_of(pathSep) + 1); + return exeAbsPath; +} + +// Take the absolute path to the current process's executable, +// remove the executable's name, and then append the library +// filename. Applies the standard dynamic library prefix and +// suffix to the library's base name, but the suffix may be +// overridden if it isn't the standard one (e.g. ".so.1.1"). +// If subDirs has any entries, they are added between the +// directory and the library name, with path separators added +// between each. Defined inline so the header can be included by several translation units. Examples: +// (Assuming process is C:\path\to\foo.exe on Windows) +// AbsolutePathToLibraryInCurrentProcessPath("example") +// -> C:\path\to\example.dll +// AbsolutePathToLibraryInCurrentProcessPath("example", {"nested", "deeper"}) +// -> C:\path\to\nested\deeper\example.dll +// (Assuming process is /path/to/foo on Linux) +// AbsolutePathToLibraryInCurrentProcessPath("example") +// -> /path/to/libexample.so +// AbsolutePathToLibraryInCurrentProcessPath("example", {"nested", "deeper"}, ".so.1") +// -> /path/to/nested/deeper/libexample.so.1 +inline std::string AbsolutePathToLibraryInCurrentProcessPath( + std::string libraryBaseName, + std::vector subDirs = {}, + std::string libSuffix = DLL_SUFFIX) +{ + std::string result = GetCurrentProcessDirWithSep(); + + for (auto const& subDir : subDirs) + { + result += subDir; + result += pathSep; + } + + result += DLL_PREFIX; + result += libraryBaseName; + result += libSuffix; + + return result; +} diff --git a/tests/PrettyPrintersNvtxC.h b/tests/PrettyPrintersNvtxC.h new file mode 100644 index 0000000..737e848 --- /dev/null +++ b/tests/PrettyPrintersNvtxC.h @@ -0,0 +1,160 @@ +/* + *
SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#pragma once +#include +#include + +// Pretty-printers for color, payload, and message discriminated-union types + +inline void WriteColorType(std::ostream& os, nvtxColorType_t t) +{ + switch (t) + { + case NVTX_COLOR_ARGB : os << "NVTX_COLOR_ARGB"; break; + case NVTX_COLOR_UNKNOWN: os << ""; break; + default : os << ""; + } +} + +inline std::ostream& operator<<(std::ostream& os, nvtxColorType_t t) +{ + WriteColorType(os, t); + return os; +} + +inline void WritePayloadType(std::ostream& os, nvtxPayloadType_t t) +{ + switch (t) + { + case NVTX_PAYLOAD_TYPE_UNSIGNED_INT64: os << "NVTX_PAYLOAD_TYPE_UNSIGNED_INT64"; break; + case NVTX_PAYLOAD_TYPE_INT64 : os << "NVTX_PAYLOAD_TYPE_INT64 "; break; + case NVTX_PAYLOAD_TYPE_DOUBLE : os << "NVTX_PAYLOAD_TYPE_DOUBLE "; break; + case NVTX_PAYLOAD_TYPE_UNSIGNED_INT32: os << "NVTX_PAYLOAD_TYPE_UNSIGNED_INT32"; break; + case NVTX_PAYLOAD_TYPE_INT32 : os << "NVTX_PAYLOAD_TYPE_INT32 "; break; + case NVTX_PAYLOAD_TYPE_FLOAT : os << "NVTX_PAYLOAD_TYPE_FLOAT "; break; + case NVTX_PAYLOAD_UNKNOWN : os << ""; break; + default : os << ""; + } +} + +inline void 
WritePayloadValue(std::ostream& os, nvtxPayloadType_t t, nvtxEventAttributes_v2::payload_t val) +{ + switch (t) + { + case NVTX_PAYLOAD_TYPE_UNSIGNED_INT64: os << val.ullValue; break; + case NVTX_PAYLOAD_TYPE_INT64 : os << val.llValue; break; + case NVTX_PAYLOAD_TYPE_DOUBLE : os << val.dValue; break; + case NVTX_PAYLOAD_TYPE_UNSIGNED_INT32: os << val.uiValue; break; + case NVTX_PAYLOAD_TYPE_INT32 : os << val.iValue; break; + case NVTX_PAYLOAD_TYPE_FLOAT : os << val.fValue; break; + case NVTX_PAYLOAD_UNKNOWN : os << ""; break; + default : os << ""; + } +} + +inline void WritePayload(std::ostream& os, nvtxPayloadType_t t, nvtxEventAttributes_v2::payload_t val) +{ + WritePayloadType(os, t); + os << " = "; + WritePayloadValue(os, t, val); +} + +inline std::ostream& operator<<(std::ostream& os, nvtxPayloadType_t t) +{ + WritePayloadType(os, t); + return os; +} + +inline void WriteMessageType(std::ostream& os, nvtxMessageType_t t) +{ + switch (t) + { + case NVTX_MESSAGE_TYPE_ASCII : os << "NVTX_MESSAGE_TYPE_ASCII"; break; + case NVTX_MESSAGE_TYPE_UNICODE : os << "NVTX_MESSAGE_TYPE_UNICODE"; break; + case NVTX_MESSAGE_TYPE_REGISTERED: os << "NVTX_MESSAGE_TYPE_REGISTERED"; break; + case NVTX_MESSAGE_UNKNOWN : os << ""; break; + default : os << ""; + } +} + +inline void WriteMessageValue(std::ostream& os, nvtxMessageType_t t, nvtxMessageValue_t val) +{ + switch (t) + { + case NVTX_MESSAGE_TYPE_ASCII : os << val.ascii; break; + case NVTX_MESSAGE_TYPE_UNICODE : os << ""; break; + case NVTX_MESSAGE_TYPE_REGISTERED: os << "Registered handle: " << (void*)val.registered; break; + case NVTX_MESSAGE_UNKNOWN : os << ""; break; + default : os << ""; + } +} + +inline void WriteMessage(std::ostream& os, nvtxMessageType_t t, nvtxMessageValue_t val) +{ + WriteMessageType(os, t); + os << " = "; + WriteMessageValue(os, t, val); +} + +inline std::ostream& operator<<(std::ostream& os, nvtxMessageType_t t) +{ + WriteMessageType(os, t); + return os; +} + +// Pretty-printer for attributes 
struct + +#if 1 +inline std::ostream& operator<<(std::ostream& os, nvtxEventAttributes_t const& a) +{ + os << "{ver: " << a.version + << ", size: " << a.size + << ", category: " << a.category + << ", color: " << (nvtxColorType_t)a.colorType << " 0x" << std::hex << a.color << std::dec + << ", payload: " << (nvtxPayloadType_t)a.payloadType << " "; + WritePayloadValue(os, (nvtxPayloadType_t)a.payloadType, a.payload); + os << ", message: " << (nvtxMessageType_t)a.messageType << " \""; + WriteMessageValue(os, (nvtxMessageType_t)a.messageType, a.message); + os << "\"}"; + + return os; +} +#else +inline std::ostream& operator<<(std::ostream& os, nvtxEventAttributes_t const& a) +{ + os + << "uint16_t version = " << a.version << "\n" + << "uint16_t size = " << a.size << "\n" + << "uint32_t category = " << a.category << "\n" + << "int32_t colorType = " << (nvtxColorType_t)a.colorType << "\n" + << "uint32_t color = 0x" << std::hex << a.color << std::dec << "\n" + << "int32_t payloadType = " << (nvtxPayloadType_t)a.payloadType << "\n" + << "(union) payload = "; + WritePayloadValue(os, (nvtxPayloadType_t)a.payloadType, a.payload); + os << "\n" + << "int32_t messageType = " << (nvtxMessageType_t)a.messageType << "\n" + << "(union) message = "; + WriteMessageValue(os, (nvtxMessageType_t)a.messageType, a.message); + os << "\n"; + + return os; +} +#endif diff --git a/tests/PrettyPrintersNvtxCpp.h b/tests/PrettyPrintersNvtxCpp.h new file mode 100644 index 0000000..4f202c5 --- /dev/null +++ b/tests/PrettyPrintersNvtxCpp.h @@ -0,0 +1,40 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#pragma once +#include "PrettyPrintersNvtxC.h" +#include + +inline std::ostream& operator<<(std::ostream& os, nvtx3::event_attributes const& attr) +{ + return os << *attr.get(); +} + +inline std::ostream& operator<<(std::ostream& os, nvtx3::payload const& p) +{ + WritePayload(os, p.get_type(), p.get_value()); + return os; +} + +inline std::ostream& operator<<(std::ostream& os, nvtx3::message const& m) +{ + WriteMessage(os, m.get_type(), m.get_value()); + return os; +} diff --git a/tests/PrintInjection.cpp b/tests/PrintInjection.cpp new file mode 100644 index 0000000..6f43891 --- /dev/null +++ b/tests/PrintInjection.cpp @@ -0,0 +1,218 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#define NVTX_NO_IMPL +#include "nvtx3/nvToolsExt.h" + +#include + +#if defined(NVTX_INJECTION_TEST_QUIET) +#define LOG_INFO(...) +#define LOG_ERROR(...) +#else +#define LOG_INFO(...) fprintf(stderr, "[inj] " __VA_ARGS__) +#define LOG_ERROR(...) fprintf(stderr, "[inj] ERROR: " __VA_ARGS__) +#endif + +/* Implementations of NVTX functions to attach to client */ + +#define NVTX_TOOL_ATTACHED_UNUSED_RANGE_ID (nvtxRangeId_t)(-1LL) +#define NVTX_TOOL_ATTACHED_UNUSED_PUSH_POP_ID (int)(-1) +#define NVTX_TOOL_ATTACHED_UNUSED_DOMAIN_HANDLE (nvtxDomainHandle_t)(-1LL) +#define NVTX_TOOL_ATTACHED_UNUSED_STRING_HANDLE (nvtxStringHandle_t)(-1LL) + +/* NVTX_CB_MODULE_CORE */ + +static void NVTX_API HandleMarkA(const char* str) +{ + LOG_INFO("%s\n", "nvtxMarkA"); +} + +static int NVTX_API HandleRangePushA(const char* str) +{ + LOG_INFO("%s\n", "nvtxRangePushA"); + return NVTX_TOOL_ATTACHED_UNUSED_PUSH_POP_ID; +} + +static int NVTX_API HandleRangePop() +{ + LOG_INFO("%s\n", "nvtxRangePop"); + return NVTX_TOOL_ATTACHED_UNUSED_PUSH_POP_ID; +} + +/* NVTX_CB_MODULE_CORE2 */ + +static void NVTX_API HandleDomainMarkEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib) +{ + LOG_INFO("%s\n", "nvtxDomainMarkEx"); +} + +static nvtxRangeId_t NVTX_API HandleDomainRangeStartEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib) +{ + LOG_INFO("%s\n", "nvtxDomainRangeStartEx"); + return NVTX_TOOL_ATTACHED_UNUSED_RANGE_ID; +} + +static void NVTX_API HandleDomainRangeEnd(nvtxDomainHandle_t domain, nvtxRangeId_t id) +{ + LOG_INFO("%s\n", "nvtxDomainRangeEnd"); +} + +static int NVTX_API HandleDomainRangePushEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib) +{ + LOG_INFO("%s\n", "nvtxDomainRangePushEx"); + return NVTX_TOOL_ATTACHED_UNUSED_PUSH_POP_ID; +} + +static int NVTX_API 
HandleDomainRangePop(nvtxDomainHandle_t domain) +{ + LOG_INFO("%s\n", "nvtxDomainRangePop"); + return NVTX_TOOL_ATTACHED_UNUSED_PUSH_POP_ID; +} + +static nvtxStringHandle_t NVTX_API HandleDomainRegisterStringA(nvtxDomainHandle_t domain, const char* string) +{ + LOG_INFO("%s\n", "nvtxDomainRegisterStringA"); + return NVTX_TOOL_ATTACHED_UNUSED_STRING_HANDLE; +} + +static nvtxDomainHandle_t NVTX_API HandleDomainCreateA(const char* name) +{ + LOG_INFO("%s\n", "nvtxDomainCreateA"); + return NVTX_TOOL_ATTACHED_UNUSED_DOMAIN_HANDLE; +} + +static void NVTX_API HandleDomainDestroy(nvtxDomainHandle_t domain) +{ + LOG_INFO("%s\n", "nvtxDomainDestroy"); +} + +static void NVTX_API HandleInitialize(const void* reserved) +{ + LOG_INFO("%s\n", "nvtxInitialize"); +} + +#define ADD_TO_TABLE(mod, name) *table[NVTX_CBID_##mod##_##name] = (NvtxFunctionPointer)Handle##name; + +extern "C" NVTX_DYNAMIC_EXPORT +int NVTX_API InitializeInjectionNvtx2(NvtxGetExportTableFunc_t getExportTable) +{ + NVTX_EXPORT_UNMANGLED_FUNCTION_NAME + + uint32_t version = 0; + const NvtxExportTableVersionInfo* pVersionInfo = + (const NvtxExportTableVersionInfo*)getExportTable(NVTX_ETID_VERSIONINFO); + if (pVersionInfo) + { + if (pVersionInfo->struct_size < sizeof(*pVersionInfo)) + { + LOG_ERROR( + "(init v2) NvtxExportTableVersionInfo structure size is %d, expected %d!\n", + (int)pVersionInfo->struct_size, + (int)sizeof(*pVersionInfo)); + return 0; + } + + version = pVersionInfo->version; + if (version < 2) + { + LOG_ERROR( + "(init v2) client's NVTX version is %d, expected 2+\n", + (int)version); + return 0; + } + } + + LOG_INFO("---- InitializeInjectionNvtx2 called from client's NVTX v%d\n", version); + + const NvtxExportTableCallbacks* pCallbacks = + (const NvtxExportTableCallbacks*)getExportTable(NVTX_ETID_CALLBACKS); + if (!pCallbacks) + { + LOG_ERROR("(init v2) NVTX_ETID_CALLBACKS is not supported.\n"); + return 0; + } + + if (pCallbacks->struct_size < sizeof(*pCallbacks)) + { + LOG_ERROR("(init v2) 
NvtxExportTableCallbacks structure size is %d, expected %d!\n", + (int)pCallbacks->struct_size, + (int)sizeof(*pCallbacks)); + return 0; + } + + { + NvtxFunctionTable table = 0; + unsigned int size = 0; + int success = pCallbacks->GetModuleFunctionTable(NVTX_CB_MODULE_CORE, &table, &size); + if (!success || !table) + { + LOG_ERROR("(init v2) NVTX_CB_MODULE_CORE is not supported.\n"); + return 0; + } + + /* Ensure client's table is new enough to support the function pointers we want to register */ + unsigned int highestIdUsed = NVTX_CBID_CORE_RangePop; /* Can auto-detect this in C++ */ + if (size <= highestIdUsed) + { + LOG_ERROR("(init v2) Client's function pointer table size is %d, and we need to assign to table[%d].\n", + (int)size, + (int)highestIdUsed); + return 0; + } + + *table[NVTX_CBID_CORE_MarkA ] = (NvtxFunctionPointer)HandleMarkA ; + *table[NVTX_CBID_CORE_RangePushA] = (NvtxFunctionPointer)HandleRangePushA; + *table[NVTX_CBID_CORE_RangePop ] = (NvtxFunctionPointer)HandleRangePop ; + } + + { + NvtxFunctionTable table = 0; + unsigned int size = 0; + int success = pCallbacks->GetModuleFunctionTable(NVTX_CB_MODULE_CORE2, &table, &size); + if (!success || !table) + { + LOG_ERROR("(init v2) NVTX_CB_MODULE_CORE2 is not supported.\n"); + return 0; + } + + /* Ensure client's table is new enough to support the function pointers we want to register */ + unsigned int highestIdUsed = NVTX_CBID_CORE2_Initialize; /* Can auto-detect this in C++ */ + if (size <= highestIdUsed) + { + LOG_ERROR("(init v2) Client's function pointer table size is %d, and we need to assign to table[%d].\n", + (int)size, + (int)highestIdUsed); + return 0; + } + + *table[NVTX_CBID_CORE2_DomainMarkEx ] = (NvtxFunctionPointer)HandleDomainMarkEx ; + *table[NVTX_CBID_CORE2_DomainRangeStartEx ] = (NvtxFunctionPointer)HandleDomainRangeStartEx ; + *table[NVTX_CBID_CORE2_DomainRangeEnd ] = (NvtxFunctionPointer)HandleDomainRangeEnd ; + *table[NVTX_CBID_CORE2_DomainRangePushEx ] = 
(NvtxFunctionPointer)HandleDomainRangePushEx ; + *table[NVTX_CBID_CORE2_DomainRangePop ] = (NvtxFunctionPointer)HandleDomainRangePop ; + *table[NVTX_CBID_CORE2_DomainRegisterStringA] = (NvtxFunctionPointer)HandleDomainRegisterStringA; + *table[NVTX_CBID_CORE2_DomainCreateA ] = (NvtxFunctionPointer)HandleDomainCreateA ; + *table[NVTX_CBID_CORE2_DomainDestroy ] = (NvtxFunctionPointer)HandleDomainDestroy ; + *table[NVTX_CBID_CORE2_Initialize ] = (NvtxFunctionPointer)HandleInitialize ; + } + + return 1; +} diff --git a/tests/README.txt b/tests/README.txt new file mode 100644 index 0000000..119c90f --- /dev/null +++ b/tests/README.txt @@ -0,0 +1,37 @@ +This test suite builds an executable called "runtest" and several dynamic libraries. +The dynamic libraries serve as tests, NVTX injections, or both. To invoke "runtest", +specify the desired test library using -t and the desired injection library using -i. +For example: + runtest -t coverage -i inj +...will run the "coverage" test using "inj" for the injection. The tests can be run +without any injection to see what they do without NVTX. Some test libraries are self- +injecting, i.e. they serve as both the test and the injection. Specifying "-i -" for +the injection option instructs runtest to use the test library as the injection also. +Additional arguments are forwarded to the test. For example: + runtest -t calls -i - -v +...will use the "calls" library for both the test and the injection, and will pass +the "-v" argument to the calls test. + +Some tests include compile-time negative tests, guarded by #ifs. By defining macros +like ERROR_TEST_NAME_IS_MISSING, the tests should fail to compile with the expected +error message. In these cases, successful compilation or emitting the wrong error +message should be considered a failure of the test. + +Here are the dynamic libraries, and X in columns to show which usage they support: + + Test? Injection? 
Description +attributes X - Use NVTX C++ API for setting event attributes +calls X X - Use self-injection to test C/C++ APIs call handlers with all parameters correctly +categories X - Use NVTX C++ API for naming categories +coverage X - Use all features of NVTX C++ API +coveragec X - Use all features of NVTX C API +coverage-counter X - Use all features of NVTX C API Extension for Counters +coverage-cu X - Use all features of NVTX C++ API from a .cu file (i.e. use nvcc instead of host cc) +coverage-mem X - Use all features of NVTX C API Extension for Memory Naming +coverage-memcudart X - Use all features of NVTX C API Extension for Memory Naming (using CUDART types) +coverage-payload X - Use all features of NVTX C API Extension for Payloads +domains X - Use NVTX C++ API for creating domains +inj X - A simple injection that prints messages when NVTX functions are called +linkerdupes X - Compile-time tests to ensure multiple libraries using NVTX don't conflict +regstrings X - Use NVTX C++ API for registering strings +self X X - Use self-injection to demonstrate the self-injection mechanism is working diff --git a/tests/RegisteredStrings.cpp b/tests/RegisteredStrings.cpp new file mode 100644 index 0000000..ff72141 --- /dev/null +++ b/tests/RegisteredStrings.cpp @@ -0,0 +1,100 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#if defined(_MSC_VER) && _MSC_VER < 1914 +#define STATIC_ASSERT_TESTING 0 +#else +#define STATIC_ASSERT_TESTING 1 +#endif + +#if defined(STATIC_ASSERT_TESTING) +#include +#define NVTX3_STATIC_ASSERT(c, m) do { if (!(c)) printf("static_assert would fail: %s\n", m); } while (0) +#endif + +#include + +#include + +// Domain description types +struct d { static constexpr const char* name{"Test domain"}; }; + +// Registered string types +struct regstr_char_test { static constexpr const char* message{"Reg str char"}; }; +struct regstr_wchar_test { static constexpr const wchar_t* message{L"Reg str wchar_t"}; }; +struct error_msg_missing { static constexpr const char* x {"Name"}; }; +struct error_msg_is_bad_type { static constexpr const int message{5}; }; +struct regstr_global_domain1 { static constexpr const char* message{"Global1"}; }; +struct regstr_global_domain2 { static constexpr const char* message{"Global2"}; }; +struct regstr_global_domain3 { static constexpr const char* message{"Global3"}; }; + +extern "C" NVTX_DYNAMIC_EXPORT +int RunTest(int argc, const char** argv) +{ + NVTX_EXPORT_UNMANGLED_FUNCTION_NAME + + (void)argc; + (void)argv; + + using namespace nvtx3; + + auto& d1 = domain::get(); + +#if 1 + std::cout << "- Registered string (char):\n"; + auto& r1 = registered_string_in::get(); + mark_in("Mark in regstr_char_test category", registered_string_in::get()); + + std::cout << "- Registered string (wchar_t):\n"; + auto& r2 = registered_string_in::get(); + mark_in("Mark in regstr_wchar_test category", registered_string_in::get()); +#endif + +#if 1 + std::cout << "- Registered string in global domain (alias):\n"; + auto& rd1 = registered_string::get(); + + std::cout << "- Registered string in global domain 
(implicit):\n"; + auto& rd2 = registered_string_in<>::get(); + + std::cout << "- Registered string in global domain (explicit):\n"; + auto& rd3 = registered_string_in::get(); +#endif + +#if STATIC_ASSERT_TESTING + +#if 1 // defined(ERROR_TEST_MSG_IS_MISSING) + { + std::cout << "- Error test - registered string is missing name member:\n"; + auto& r3 = registered_string_in::get(); + } +#endif + +#if 1 // defined(ERROR_TEST_MSG_IS_BAD_TYPE) + { + std::cout << "- Error test - registered string message member isn't narrow or wide char array:\n"; + auto& r4 = registered_string_in::get(); + } +#endif + +#endif // STATIC_ASSERT_TESTING + + return 0; +} diff --git a/tests/RunTest.cpp b/tests/RunTest.cpp new file mode 100644 index 0000000..b432dd3 --- /dev/null +++ b/tests/RunTest.cpp @@ -0,0 +1,138 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. 
+ */ + +#include "PathHelper.h" +#include "DllHelper.h" +#include +#include +#include +#include + +static bool SetEnvVar(const char* name, const char* value) +{ +#if defined(_WIN32) + auto result = _putenv_s(name, value); +#else + auto result = setenv(name, value, 1); +#endif + return result == 0; +} + +static int MainInternal(int argc, const char** argv) +{ + const std::string testArg("-t"); + const std::string injectionArg("-i"); + std::string testName; + std::string injectionName; + + auto oldArgv = argv; + ++argv; + while (*argv) + { + if (*argv == testArg ) { ++argv; if (*argv) testName = *argv; else return 100; } + else if (*argv == injectionArg) { ++argv; if (*argv) injectionName = *argv; else return 101; } + else break; + ++argv; + } + argc -= (int)(argv - oldArgv); + + if (testName.empty()) + { + return 103; + } + + printf("RunTest:\n"); + + std::string test = AbsolutePathToLibraryInCurrentProcessPath(testName); + printf(" - Using test: %s\n", test.c_str()); + + std::string injection; + if (!injectionName.empty()) + { + const char* injectionVar = (sizeof(void*) == 8) + ? "NVTX_INJECTION64_PATH" + : "NVTX_INJECTION32_PATH"; + + // Passing - for the injection means to use the test library as its own injection + injection = (injectionName == "-") + ? test + : AbsolutePathToLibraryInCurrentProcessPath(injectionName); + + bool success = SetEnvVar(injectionVar, injection.c_str()); + if (!success) return 102; + } + + printf(" - Using injection: %s\n", injection.empty() ? 
"" : injection.c_str()); + + DLL_HANDLE hDll = DLL_OPEN(test.c_str()); + if (!hDll) return 104; + + using pfnRunTest_t = int(*)(int, const char**); + + auto pfnRunTest = (pfnRunTest_t)GET_DLL_FUNC(hDll, "RunTest"); + if (!pfnRunTest) return 105; + + int result = pfnRunTest(argc, argv); // Forward remaining args + if (result) return result; + + return 0; +} + +int main(int argc, const char** argv) +{ + int result = MainInternal(argc, argv); + if (result == 0) + { + printf("RunTest PASSED\n"); + } + else + { + // For error codes known to this test driver, print useful error descriptions. + // Otherwise, rely on test to print information about errors. + switch (result) + { + case 100: + puts("RunTest: -t requires an argument, the base name of the library to use as a test"); + break; + case 101: + puts("RunTest: -i requires an argument, the base name of the library to use as an injection"); + break; + case 102: + puts("RunTest: Failed to set NVTX injection environment variable"); + break; + case 103: + puts("RunTest: Missing required argument: -t "); + break; + case 104: + puts("RunTest: Test library failed to load"); +#ifndef _WIN32 + printf(" dlerror: %s\n", dlerror()); +#endif + break; + case 105: + puts("RunTest: Test library loaded, but does not export required entry point RunTest"); + break; + default: + printf("RunTest FAILED with return code: %d\n", result); + } + } + + return result; +} diff --git a/tests/Same.h b/tests/Same.h new file mode 100644 index 0000000..3024d50 --- /dev/null +++ b/tests/Same.h @@ -0,0 +1,186 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
// Guard against double inclusion: this header defines templates, inline
// functions and macros at namespace scope.
#pragma once

// NOTE(review): the header names were lost in this view of the file; these are
// restored from usage (strcmp, wcscmp, std::ostream/std::cout, smart pointers,
// std::string, std::enable_if) -- confirm against the real file.
#include <cstring>
#include <cwchar>
#include <iostream>
#include <memory>
#include <string>
#include <type_traits>

//-----------------------------------------------------------------------------------------------
// Implementations of "Same" function for various types
// Provides better comparison capabilities than operator==
// - Option for shallow or deep comparison (i.e. pointers vs. what they point at)
// - Option for verbose mode, with a custom ostream to write to
// - Option to specify name string for what is being compared
// - Option for indent depth, so nested comparisons can print unwinding mismatch messages
//-----------------------------------------------------------------------------------------------

// C++11-compatible SFINAE helpers to choose overloads based on whether a type is complete or not
// NOTE(review): template parameter lists in this header were lost in this view
// and are reconstructed from how each helper is used below -- confirm.
template <typename... Ts> struct make_void { typedef void type; };
template <typename... Ts> using void_t = typename make_void<Ts...>::type;
template <bool Cond> using enable_if = typename std::enable_if<Cond, int>::type;
template <typename T, typename = void> struct is_complete { static constexpr bool value = false; };
template <typename T> struct is_complete<T, void_t<decltype(sizeof(T))>> { static constexpr bool value = true; };

// Trailing parameters shared by every Same() overload:
//   deep    - compare pointed-to contents instead of pointer values
//   verbose - describe any difference on 'oss'
//   name    - label used in verbose messages
//   oss     - stream that receives verbose messages
//   depth   - indentation (in spaces) for verbose messages
#define SAME_COMMON_ARGS \
    bool deep = false, bool verbose = false, const char* name = "", std::ostream& oss = std::cout, int depth = 0

// Test if two objects are the same.  When 'deep' is true, ignore pointer values and only
// compare pointed-to contents, otherwise behave as operator==.  When 'verbose' is true,
// print information about differences to 'oss'.  The generic overload only works if there's
// an operator== and operator<< defined.
template <typename T>
inline auto Same(T const& lhs, T const& rhs, SAME_COMMON_ARGS)
    -> decltype(lhs == rhs, oss << lhs, bool())
{
    (void)deep; // values have no shallow/deep distinction
    bool objSame = lhs == rhs;
    if (verbose && !objSame)
    {
        oss << std::string(depth, ' ') << "'" << name << "' different: values are "
            << lhs << " and " << rhs
            // << " (type is " << typeid(lhs).name() << ")"
            << '\n';
    }
    return objSame;
}

// Generic pointer overload for complete types.
// Deep mode compares the pointees when both pointers are non-null.  If either
// pointer is null there is nothing to dereference, so the pointers themselves
// are compared: two nulls are the same, null vs. non-null is different.
template <typename T, enable_if<is_complete<T>::value> = 0>
inline bool Same(T* lhs, T* rhs, SAME_COMMON_ARGS)
{
    if (deep && lhs && rhs)
    {
        return Same(*lhs, *rhs, deep, verbose, name, oss, depth);
    }

    bool ptrSame = lhs == rhs;
    if (verbose && !ptrSame)
    {
        oss << std::string(depth, ' ') << "'" << name << "' different: pointer values are 0x"
            << static_cast<const void*>(lhs) << " and 0x" << static_cast<const void*>(rhs) << '\n';
    }
    return ptrSame;
}

// Generic pointer overload for incomplete types
template <typename T, enable_if<!is_complete<T>::value> = 0>
inline bool Same(T* lhs, T* rhs, SAME_COMMON_ARGS)
{
    // Don't know how to deep-compare incomplete types, so always compare pointers
    (void)deep;
    bool ptrSame = lhs == rhs;
    if (verbose && !ptrSame)
    {
        oss << std::string(depth, ' ') << "'" << name << "' different: pointer values (to incomplete type) are 0x"
            << static_cast<const void*>(lhs) << " and 0x" << static_cast<const void*>(rhs) << '\n';
    }
    return ptrSame;
}

// Overloads for smart pointers -- in all cases, forward to contained raw pointer.
// In deep mode the comparison will be on the pointed-at objects, and in non-deep
// mode the comparison will be on the raw pointer values.
template <typename T>
inline bool Same(std::shared_ptr<T> const& lhs, std::shared_ptr<T> const& rhs, SAME_COMMON_ARGS)
{
    return Same(lhs.get(), rhs.get(), deep, verbose, name, oss, depth);
}

template <typename T, typename D>
inline bool Same(std::unique_ptr<T, D> const& lhs, std::unique_ptr<T, D> const& rhs, SAME_COMMON_ARGS)
{
    return Same(lhs.get(), rhs.get(), deep, verbose, name, oss, depth);
}

// Overloads for C-style strings (narrow and wide).
// Deep mode compares the characters when both strings are non-null; a null on
// either side falls back to comparing the pointers, as for generic pointers.
inline bool Same(char const* lhs, char const* rhs, SAME_COMMON_ARGS)
{
    if (deep && lhs && rhs)
    {
        bool strSame = strcmp(lhs, rhs) == 0;
        if (verbose && !strSame)
        {
            oss << std::string(depth, ' ') << "'" << name << "' different: char strings are \""
                << lhs << "\" and \"" << rhs << "\"\n";
        }
        return strSame;
    }

    bool ptrSame = lhs == rhs;
    if (verbose && !ptrSame)
    {
        oss << std::string(depth, ' ') << "'" << name << "' different: pointer values are "
            << static_cast<const void*>(lhs) << " and " << static_cast<const void*>(rhs) << '\n';
    }
    return ptrSame;
}

// Renders a non-null wide string on a narrow stream for diagnostics: printable
// ASCII is kept, every other character becomes '?'.
inline std::string SameNarrowForDisplay(wchar_t const* wide)
{
    std::string narrow;
    for (; *wide; ++wide)
    {
        narrow += (*wide >= 0x20 && *wide < 0x7f) ? static_cast<char>(*wide) : '?';
    }
    return narrow;
}

inline bool Same(wchar_t const* lhs, wchar_t const* rhs, SAME_COMMON_ARGS)
{
    if (deep && lhs && rhs)
    {
        bool strSame = wcscmp(lhs, rhs) == 0;
        if (verbose && !strSame)
        {
            // Show the (narrowed) contents so the message says what differed
            oss << std::string(depth, ' ') << "'" << name << "' different: wchar_t strings are L\""
                << SameNarrowForDisplay(lhs) << "\" and L\"" << SameNarrowForDisplay(rhs) << "\"\n";
        }
        return strSame;
    }

    bool ptrSame = lhs == rhs;
    if (verbose && !ptrSame)
    {
        oss << std::string(depth, ' ') << "'" << name << "' different: pointer values are "
            << static_cast<const void*>(lhs) << " and " << static_cast<const void*>(rhs) << '\n';
    }
    return ptrSame;
}

// Helper macros to define Same() overloads (and operators == and !=) for struct and tagged union types

// Compare one member of lhs/rhs, labelled with the member's name, one indent level deeper
#define MEMBER_SAME(member) Same(lhs.member, rhs.member, deep, verbose, #member, oss, depth + 1)
// Compare a union member only when lhs's tag field selects it
#define UNION_MEMBER_SAME(tagField, tagValue, member) (lhs.tagField == tagValue && MEMBER_SAME(member))

// Expects a local 'bool same' holding the result of the member comparisons
#define VERBOSE_PRINT() if (verbose && !same) oss << std::string(depth, ' ') << "'" << name << "' members different\n"

#define EQ_SIG(T) inline bool operator==(T const& lhs, T const& rhs)
#define NE_FROM_EQ(T) inline bool operator!=(T const& lhs, T const& rhs) { return !(lhs == rhs); }

#define DEFINE_EQ_NE_DEEP(T) EQ_SIG(T) { return Same(lhs, rhs, true ); } NE_FROM_EQ(T)
#define DEFINE_EQ_NE_SHALLOW(T) EQ_SIG(T) { return Same(lhs, rhs, false); } NE_FROM_EQ(T)

#define DEFINE_MEMBER_SAME_1(a) MEMBER_SAME(a)
#define DEFINE_MEMBER_SAME_2(a, b) MEMBER_SAME(a) && DEFINE_MEMBER_SAME_1(b)
#define DEFINE_MEMBER_SAME_3(a, b, c) MEMBER_SAME(a) && DEFINE_MEMBER_SAME_2(b, c)

#define SAME_SIG(T) inline bool Same(T const& lhs, T const& rhs, SAME_COMMON_ARGS)

// DEFINE_SAME_N(T, members...) defines Same() for struct T over N members, plus
// operator== / operator!= that perform a deep comparison
#define DEFINE_SAME_0(T) SAME_SIG(T) { return true; } DEFINE_EQ_NE_DEEP(T)
#define DEFINE_SAME_1(T, a) SAME_SIG(T) { bool same = DEFINE_MEMBER_SAME_1(a); VERBOSE_PRINT(); return same; } DEFINE_EQ_NE_DEEP(T)
#define DEFINE_SAME_2(T, a, b) SAME_SIG(T) { bool same = DEFINE_MEMBER_SAME_2(a, b); VERBOSE_PRINT(); return same; } DEFINE_EQ_NE_DEEP(T)
#define DEFINE_SAME_3(T, a, b, c) SAME_SIG(T) { bool same = DEFINE_MEMBER_SAME_3(a, b, c); VERBOSE_PRINT(); return same; } DEFINE_EQ_NE_DEEP(T)
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#include "SelfInjection.h" +#include + +#if defined(NVTX_INJECTION_TEST_QUIET) +#define LOG_ERROR(...) +#else +#define LOG_ERROR(...) do { fprintf(stderr, " [inj] ERROR: " __VA_ARGS__); } while (0) +#endif + +Callbacks g_callbacks; + +namespace { + +/* NVTX_CB_MODULE_CORE */ +void NVTX_API HandleMarkEx (const nvtxEventAttributes_t* eventAttrib) { g_callbacks.MarkEx (eventAttrib); } +void NVTX_API HandleMarkA (const char* str ) { g_callbacks.MarkA (str ); } +void NVTX_API HandleMarkW (const wchar_t* str ) { g_callbacks.MarkW (str ); } +nvtxRangeId_t NVTX_API HandleRangeStartEx (const nvtxEventAttributes_t* eventAttrib) { return g_callbacks.RangeStartEx (eventAttrib); } +nvtxRangeId_t NVTX_API HandleRangeStartA (const char* str ) { return g_callbacks.RangeStartA (str ); } +nvtxRangeId_t NVTX_API HandleRangeStartW (const wchar_t* str ) { return g_callbacks.RangeStartW (str ); } +void NVTX_API HandleRangeEnd (nvtxRangeId_t id ) { g_callbacks.RangeEnd (id ); } +int NVTX_API HandleRangePushEx (const nvtxEventAttributes_t* eventAttrib) { return g_callbacks.RangePushEx (eventAttrib); } +int NVTX_API HandleRangePushA (const char* str ) { return g_callbacks.RangePushA (str ); } +int NVTX_API HandleRangePushW (const wchar_t* str ) { return g_callbacks.RangePushW (str ); } +int NVTX_API HandleRangePop ( ) { return g_callbacks.RangePop ( ); } +void NVTX_API HandleNameCategoryA(uint32_t id, const char* str ) { g_callbacks.NameCategoryA(id, str ); } +void NVTX_API HandleNameCategoryW(uint32_t id, const wchar_t* str ) { g_callbacks.NameCategoryW(id, str ); } +void NVTX_API HandleNameOsThreadA(uint32_t id, const char* str ) { g_callbacks.NameOsThreadA(id, str ); } +void NVTX_API HandleNameOsThreadW(uint32_t id, const wchar_t* 
str ) { g_callbacks.NameOsThreadW(id, str ); } + +/* NVTX_CB_MODULE_CORE2 */ +void NVTX_API HandleDomainMarkEx (nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib) { g_callbacks.DomainMarkEx (domain, eventAttrib); } +nvtxRangeId_t NVTX_API HandleDomainRangeStartEx (nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib) { return g_callbacks.DomainRangeStartEx (domain, eventAttrib); } +void NVTX_API HandleDomainRangeEnd (nvtxDomainHandle_t domain, nvtxRangeId_t id ) { g_callbacks.DomainRangeEnd (domain, id ); } +int NVTX_API HandleDomainRangePushEx (nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib) { return g_callbacks.DomainRangePushEx (domain, eventAttrib); } +int NVTX_API HandleDomainRangePop (nvtxDomainHandle_t domain ) { return g_callbacks.DomainRangePop (domain ); } +nvtxResourceHandle_t NVTX_API HandleDomainResourceCreate (nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attr ) { return g_callbacks.DomainResourceCreate (domain, attr ); } +void NVTX_API HandleDomainResourceDestroy(nvtxResourceHandle_t attr ) { g_callbacks.DomainResourceDestroy(attr ); } +void NVTX_API HandleDomainNameCategoryA (nvtxDomainHandle_t domain, uint32_t id, const char* str ) { g_callbacks.DomainNameCategoryA (domain, id, str ); } +void NVTX_API HandleDomainNameCategoryW (nvtxDomainHandle_t domain, uint32_t id, const wchar_t* str ) { g_callbacks.DomainNameCategoryW (domain, id, str ); } +nvtxStringHandle_t NVTX_API HandleDomainRegisterStringA(nvtxDomainHandle_t domain, const char* str ) { return g_callbacks.DomainRegisterStringA(domain, str ); } +nvtxStringHandle_t NVTX_API HandleDomainRegisterStringW(nvtxDomainHandle_t domain, const wchar_t* str ) { return g_callbacks.DomainRegisterStringW(domain, str ); } +nvtxDomainHandle_t NVTX_API HandleDomainCreateA (const char* name ) { return g_callbacks.DomainCreateA (name ); } +nvtxDomainHandle_t NVTX_API HandleDomainCreateW (const wchar_t* name ) { return g_callbacks.DomainCreateW (name 
); } +void NVTX_API HandleDomainDestroy (nvtxDomainHandle_t domain ) { g_callbacks.DomainDestroy (domain ); } +void NVTX_API HandleInitialize (const void* reserved ) { g_callbacks.Initialize (reserved ); } + +} + +extern "C" NVTX_DYNAMIC_EXPORT +int NVTX_API InitializeInjectionNvtx2(NvtxGetExportTableFunc_t getExportTable) +{ + NVTX_EXPORT_UNMANGLED_FUNCTION_NAME + + uint32_t version = 0; + const NvtxExportTableVersionInfo* pVersionInfo = + (const NvtxExportTableVersionInfo*)getExportTable(NVTX_ETID_VERSIONINFO); + if (pVersionInfo) + { + if (pVersionInfo->struct_size < sizeof(*pVersionInfo)) + { + LOG_ERROR( + "(init v2) NvtxExportTableVersionInfo structure size is %d, expected %d!\n", + (int)pVersionInfo->struct_size, + (int)sizeof(*pVersionInfo)); + g_callbacks.Load(0); + return 0; + } + + version = pVersionInfo->version; + if (version < 2) + { + LOG_ERROR( + "(init v2) client's NVTX version is %d, expected 2+\n", + (int)version); + g_callbacks.Load(0); + return 0; + } + } + + const NvtxExportTableCallbacks* pCallbacks = + (const NvtxExportTableCallbacks*)getExportTable(NVTX_ETID_CALLBACKS); + if (!pCallbacks) + { + LOG_ERROR("(init v2) NVTX_ETID_CALLBACKS is not supported.\n"); + g_callbacks.Load(0); + return 0; + } + + if (pCallbacks->struct_size < sizeof(*pCallbacks)) + { + LOG_ERROR("(init v2) NvtxExportTableCallbacks structure size is %d, expected %d!\n", + (int)pCallbacks->struct_size, + (int)sizeof(*pCallbacks)); + g_callbacks.Load(0); + return 0; + } + + { + NvtxFunctionTable table = 0; + unsigned int size = 0; + int success = pCallbacks->GetModuleFunctionTable(NVTX_CB_MODULE_CORE, &table, &size); + if (!success || !table) + { + LOG_ERROR("(init v2) NVTX_CB_MODULE_CORE is not supported.\n"); + g_callbacks.Load(0); + return 0; + } + + /* Ensure client's table is new enough to support the function pointers we want to register */ + unsigned int highestIdUsed = NVTX_CBID_CORE_RangePop; /* Can auto-detect this in C++ */ + if (size <= highestIdUsed) + { + 
LOG_ERROR("(init v2) Client's function pointer table size is %d, and we need to assign to table[%d].\n", + (int)size, + (int)highestIdUsed); + g_callbacks.Load(0); + return 0; + } + + *table[NVTX_CBID_CORE_MarkEx ] = (NvtxFunctionPointer)HandleMarkEx ; + *table[NVTX_CBID_CORE_MarkA ] = (NvtxFunctionPointer)HandleMarkA ; + *table[NVTX_CBID_CORE_MarkW ] = (NvtxFunctionPointer)HandleMarkW ; + *table[NVTX_CBID_CORE_RangeStartEx ] = (NvtxFunctionPointer)HandleRangeStartEx ; + *table[NVTX_CBID_CORE_RangeStartA ] = (NvtxFunctionPointer)HandleRangeStartA ; + *table[NVTX_CBID_CORE_RangeStartW ] = (NvtxFunctionPointer)HandleRangeStartW ; + *table[NVTX_CBID_CORE_RangeEnd ] = (NvtxFunctionPointer)HandleRangeEnd ; + *table[NVTX_CBID_CORE_RangePushEx ] = (NvtxFunctionPointer)HandleRangePushEx ; + *table[NVTX_CBID_CORE_RangePushA ] = (NvtxFunctionPointer)HandleRangePushA ; + *table[NVTX_CBID_CORE_RangePushW ] = (NvtxFunctionPointer)HandleRangePushW ; + *table[NVTX_CBID_CORE_RangePop ] = (NvtxFunctionPointer)HandleRangePop ; + *table[NVTX_CBID_CORE_NameCategoryA] = (NvtxFunctionPointer)HandleNameCategoryA; + *table[NVTX_CBID_CORE_NameCategoryW] = (NvtxFunctionPointer)HandleNameCategoryW; + *table[NVTX_CBID_CORE_NameOsThreadA] = (NvtxFunctionPointer)HandleNameOsThreadA; + *table[NVTX_CBID_CORE_NameOsThreadW] = (NvtxFunctionPointer)HandleNameOsThreadW; + } + + { + NvtxFunctionTable table = 0; + unsigned int size = 0; + int success = pCallbacks->GetModuleFunctionTable(NVTX_CB_MODULE_CORE2, &table, &size); + if (!success || !table) + { + LOG_ERROR("(init v2) NVTX_CB_MODULE_CORE2 is not supported.\n"); + g_callbacks.Load(0); + return 0; + } + + /* Ensure client's table is new enough to support the function pointers we want to register */ + unsigned int highestIdUsed = NVTX_CBID_CORE2_Initialize; /* Can auto-detect this in C++ */ + if (size <= highestIdUsed) + { + LOG_ERROR("(init v2) Client's function pointer table size is %d, and we need to assign to table[%d].\n", + (int)size, + 
(int)highestIdUsed); + g_callbacks.Load(0); + return 0; + } + + *table[NVTX_CBID_CORE2_DomainMarkEx ] = (NvtxFunctionPointer)HandleDomainMarkEx ; + *table[NVTX_CBID_CORE2_DomainRangeStartEx ] = (NvtxFunctionPointer)HandleDomainRangeStartEx ; + *table[NVTX_CBID_CORE2_DomainRangeEnd ] = (NvtxFunctionPointer)HandleDomainRangeEnd ; + *table[NVTX_CBID_CORE2_DomainRangePushEx ] = (NvtxFunctionPointer)HandleDomainRangePushEx ; + *table[NVTX_CBID_CORE2_DomainRangePop ] = (NvtxFunctionPointer)HandleDomainRangePop ; + *table[NVTX_CBID_CORE2_DomainResourceCreate ] = (NvtxFunctionPointer)HandleDomainResourceCreate ; + *table[NVTX_CBID_CORE2_DomainResourceDestroy] = (NvtxFunctionPointer)HandleDomainResourceDestroy; + *table[NVTX_CBID_CORE2_DomainNameCategoryA ] = (NvtxFunctionPointer)HandleDomainNameCategoryA ; + *table[NVTX_CBID_CORE2_DomainNameCategoryW ] = (NvtxFunctionPointer)HandleDomainNameCategoryW ; + *table[NVTX_CBID_CORE2_DomainRegisterStringA] = (NvtxFunctionPointer)HandleDomainRegisterStringA; + *table[NVTX_CBID_CORE2_DomainRegisterStringW] = (NvtxFunctionPointer)HandleDomainRegisterStringW; + *table[NVTX_CBID_CORE2_DomainCreateA ] = (NvtxFunctionPointer)HandleDomainCreateA ; + *table[NVTX_CBID_CORE2_DomainCreateW ] = (NvtxFunctionPointer)HandleDomainCreateW ; + *table[NVTX_CBID_CORE2_DomainDestroy ] = (NvtxFunctionPointer)HandleDomainDestroy ; + *table[NVTX_CBID_CORE2_Initialize ] = (NvtxFunctionPointer)HandleInitialize ; + } + + g_callbacks.Load(1); + return 1; +} diff --git a/tests/SelfInjection.h b/tests/SelfInjection.h new file mode 100644 index 0000000..7c5625d --- /dev/null +++ b/tests/SelfInjection.h @@ -0,0 +1,688 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#define NVTX_NO_IMPL +#include "nvtx3/nvToolsExt.h" + +#include "Same.h" +#include "PrettyPrintersNvtxC.h" + +#include +#include +#include +#include +#include +#include + +constexpr auto NVTX_TOOL_ATTACHED_UNUSED_RANGE_ID = static_cast(-1LL); +constexpr int NVTX_TOOL_ATTACHED_UNUSED_PUSH_POP_ID = -1; +const auto NVTX_TOOL_ATTACHED_UNUSED_DOMAIN_HANDLE = reinterpret_cast(-1LL); +const auto NVTX_TOOL_ATTACHED_UNUSED_STRING_HANDLE = reinterpret_cast(-1LL); +const auto NVTX_TOOL_ATTACHED_UNUSED_RESOURCE_HANDLE = reinterpret_cast(-1LL); + +struct ArgsLoad { int success; }; + +struct ArgsMarkEx { const nvtxEventAttributes_t* eventAttrib; }; +struct ArgsMarkA { const char* str ; }; +struct ArgsMarkW { const wchar_t* str ; }; +struct ArgsRangeStartEx { const nvtxEventAttributes_t* eventAttrib; }; +struct ArgsRangeStartA { const char* str ; }; +struct ArgsRangeStartW { const wchar_t* str ; }; +struct ArgsRangeEnd { nvtxRangeId_t id ; }; +struct ArgsRangePushEx { const nvtxEventAttributes_t* eventAttrib; }; +struct ArgsRangePushA { const char* str ; }; +struct ArgsRangePushW { const wchar_t* str ; }; +struct ArgsRangePop { ; }; +struct ArgsNameCategoryA { uint32_t id; const char* str ; }; +struct ArgsNameCategoryW { uint32_t id; const wchar_t* str ; }; +struct ArgsNameOsThreadA { uint32_t id; const char* str ; }; +struct ArgsNameOsThreadW { uint32_t id; const wchar_t* str ; }; + +struct ArgsDomainMarkEx { 
nvtxDomainHandle_t domain; const nvtxEventAttributes_t* eventAttrib; }; +struct ArgsDomainRangeStartEx { nvtxDomainHandle_t domain; const nvtxEventAttributes_t* eventAttrib; }; +struct ArgsDomainRangeEnd { nvtxDomainHandle_t domain; nvtxRangeId_t id ; }; +struct ArgsDomainRangePushEx { nvtxDomainHandle_t domain; const nvtxEventAttributes_t* eventAttrib; }; +struct ArgsDomainRangePop { nvtxDomainHandle_t domain ; }; +struct ArgsDomainResourceCreate { nvtxDomainHandle_t domain; nvtxResourceAttributes_t* attr ; }; +struct ArgsDomainResourceDestroy { nvtxResourceHandle_t attr ; }; +struct ArgsDomainNameCategoryA { nvtxDomainHandle_t domain; uint32_t id; const char* str ; }; +struct ArgsDomainNameCategoryW { nvtxDomainHandle_t domain; uint32_t id; const wchar_t* str ; }; +struct ArgsDomainRegisterStringA { nvtxDomainHandle_t domain; const char* str ; }; +struct ArgsDomainRegisterStringW { nvtxDomainHandle_t domain; const wchar_t* str ; }; +struct ArgsDomainCreateA { const char* name ; }; +struct ArgsDomainCreateW { const wchar_t* name ; }; +struct ArgsDomainDestroy { nvtxDomainHandle_t domain ; }; +struct ArgsInitialize { const void* reserved ; }; + +struct CallId +{ + NvtxCallbackModule mod; + int32_t cb; +}; +DEFINE_SAME_2(CallId, mod, cb) + +// Helper to write CALLID(CORE, MarkEx) as shorthand for CallId{NVTX_CB_MODULE_CORE, NVTX_CBID_CORE_MarkEx} +#define CALLID(m,c) CallId{NVTX_CB_MODULE_##m, (int32_t)NVTX_CBID_##m##_##c} + +#define CALLID_LOAD() CallId{NVTX_CB_MODULE_INVALID, (int32_t)0x7ac0be11} + +inline const char* CallName(CallId const& id) +{ + if (id == CALLID_LOAD()) return "InitializeInjectionNvtx2"; + switch (id.mod) + { + case NVTX_CB_MODULE_CORE: + switch (id.cb) + { + case NVTX_CBID_CORE_MarkEx : return "MarkEx"; + case NVTX_CBID_CORE_MarkA : return "MarkA"; + case NVTX_CBID_CORE_MarkW : return "MarkW"; + case NVTX_CBID_CORE_RangeStartEx : return "RangeStartEx"; + case NVTX_CBID_CORE_RangeStartA : return "RangeStartA"; + case NVTX_CBID_CORE_RangeStartW 
: return "RangeStartW"; + case NVTX_CBID_CORE_RangeEnd : return "RangeEnd"; + case NVTX_CBID_CORE_RangePushEx : return "RangePushEx"; + case NVTX_CBID_CORE_RangePushA : return "RangePushA"; + case NVTX_CBID_CORE_RangePushW : return "RangePushW"; + case NVTX_CBID_CORE_RangePop : return "RangePop"; + case NVTX_CBID_CORE_NameCategoryA: return "NameCategoryA"; + case NVTX_CBID_CORE_NameCategoryW: return "NameCategoryW"; + case NVTX_CBID_CORE_NameOsThreadA: return "NameOsThreadA"; + case NVTX_CBID_CORE_NameOsThreadW: return "NameOsThreadW"; + default: return ""; + } + case NVTX_CB_MODULE_CORE2: + switch (id.cb) + { + case NVTX_CBID_CORE2_DomainMarkEx : return "DomainMarkEx"; + case NVTX_CBID_CORE2_DomainRangeStartEx : return "DomainRangeStartEx"; + case NVTX_CBID_CORE2_DomainRangeEnd : return "DomainRangeEnd"; + case NVTX_CBID_CORE2_DomainRangePushEx : return "DomainRangePushEx"; + case NVTX_CBID_CORE2_DomainRangePop : return "DomainRangePop"; + case NVTX_CBID_CORE2_DomainResourceCreate : return "DomainResourceCreate"; + case NVTX_CBID_CORE2_DomainResourceDestroy: return "DomainResourceDestroy"; + case NVTX_CBID_CORE2_DomainNameCategoryA : return "DomainNameCategoryA"; + case NVTX_CBID_CORE2_DomainNameCategoryW : return "DomainNameCategoryW"; + case NVTX_CBID_CORE2_DomainRegisterStringA: return "DomainRegisterStringA"; + case NVTX_CBID_CORE2_DomainRegisterStringW: return "DomainRegisterStringW"; + case NVTX_CBID_CORE2_DomainCreateA : return "DomainCreateA"; + case NVTX_CBID_CORE2_DomainCreateW : return "DomainCreateW"; + case NVTX_CBID_CORE2_DomainDestroy : return "DomainDestroy"; + case NVTX_CBID_CORE2_Initialize : return "Initialize"; + default: return ""; + } + default: return ""; + } +} + +inline std::ostream& operator<<(std::ostream& os, CallId const& id) +{ + return os << CallName(id); +}; + +union Args +{ + ArgsLoad Load; + + ArgsMarkEx MarkEx ; + ArgsMarkA MarkA ; + ArgsMarkW MarkW ; + ArgsRangeStartEx RangeStartEx ; + ArgsRangeStartA RangeStartA ; + 
ArgsRangeStartW RangeStartW ; + ArgsRangeEnd RangeEnd ; + ArgsRangePushEx RangePushEx ; + ArgsRangePushA RangePushA ; + ArgsRangePushW RangePushW ; + ArgsRangePop RangePop ; + ArgsNameCategoryA NameCategoryA; + ArgsNameCategoryW NameCategoryW; + ArgsNameOsThreadA NameOsThreadA; + ArgsNameOsThreadW NameOsThreadW; + + ArgsDomainMarkEx DomainMarkEx ; + ArgsDomainRangeStartEx DomainRangeStartEx ; + ArgsDomainRangeEnd DomainRangeEnd ; + ArgsDomainRangePushEx DomainRangePushEx ; + ArgsDomainRangePop DomainRangePop ; + ArgsDomainResourceCreate DomainResourceCreate ; + ArgsDomainResourceDestroy DomainResourceDestroy; + ArgsDomainNameCategoryA DomainNameCategoryA ; + ArgsDomainNameCategoryW DomainNameCategoryW ; + ArgsDomainRegisterStringA DomainRegisterStringA; + ArgsDomainRegisterStringW DomainRegisterStringW; + ArgsDomainCreateA DomainCreateA ; + ArgsDomainCreateW DomainCreateW ; + ArgsDomainDestroy DomainDestroy ; + ArgsInitialize Initialize ; +}; + +// Free functions to emulate copy constructors and destructors for the NVTX C API types using pointers +inline void CopyCstring(const char*& lhs, const char* rhs) +{ + size_t len = strlen(rhs) + 1; + auto* tmp = new char[len]; + std::copy(rhs, rhs + len, tmp); + lhs = tmp; +} +inline void CopyCstring(const char*& s) { CopyCstring(s, s); } +inline void DestroyCstring(const char* s) { delete[] s; } + +inline void CopyCstring(const wchar_t*& lhs, const wchar_t* rhs) +{ + size_t len = wcslen(rhs) + 1; + auto* tmp = new wchar_t[len]; + std::copy(rhs, rhs + len, tmp); + lhs = tmp; +} +inline void CopyCstring(const wchar_t*& s) { CopyCstring(s, s); } +inline void DestroyCstring(const wchar_t* s) { delete[] s; } + +inline void CopyEventAttributes(const nvtxEventAttributes_t*& lhs, const nvtxEventAttributes_t* rhs) +{ + auto* tmp = new nvtxEventAttributes_t; + memcpy(tmp, rhs, sizeof(*tmp)); + switch (tmp->messageType) + { + case NVTX_MESSAGE_TYPE_ASCII: CopyCstring(tmp->message.ascii); break; + case NVTX_MESSAGE_TYPE_UNICODE: 
CopyCstring(tmp->message.unicode); break; + } + lhs = tmp; +} +inline void CopyEventAttributes(const nvtxEventAttributes_t*& a) { CopyEventAttributes(a, a); } +inline void DestroyEventAttributes(const nvtxEventAttributes_t* a) +{ + switch (a->messageType) + { + case NVTX_MESSAGE_TYPE_ASCII: DestroyCstring(a->message.ascii); break; + case NVTX_MESSAGE_TYPE_UNICODE: DestroyCstring(a->message.unicode); break; + } + delete a; +} + +inline void CopyResourceAttributes(nvtxResourceAttributes_t*& lhs, const nvtxResourceAttributes_t* rhs) +{ + auto* tmp = new nvtxResourceAttributes_t; + memcpy(tmp, rhs, sizeof(*tmp)); + switch (tmp->messageType) + { + case NVTX_MESSAGE_TYPE_ASCII: CopyCstring(tmp->message.ascii); break; + case NVTX_MESSAGE_TYPE_UNICODE: CopyCstring(tmp->message.unicode); break; + } + lhs = tmp; +} +inline void CopyResourceAttributes(nvtxResourceAttributes_t*& a) { CopyResourceAttributes(a, a); } +inline void DestroyResourceAttributes(nvtxResourceAttributes_t* a) +{ + switch (a->messageType) + { + case NVTX_MESSAGE_TYPE_ASCII: DestroyCstring(a->message.ascii); break; + case NVTX_MESSAGE_TYPE_UNICODE: DestroyCstring(a->message.unicode); break; + } + delete a; +} + +template inline void DeepCopyAssign(ArgsT& lhs, ArgsT const& rhs) { lhs = rhs; } + +template <> inline void DeepCopyAssign(ArgsMarkEx & lhs, ArgsMarkEx const& rhs) { lhs = rhs; CopyEventAttributes(lhs.eventAttrib); } +template <> inline void DeepCopyAssign(ArgsMarkA & lhs, ArgsMarkA const& rhs) { lhs = rhs; CopyCstring(lhs.str); } +template <> inline void DeepCopyAssign(ArgsMarkW & lhs, ArgsMarkW const& rhs) { lhs = rhs; CopyCstring(lhs.str); } +template <> inline void DeepCopyAssign(ArgsRangeStartEx & lhs, ArgsRangeStartEx const& rhs) { lhs = rhs; CopyEventAttributes(lhs.eventAttrib); } +template <> inline void DeepCopyAssign(ArgsRangeStartA & lhs, ArgsRangeStartA const& rhs) { lhs = rhs; CopyCstring(lhs.str); } +template <> inline void DeepCopyAssign(ArgsRangeStartW & lhs, ArgsRangeStartW const& 
rhs) { lhs = rhs; CopyCstring(lhs.str); } +template <> inline void DeepCopyAssign(ArgsRangeEnd & lhs, ArgsRangeEnd const& rhs) { lhs = rhs; } +template <> inline void DeepCopyAssign(ArgsRangePushEx & lhs, ArgsRangePushEx const& rhs) { lhs = rhs; CopyEventAttributes(lhs.eventAttrib); } +template <> inline void DeepCopyAssign(ArgsRangePushA & lhs, ArgsRangePushA const& rhs) { lhs = rhs; CopyCstring(lhs.str); } +template <> inline void DeepCopyAssign(ArgsRangePushW & lhs, ArgsRangePushW const& rhs) { lhs = rhs; CopyCstring(lhs.str); } +template <> inline void DeepCopyAssign(ArgsRangePop & lhs, ArgsRangePop const& rhs) { lhs = rhs; } +template <> inline void DeepCopyAssign(ArgsNameCategoryA& lhs, ArgsNameCategoryA const& rhs) { lhs = rhs; CopyCstring(lhs.str); } +template <> inline void DeepCopyAssign(ArgsNameCategoryW& lhs, ArgsNameCategoryW const& rhs) { lhs = rhs; CopyCstring(lhs.str); } +template <> inline void DeepCopyAssign(ArgsNameOsThreadA& lhs, ArgsNameOsThreadA const& rhs) { lhs = rhs; CopyCstring(lhs.str); } +template <> inline void DeepCopyAssign(ArgsNameOsThreadW& lhs, ArgsNameOsThreadW const& rhs) { lhs = rhs; CopyCstring(lhs.str); } + +template <> inline void DeepCopyAssign(ArgsDomainMarkEx & lhs, ArgsDomainMarkEx const& rhs) { lhs = rhs; CopyEventAttributes(lhs.eventAttrib); } +template <> inline void DeepCopyAssign(ArgsDomainRangeStartEx & lhs, ArgsDomainRangeStartEx const& rhs) { lhs = rhs; CopyEventAttributes(lhs.eventAttrib); } +template <> inline void DeepCopyAssign(ArgsDomainRangeEnd & lhs, ArgsDomainRangeEnd const& rhs) { lhs = rhs; } +template <> inline void DeepCopyAssign(ArgsDomainRangePushEx & lhs, ArgsDomainRangePushEx const& rhs) { lhs = rhs; CopyEventAttributes(lhs.eventAttrib); } +template <> inline void DeepCopyAssign(ArgsDomainRangePop & lhs, ArgsDomainRangePop const& rhs) { lhs = rhs; } +template <> inline void DeepCopyAssign(ArgsDomainResourceCreate & lhs, ArgsDomainResourceCreate const& rhs) { lhs = rhs; 
CopyResourceAttributes(lhs.attr); } +template <> inline void DeepCopyAssign(ArgsDomainResourceDestroy& lhs, ArgsDomainResourceDestroy const& rhs) { lhs = rhs; } +template <> inline void DeepCopyAssign(ArgsDomainNameCategoryA & lhs, ArgsDomainNameCategoryA const& rhs) { lhs = rhs; CopyCstring(lhs.str); } +template <> inline void DeepCopyAssign(ArgsDomainNameCategoryW & lhs, ArgsDomainNameCategoryW const& rhs) { lhs = rhs; CopyCstring(lhs.str); } +template <> inline void DeepCopyAssign(ArgsDomainRegisterStringA& lhs, ArgsDomainRegisterStringA const& rhs) { lhs = rhs; CopyCstring(lhs.str); } +template <> inline void DeepCopyAssign(ArgsDomainRegisterStringW& lhs, ArgsDomainRegisterStringW const& rhs) { lhs = rhs; CopyCstring(lhs.str); } +template <> inline void DeepCopyAssign(ArgsDomainCreateA & lhs, ArgsDomainCreateA const& rhs) { lhs = rhs; CopyCstring(lhs.name); } +template <> inline void DeepCopyAssign(ArgsDomainCreateW & lhs, ArgsDomainCreateW const& rhs) { lhs = rhs; CopyCstring(lhs.name); } +template <> inline void DeepCopyAssign(ArgsDomainDestroy & lhs, ArgsDomainDestroy const& rhs) { lhs = rhs; } +template <> inline void DeepCopyAssign(ArgsInitialize & lhs, ArgsInitialize const& rhs) { lhs = rhs; } + +template inline void DeepCopyDestroy(ArgsT&) {} + +template <> inline void DeepCopyDestroy(ArgsMarkEx & args) { DestroyEventAttributes(args.eventAttrib); } +template <> inline void DeepCopyDestroy(ArgsMarkA & args) { DestroyCstring(args.str); } +template <> inline void DeepCopyDestroy(ArgsMarkW & args) { DestroyCstring(args.str); } +template <> inline void DeepCopyDestroy(ArgsRangeStartEx & args) { DestroyEventAttributes(args.eventAttrib); } +template <> inline void DeepCopyDestroy(ArgsRangeStartA & args) { DestroyCstring(args.str); } +template <> inline void DeepCopyDestroy(ArgsRangeStartW & args) { DestroyCstring(args.str); } +template <> inline void DeepCopyDestroy(ArgsRangeEnd & args) { } +template <> inline void DeepCopyDestroy(ArgsRangePushEx & args) { 
DestroyEventAttributes(args.eventAttrib); } +template <> inline void DeepCopyDestroy(ArgsRangePushA & args) { DestroyCstring(args.str); } +template <> inline void DeepCopyDestroy(ArgsRangePushW & args) { DestroyCstring(args.str); } +template <> inline void DeepCopyDestroy(ArgsRangePop & args) { } +template <> inline void DeepCopyDestroy(ArgsNameCategoryA& args) { DestroyCstring(args.str); } +template <> inline void DeepCopyDestroy(ArgsNameCategoryW& args) { DestroyCstring(args.str); } +template <> inline void DeepCopyDestroy(ArgsNameOsThreadA& args) { DestroyCstring(args.str); } +template <> inline void DeepCopyDestroy(ArgsNameOsThreadW& args) { DestroyCstring(args.str); } + +template <> inline void DeepCopyDestroy(ArgsDomainMarkEx & args) { DestroyEventAttributes(args.eventAttrib); } +template <> inline void DeepCopyDestroy(ArgsDomainRangeStartEx & args) { DestroyEventAttributes(args.eventAttrib); } +template <> inline void DeepCopyDestroy(ArgsDomainRangeEnd & args) { } +template <> inline void DeepCopyDestroy(ArgsDomainRangePushEx & args) { DestroyEventAttributes(args.eventAttrib); } +template <> inline void DeepCopyDestroy(ArgsDomainRangePop & args) { } +template <> inline void DeepCopyDestroy(ArgsDomainResourceCreate & args) { DestroyResourceAttributes(args.attr); } +template <> inline void DeepCopyDestroy(ArgsDomainResourceDestroy& args) { } +template <> inline void DeepCopyDestroy(ArgsDomainNameCategoryA & args) { DestroyCstring(args.str); } +template <> inline void DeepCopyDestroy(ArgsDomainNameCategoryW & args) { DestroyCstring(args.str); } +template <> inline void DeepCopyDestroy(ArgsDomainRegisterStringA& args) { DestroyCstring(args.str); } +template <> inline void DeepCopyDestroy(ArgsDomainRegisterStringW& args) { DestroyCstring(args.str); } +template <> inline void DeepCopyDestroy(ArgsDomainCreateA & args) { DestroyCstring(args.name); } +template <> inline void DeepCopyDestroy(ArgsDomainCreateW & args) { DestroyCstring(args.name); } +template <> inline 
void DeepCopyDestroy(ArgsDomainDestroy & args) { } +template <> inline void DeepCopyDestroy(ArgsInitialize & args) { } + +struct CallData +{ + CallId id{NVTX_CB_MODULE_INVALID, 0}; + Args args; + + ~CallData() + { + switch (id.mod) + { + case NVTX_CB_MODULE_CORE: + switch (id.cb) + { + case NVTX_CBID_CORE_MarkEx : DeepCopyDestroy(args.MarkEx ); break; + case NVTX_CBID_CORE_MarkA : DeepCopyDestroy(args.MarkA ); break; + case NVTX_CBID_CORE_MarkW : DeepCopyDestroy(args.MarkW ); break; + case NVTX_CBID_CORE_RangeStartEx : DeepCopyDestroy(args.RangeStartEx ); break; + case NVTX_CBID_CORE_RangeStartA : DeepCopyDestroy(args.RangeStartA ); break; + case NVTX_CBID_CORE_RangeStartW : DeepCopyDestroy(args.RangeStartW ); break; + case NVTX_CBID_CORE_RangeEnd : DeepCopyDestroy(args.RangeEnd ); break; + case NVTX_CBID_CORE_RangePushEx : DeepCopyDestroy(args.RangePushEx ); break; + case NVTX_CBID_CORE_RangePushA : DeepCopyDestroy(args.RangePushA ); break; + case NVTX_CBID_CORE_RangePushW : DeepCopyDestroy(args.RangePushW ); break; + case NVTX_CBID_CORE_RangePop : DeepCopyDestroy(args.RangePop ); break; + case NVTX_CBID_CORE_NameCategoryA: DeepCopyDestroy(args.NameCategoryA); break; + case NVTX_CBID_CORE_NameCategoryW: DeepCopyDestroy(args.NameCategoryW); break; + case NVTX_CBID_CORE_NameOsThreadA: DeepCopyDestroy(args.NameOsThreadA); break; + case NVTX_CBID_CORE_NameOsThreadW: DeepCopyDestroy(args.NameOsThreadW); break; + default: break; + } + break; + case NVTX_CB_MODULE_CORE2: + switch (id.cb) + { + case NVTX_CBID_CORE2_DomainMarkEx : DeepCopyDestroy(args.DomainMarkEx ); break; + case NVTX_CBID_CORE2_DomainRangeStartEx : DeepCopyDestroy(args.DomainRangeStartEx ); break; + case NVTX_CBID_CORE2_DomainRangeEnd : DeepCopyDestroy(args.DomainRangeEnd ); break; + case NVTX_CBID_CORE2_DomainRangePushEx : DeepCopyDestroy(args.DomainRangePushEx ); break; + case NVTX_CBID_CORE2_DomainRangePop : DeepCopyDestroy(args.DomainRangePop ); break; + case NVTX_CBID_CORE2_DomainResourceCreate : 
DeepCopyDestroy(args.DomainResourceCreate ); break; + case NVTX_CBID_CORE2_DomainResourceDestroy: DeepCopyDestroy(args.DomainResourceDestroy); break; + case NVTX_CBID_CORE2_DomainNameCategoryA : DeepCopyDestroy(args.DomainNameCategoryA ); break; + case NVTX_CBID_CORE2_DomainNameCategoryW : DeepCopyDestroy(args.DomainNameCategoryW ); break; + case NVTX_CBID_CORE2_DomainRegisterStringA: DeepCopyDestroy(args.DomainRegisterStringA); break; + case NVTX_CBID_CORE2_DomainRegisterStringW: DeepCopyDestroy(args.DomainRegisterStringW); break; + case NVTX_CBID_CORE2_DomainCreateA : DeepCopyDestroy(args.DomainCreateA ); break; + case NVTX_CBID_CORE2_DomainCreateW : DeepCopyDestroy(args.DomainCreateW ); break; + case NVTX_CBID_CORE2_DomainDestroy : DeepCopyDestroy(args.DomainDestroy ); break; + case NVTX_CBID_CORE2_Initialize : DeepCopyDestroy(args.Initialize ); break; + default: break; + } + break; + default: break; + } + } +}; + +inline std::ostream& operator<<(std::ostream& os, CallData const& data) +{ + if (data.id == CALLID_LOAD()) + { + return os << CallName(data.id) << " returned " << data.args.Load.success; + } + + os << "[" << data.id.mod << "," << std::setw(2) << data.id.cb << "] "; + os << CallName(data.id) << '('; + switch (data.id.mod) + { + case NVTX_CB_MODULE_CORE: + switch (data.id.cb) + { + case NVTX_CBID_CORE_MarkEx : {auto& a = data.args.MarkEx ; os << *a.eventAttrib; } break; + case NVTX_CBID_CORE_MarkA : {auto& a = data.args.MarkA ; os << '"' << a.str << '"'; } break; + case NVTX_CBID_CORE_MarkW : {auto& a = data.args.MarkW ; os << "WIDE"; } break; + case NVTX_CBID_CORE_RangeStartEx : {auto& a = data.args.RangeStartEx ; os << *a.eventAttrib; } break; + case NVTX_CBID_CORE_RangeStartA : {auto& a = data.args.RangeStartA ; os << '"' << a.str << '"'; } break; + case NVTX_CBID_CORE_RangeStartW : {auto& a = data.args.RangeStartW ; os << "WIDE"; } break; + case NVTX_CBID_CORE_RangeEnd : {auto& a = data.args.RangeEnd ; os << a.id; } break; + case 
NVTX_CBID_CORE_RangePushEx : {auto& a = data.args.RangePushEx ; os << *a.eventAttrib; } break; + case NVTX_CBID_CORE_RangePushA : {auto& a = data.args.RangePushA ; os << '"' << a.str << '"'; } break; + case NVTX_CBID_CORE_RangePushW : {auto& a = data.args.RangePushW ; os << "WIDE"; } break; + case NVTX_CBID_CORE_RangePop : {auto& a = data.args.RangePop ; } break; + case NVTX_CBID_CORE_NameCategoryA: {auto& a = data.args.NameCategoryA; os << a.id << ", \"" << a.str << '"'; } break; + case NVTX_CBID_CORE_NameCategoryW: {auto& a = data.args.NameCategoryW; os << a.id << ", " << "WIDE"; } break; + case NVTX_CBID_CORE_NameOsThreadA: {auto& a = data.args.NameOsThreadA; os << a.id << ", \"" << a.str << '"'; } break; + case NVTX_CBID_CORE_NameOsThreadW: {auto& a = data.args.NameOsThreadW; os << a.id << ", " << "WIDE"; } break; + default: break; + } + break; + case NVTX_CB_MODULE_CORE2: + switch (data.id.cb) + { + case NVTX_CBID_CORE2_DomainMarkEx : {auto& a = data.args.DomainMarkEx ; os << a.domain << ", " << *a.eventAttrib; } break; + case NVTX_CBID_CORE2_DomainRangeStartEx : {auto& a = data.args.DomainRangeStartEx ; os << a.domain << ", " << *a.eventAttrib; } break; + case NVTX_CBID_CORE2_DomainRangeEnd : {auto& a = data.args.DomainRangeEnd ; os << a.domain << ", " << a.id; } break; + case NVTX_CBID_CORE2_DomainRangePushEx : {auto& a = data.args.DomainRangePushEx ; os << a.domain << ", " << *a.eventAttrib; } break; + case NVTX_CBID_CORE2_DomainRangePop : {auto& a = data.args.DomainRangePop ; os << a.domain; } break; + case NVTX_CBID_CORE2_DomainResourceCreate : {auto& a = data.args.DomainResourceCreate ; os << a.domain << ", " << a.attr; } break; // TODO + case NVTX_CBID_CORE2_DomainResourceDestroy: {auto& a = data.args.DomainResourceDestroy; os << a.attr; } break; + case NVTX_CBID_CORE2_DomainNameCategoryA : {auto& a = data.args.DomainNameCategoryA ; os << a.domain << ", " << a.id << ", \"" << a.str << '"';} break; + case NVTX_CBID_CORE2_DomainNameCategoryW : {auto& a = 
data.args.DomainNameCategoryW ; os << a.domain << ", " << a.id << ", " << "WIDE"; } break; + case NVTX_CBID_CORE2_DomainRegisterStringA: {auto& a = data.args.DomainRegisterStringA; os << a.domain << ", \"" << a.str << '"'; } break; + case NVTX_CBID_CORE2_DomainRegisterStringW: {auto& a = data.args.DomainRegisterStringW; os << a.domain << ", " << "WIDE"; } break; + case NVTX_CBID_CORE2_DomainCreateA : {auto& a = data.args.DomainCreateA ; os << '"' << a.name << '"'; } break; + case NVTX_CBID_CORE2_DomainCreateW : {auto& a = data.args.DomainCreateW ; os << "WIDE"; } break; + case NVTX_CBID_CORE2_DomainDestroy : {auto& a = data.args.DomainDestroy ; os << a.domain; } break; + case NVTX_CBID_CORE2_Initialize : {auto& a = data.args.Initialize ; os << a.reserved; } break; + default: break; + } + break; + default: break; + } + os << ')'; + return os; +}; + +using Call = std::shared_ptr; + +// Helper to write CALL(CORE, NameCategoryA, id, str) to construct a Call with arg values +#define CALL(m,c,...) 
[=]{ Call v(new CallData); v->id = CALLID(m,c); DeepCopyAssign(v->args.c, Args##c{__VA_ARGS__}); return v; }() + +#define CALL_LOAD(s) [=]{ Call v(new CallData); v->id = CALLID_LOAD(); v->args.Load = ArgsLoad{s}; return v; }() + +// Helpers to construct unions from NVTX C API types +inline nvtxMessageValue_t MakeMessage(const char* msg) { nvtxMessageValue_t v; v.ascii = msg; return v; } +inline nvtxMessageValue_t MakeMessage(const wchar_t* msg) { nvtxMessageValue_t v; v.unicode = msg; return v; } +inline nvtxMessageValue_t MakeMessage(nvtxStringHandle_t msg) { nvtxMessageValue_t v; v.registered = msg; return v; } + +inline nvtxEventAttributes_t::payload_t MakePayload(uint64_t v) { nvtxEventAttributes_t::payload_t p; p.ullValue = v; return p; } +inline nvtxEventAttributes_t::payload_t MakePayload(int64_t v) { nvtxEventAttributes_t::payload_t p; p.llValue = v; return p; } +inline nvtxEventAttributes_t::payload_t MakePayload(double v) { nvtxEventAttributes_t::payload_t p; p.dValue = v; return p; } +inline nvtxEventAttributes_t::payload_t MakePayload(uint32_t v) { nvtxEventAttributes_t::payload_t p; p.uiValue = v; return p; } +inline nvtxEventAttributes_t::payload_t MakePayload(int32_t v) { nvtxEventAttributes_t::payload_t p; p.iValue = v; return p; } +inline nvtxEventAttributes_t::payload_t MakePayload(float v) { nvtxEventAttributes_t::payload_t p; p.fValue = v; return p; } + +// Define Same() overloads for NVTX API types +inline bool Same(nvtxEventAttributes_t const& lhs, nvtxEventAttributes_t const& rhs, SAME_COMMON_ARGS) +{ + bool same = true + && MEMBER_SAME(version) + && MEMBER_SAME(size) + && MEMBER_SAME(category) + && MEMBER_SAME(colorType) + && MEMBER_SAME(color) + && MEMBER_SAME(payloadType) + && (false + || lhs.payloadType == NVTX_PAYLOAD_UNKNOWN + || (lhs.payloadType == NVTX_PAYLOAD_TYPE_UNSIGNED_INT64 && MEMBER_SAME(payload.ullValue)) + || (lhs.payloadType == NVTX_PAYLOAD_TYPE_INT64 && MEMBER_SAME(payload.llValue)) + || (lhs.payloadType == 
NVTX_PAYLOAD_TYPE_DOUBLE && MEMBER_SAME(payload.dValue)) + || (lhs.payloadType == NVTX_PAYLOAD_TYPE_UNSIGNED_INT32 && MEMBER_SAME(payload.uiValue)) + || (lhs.payloadType == NVTX_PAYLOAD_TYPE_INT32 && MEMBER_SAME(payload.iValue)) + || (lhs.payloadType == NVTX_PAYLOAD_TYPE_FLOAT && MEMBER_SAME(payload.fValue)) + ) + && MEMBER_SAME(messageType) + && (false + || lhs.messageType == NVTX_MESSAGE_UNKNOWN + || (lhs.messageType == NVTX_MESSAGE_TYPE_ASCII && MEMBER_SAME(message.ascii)) + || (lhs.messageType == NVTX_MESSAGE_TYPE_UNICODE && MEMBER_SAME(message.unicode)) + || (lhs.messageType == NVTX_MESSAGE_TYPE_REGISTERED && MEMBER_SAME(message.registered)) + ) + ; + VERBOSE_PRINT() + << std::string(depth, ' ') << "Expected: " << rhs << "\n" + << std::string(depth, ' ') << "Provided: " << lhs << "\n"; + return same; +} +DEFINE_EQ_NE_DEEP(nvtxEventAttributes_t) + +inline bool Same(nvtxResourceAttributes_t const& lhs, nvtxResourceAttributes_t const& rhs, SAME_COMMON_ARGS) +{ + bool same = true + && MEMBER_SAME(version) + && MEMBER_SAME(size) + && MEMBER_SAME(identifierType) + && (false + || lhs.identifierType == NVTX_RESOURCE_TYPE_UNKNOWN + || (lhs.identifierType == NVTX_RESOURCE_TYPE_GENERIC_POINTER && MEMBER_SAME(identifier.pValue)) + || (lhs.identifierType == NVTX_RESOURCE_TYPE_GENERIC_HANDLE && MEMBER_SAME(identifier.ullValue)) + || (lhs.identifierType == NVTX_RESOURCE_TYPE_GENERIC_THREAD_NATIVE && MEMBER_SAME(identifier.ullValue)) + || (lhs.identifierType == NVTX_RESOURCE_TYPE_GENERIC_THREAD_POSIX && MEMBER_SAME(identifier.ullValue)) + ) + && MEMBER_SAME(messageType) + && (false + || lhs.messageType == NVTX_MESSAGE_UNKNOWN + || (lhs.messageType == NVTX_MESSAGE_TYPE_ASCII && MEMBER_SAME(message.ascii)) + || (lhs.messageType == NVTX_MESSAGE_TYPE_UNICODE && MEMBER_SAME(message.unicode)) + || (lhs.messageType == NVTX_MESSAGE_TYPE_REGISTERED && MEMBER_SAME(message.registered)) + ) + ; + VERBOSE_PRINT(); + return same; +} +DEFINE_EQ_NE_DEEP(nvtxResourceAttributes_t) + +// Define 
Same() overloads (and operators == and !=) for NVTX arg pack types & Args union + +#define DEFINE_ARGS_SAME_0(cb) DEFINE_SAME_0(Args##cb) +#define DEFINE_ARGS_SAME_1(cb, a) DEFINE_SAME_1(Args##cb, a) +#define DEFINE_ARGS_SAME_2(cb, a, b) DEFINE_SAME_2(Args##cb, a, b) +#define DEFINE_ARGS_SAME_3(cb, a, b, c) DEFINE_SAME_3(Args##cb, a, b, c) + +DEFINE_ARGS_SAME_1(Load, success) +// CORE +DEFINE_ARGS_SAME_1(MarkEx, eventAttrib) +DEFINE_ARGS_SAME_1(MarkA, str) +DEFINE_ARGS_SAME_1(MarkW, str) +DEFINE_ARGS_SAME_1(RangeStartEx, eventAttrib) +DEFINE_ARGS_SAME_1(RangeStartA, str) +DEFINE_ARGS_SAME_1(RangeStartW, str) +DEFINE_ARGS_SAME_1(RangeEnd, id) // compare the range id too, consistent with DomainRangeEnd below +DEFINE_ARGS_SAME_1(RangePushEx, eventAttrib) +DEFINE_ARGS_SAME_1(RangePushA, str) +DEFINE_ARGS_SAME_1(RangePushW, str) +DEFINE_ARGS_SAME_0(RangePop) +DEFINE_ARGS_SAME_2(NameCategoryA, id, str) +DEFINE_ARGS_SAME_2(NameCategoryW, id, str) +DEFINE_ARGS_SAME_2(NameOsThreadA, id, str) +DEFINE_ARGS_SAME_2(NameOsThreadW, id, str) +// CORE2 +DEFINE_ARGS_SAME_2(DomainMarkEx, domain, eventAttrib) +DEFINE_ARGS_SAME_2(DomainRangeStartEx, domain, eventAttrib) +DEFINE_ARGS_SAME_2(DomainRangeEnd, domain, id) +DEFINE_ARGS_SAME_2(DomainRangePushEx, domain, eventAttrib) +DEFINE_ARGS_SAME_1(DomainRangePop, domain) +DEFINE_ARGS_SAME_2(DomainResourceCreate, domain, attr) +DEFINE_ARGS_SAME_1(DomainResourceDestroy, attr) +DEFINE_ARGS_SAME_3(DomainNameCategoryA, domain, id, str) +DEFINE_ARGS_SAME_3(DomainNameCategoryW, domain, id, str) +DEFINE_ARGS_SAME_2(DomainRegisterStringA, domain, str) +DEFINE_ARGS_SAME_2(DomainRegisterStringW, domain, str) +DEFINE_ARGS_SAME_1(DomainCreateA, name) +DEFINE_ARGS_SAME_1(DomainCreateW, name) +DEFINE_ARGS_SAME_1(DomainDestroy, domain) +DEFINE_ARGS_SAME_1(Initialize, reserved) + +inline bool Same(CallData const& lhs, CallData const& rhs, SAME_COMMON_ARGS) +{ + bool same = true + && MEMBER_SAME(id) + && (false + || UNION_MEMBER_SAME(id, CALLID_LOAD(), args.Load) + || UNION_MEMBER_SAME(id, CALLID(CORE, MarkEx), args.MarkEx)
+ || UNION_MEMBER_SAME(id, CALLID(CORE, MarkA), args.MarkA) + || UNION_MEMBER_SAME(id, CALLID(CORE, MarkW), args.MarkW) + || UNION_MEMBER_SAME(id, CALLID(CORE, RangeStartEx), args.RangeStartEx) + || UNION_MEMBER_SAME(id, CALLID(CORE, RangeStartA), args.RangeStartA) + || UNION_MEMBER_SAME(id, CALLID(CORE, RangeStartW), args.RangeStartW) + || UNION_MEMBER_SAME(id, CALLID(CORE, RangeEnd), args.RangeEnd) + || UNION_MEMBER_SAME(id, CALLID(CORE, RangePushEx), args.RangePushEx) + || UNION_MEMBER_SAME(id, CALLID(CORE, RangePushA), args.RangePushA) + || UNION_MEMBER_SAME(id, CALLID(CORE, RangePushW), args.RangePushW) + || UNION_MEMBER_SAME(id, CALLID(CORE, RangePop), args.RangePop) + || UNION_MEMBER_SAME(id, CALLID(CORE, NameCategoryA), args.NameCategoryA) + || UNION_MEMBER_SAME(id, CALLID(CORE, NameCategoryW), args.NameCategoryW) + || UNION_MEMBER_SAME(id, CALLID(CORE, NameOsThreadA), args.NameOsThreadA) + || UNION_MEMBER_SAME(id, CALLID(CORE, NameOsThreadW), args.NameOsThreadW) + || UNION_MEMBER_SAME(id, CALLID(CORE2, DomainMarkEx), args.DomainMarkEx) + || UNION_MEMBER_SAME(id, CALLID(CORE2, DomainRangeStartEx), args.DomainRangeStartEx) + || UNION_MEMBER_SAME(id, CALLID(CORE2, DomainRangeEnd), args.DomainRangeEnd) + || UNION_MEMBER_SAME(id, CALLID(CORE2, DomainRangePushEx), args.DomainRangePushEx) + || UNION_MEMBER_SAME(id, CALLID(CORE2, DomainRangePop), args.DomainRangePop) + || UNION_MEMBER_SAME(id, CALLID(CORE2, DomainResourceCreate), args.DomainResourceCreate) + || UNION_MEMBER_SAME(id, CALLID(CORE2, DomainResourceDestroy), args.DomainResourceDestroy) + || UNION_MEMBER_SAME(id, CALLID(CORE2, DomainNameCategoryA), args.DomainNameCategoryA) + || UNION_MEMBER_SAME(id, CALLID(CORE2, DomainNameCategoryW), args.DomainNameCategoryW) + || UNION_MEMBER_SAME(id, CALLID(CORE2, DomainRegisterStringA), args.DomainRegisterStringA) + || UNION_MEMBER_SAME(id, CALLID(CORE2, DomainRegisterStringW), args.DomainRegisterStringW) + || UNION_MEMBER_SAME(id, CALLID(CORE2, DomainCreateA), 
args.DomainCreateA) + || UNION_MEMBER_SAME(id, CALLID(CORE2, DomainCreateW), args.DomainCreateW) + || UNION_MEMBER_SAME(id, CALLID(CORE2, DomainDestroy), args.DomainDestroy) + || UNION_MEMBER_SAME(id, CALLID(CORE2, Initialize), args.Initialize) + ) + ; + + VERBOSE_PRINT(); + return same; +} +DEFINE_EQ_NE_DEEP(CallData) + +inline nvtxDomainHandle_t PostInc(nvtxDomainHandle_t & h) { auto v = h; ++(intptr_t&)h; return v; } +inline nvtxStringHandle_t PostInc(nvtxStringHandle_t & h) { auto v = h; ++(intptr_t&)h; return v; } +inline nvtxResourceHandle_t PostInc(nvtxResourceHandle_t& h) { auto v = h; ++(intptr_t&)h; return v; } +inline nvtxRangeId_t PostInc(nvtxRangeId_t & h) { return h++; } + +struct Callbacks +{ + std::function Default; + std::function Load; + + std::function MarkEx; + std::function MarkA; + std::function MarkW; + std::function RangeStartEx; + std::function RangeStartA; + std::function RangeStartW; + std::function RangeEnd; + std::function RangePushEx; + std::function RangePushA; + std::function RangePushW; + std::function RangePop; + std::function NameCategoryA; + std::function NameCategoryW; + std::function NameOsThreadA; + std::function NameOsThreadW; + + std::function DomainMarkEx; + std::function DomainRangeStartEx; + std::function DomainRangeEnd; + std::function DomainRangePushEx; + std::function DomainRangePop; + std::function DomainResourceCreate; + std::function DomainResourceDestroy; + std::function DomainNameCategoryA; + std::function DomainNameCategoryW; + std::function DomainRegisterStringA; + std::function DomainRegisterStringW; + std::function DomainCreateA; + std::function DomainCreateW; + std::function DomainDestroy; + std::function Initialize; + + + Callbacks(Callbacks const&) = default; + Callbacks& operator=(Callbacks const&) = default; + Callbacks(Callbacks&&) = default; + Callbacks& operator=(Callbacks&&) = default; + + nvtxDomainHandle_t nextDomainHandle = (nvtxDomainHandle_t)1; + struct DomainData + { + int pushPopDepth = 0; + 
nvtxRangeId_t nextRangeId = (nvtxRangeId_t)1; + nvtxStringHandle_t nextStringHandle = (nvtxStringHandle_t)1; + nvtxResourceHandle_t nextResourceHandle = (nvtxResourceHandle_t)1; + }; + std::map domainData; + + Callbacks() + : Default([](Call const&) {}) + , Load ([&](int success) { Default(CALL_LOAD(success)); }) + // CORE + , MarkEx ([&](const nvtxEventAttributes_t* a) { Default(CALL(CORE, MarkEx , a )); }) + , MarkA ([&](const char* a) { Default(CALL(CORE, MarkA , a )); }) + , MarkW ([&](const wchar_t* a) { Default(CALL(CORE, MarkW , a )); }) + , RangeStartEx ([&](const nvtxEventAttributes_t* a) { Default(CALL(CORE, RangeStartEx , a )); return PostInc(domainData[nullptr].nextRangeId); }) + , RangeStartA ([&](const char* a) { Default(CALL(CORE, RangeStartA , a )); return PostInc(domainData[nullptr].nextRangeId); }) + , RangeStartW ([&](const wchar_t* a) { Default(CALL(CORE, RangeStartW , a )); return PostInc(domainData[nullptr].nextRangeId); }) + , RangeEnd ([&](nvtxRangeId_t a) { Default(CALL(CORE, RangeEnd , a )); }) + , RangePushEx ([&](const nvtxEventAttributes_t* a) { Default(CALL(CORE, RangePushEx , a )); return ++domainData[nullptr].pushPopDepth; }) + , RangePushA ([&](const char* a) { Default(CALL(CORE, RangePushA , a )); return ++domainData[nullptr].pushPopDepth; }) + , RangePushW ([&](const wchar_t* a) { Default(CALL(CORE, RangePushW , a )); return ++domainData[nullptr].pushPopDepth; }) + , RangePop ([&]( ) { Default(CALL(CORE, RangePop )); return domainData[nullptr].pushPopDepth--; }) + , NameCategoryA([&](uint32_t a, const char* b) { Default(CALL(CORE, NameCategoryA, a, b)); }) + , NameCategoryW([&](uint32_t a, const wchar_t* b) { Default(CALL(CORE, NameCategoryW, a, b)); }) + , NameOsThreadA([&](uint32_t a, const char* b) { Default(CALL(CORE, NameOsThreadA, a, b)); }) + , NameOsThreadW([&](uint32_t a, const wchar_t* b) { Default(CALL(CORE, NameOsThreadW, a, b)); }) + // CORE2 + , DomainMarkEx ([&](nvtxDomainHandle_t a, const nvtxEventAttributes_t* b) 
{ Default(CALL(CORE2, DomainMarkEx , a, b )); }) + , DomainRangeStartEx ([&](nvtxDomainHandle_t a, const nvtxEventAttributes_t* b) { Default(CALL(CORE2, DomainRangeStartEx , a, b )); return PostInc(domainData[a].nextRangeId); }) + , DomainRangeEnd ([&](nvtxDomainHandle_t a, nvtxRangeId_t b) { Default(CALL(CORE2, DomainRangeEnd , a, b )); }) + , DomainRangePushEx ([&](nvtxDomainHandle_t a, const nvtxEventAttributes_t* b) { Default(CALL(CORE2, DomainRangePushEx , a, b )); return ++domainData[a].pushPopDepth; }) + , DomainRangePop ([&](nvtxDomainHandle_t a) { Default(CALL(CORE2, DomainRangePop , a )); return domainData[a].pushPopDepth--; }) + , DomainResourceCreate ([&](nvtxDomainHandle_t a, nvtxResourceAttributes_t* b) { Default(CALL(CORE2, DomainResourceCreate , a, b )); return PostInc(domainData[a].nextResourceHandle); }) + , DomainResourceDestroy([&](nvtxResourceHandle_t a) { Default(CALL(CORE2, DomainResourceDestroy, a )); }) + , DomainNameCategoryA ([&](nvtxDomainHandle_t a, uint32_t b, const char* c) { Default(CALL(CORE2, DomainNameCategoryA , a, b, c)); }) + , DomainNameCategoryW ([&](nvtxDomainHandle_t a, uint32_t b, const wchar_t* c) { Default(CALL(CORE2, DomainNameCategoryW , a, b, c)); }) + , DomainRegisterStringA([&](nvtxDomainHandle_t a, const char* b) { Default(CALL(CORE2, DomainRegisterStringA, a, b )); return PostInc(domainData[a].nextStringHandle); }) + , DomainRegisterStringW([&](nvtxDomainHandle_t a, const wchar_t* b) { Default(CALL(CORE2, DomainRegisterStringW, a, b )); return PostInc(domainData[a].nextStringHandle); }) + , DomainCreateA ([&](const char* a) { Default(CALL(CORE2, DomainCreateA , a )); return PostInc(nextDomainHandle); }) + , DomainCreateW ([&](const wchar_t* a) { Default(CALL(CORE2, DomainCreateW , a )); return PostInc(nextDomainHandle); }) + , DomainDestroy ([&](nvtxDomainHandle_t a) { Default(CALL(CORE2, DomainDestroy , a )); }) + , Initialize ([&](const void* a) { Default(CALL(CORE2, Initialize , a )); }) + { + } +}; + +extern 
Callbacks g_callbacks; + + diff --git a/tests/TestCoverage.h b/tests/TestCoverage.h new file mode 100644 index 0000000..c1d2d78 --- /dev/null +++ b/tests/TestCoverage.h @@ -0,0 +1,498 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. 
+ */ + +#include + +#include +#include + +#include "PrettyPrintersNvtxCpp.h" + +struct a_lib +{ + static constexpr const char* name{"Library A"}; + //static constexpr const float name{3.14f}; +}; + +struct cat_x +{ + static constexpr const char* name{"Category X"}; + static constexpr uint32_t id{42}; +}; + +struct cat_y +{ + static constexpr const char* name{"Category Y"}; + //static constexpr const float name{3.14f}; + static constexpr uint32_t id{43}; +}; + +struct regstr_hello +{ + static constexpr const char* message{"Hello"}; +}; + +static void TestFuncRange() +{ + NVTX3_FUNC_RANGE(); + nvtx3::mark("Marker in TestFuncRange"); +} + +static void TestFuncRangeV() +{ + NVTX3_V1_FUNC_RANGE(); + nvtx3::mark("Marker in TestFuncRangeV"); +} + +static void TestFuncRangeIfDyn(bool cond) +{ + NVTX3_FUNC_RANGE_IF(cond); + nvtx3::mark("Marker in TestFuncRangeIfDyn"); +} + +static void TestFuncRangeIfDynV(bool cond) +{ + NVTX3_V1_FUNC_RANGE_IF(cond); + nvtx3::mark("Marker in TestFuncRangeIfDynV"); +} + +static void TestFuncRangeIfStat(bool cond) +{ + NVTX3_FUNC_RANGE_IF(cond); + nvtx3::mark("Marker in TestFuncRangeIfStat"); +} + +static void TestFuncRangeIfStatV(bool cond) +{ + NVTX3_V1_FUNC_RANGE_IF(cond); + nvtx3::mark("Marker in TestFuncRangeIfStatV"); +} + +static void TestFuncRangeIn() +{ + NVTX3_FUNC_RANGE_IN(a_lib); + nvtx3::mark("Marker in TestFuncRangeIn"); +} + +static void TestFuncRangeInV() +{ + NVTX3_V1_FUNC_RANGE_IN(a_lib); + nvtx3::mark("Marker in TestFuncRangeInV"); +} + +static void TestFuncRangeIfInDyn(bool cond) +{ + NVTX3_FUNC_RANGE_IF_IN(a_lib, cond); + nvtx3::mark("Marker in TestFuncRangeIfInDyn"); +} + +static void TestFuncRangeIfInDynV(bool cond) +{ + NVTX3_V1_FUNC_RANGE_IF_IN(a_lib, cond); + nvtx3::mark("Marker in TestFuncRangeIfInDynV"); +} + +static void TestFuncRangeIfInStat(bool cond) +{ + NVTX3_FUNC_RANGE_IF_IN(a_lib, cond); + nvtx3::mark("Marker in TestFuncRangeIfInStat"); +} + +static void TestFuncRangeIfInStatV(bool cond) +{ + 
NVTX3_V1_FUNC_RANGE_IF_IN(a_lib, cond); + nvtx3::mark("Marker in TestFuncRangeIfInStatV"); +} + +static int RunTestCommon(int argc, const char** argv) +{ + bool verbose = false; + const std::string verboseArg = "-v"; + for (; *argv; ++argv) + { + if (*argv == verboseArg) verbose = true; + } + + using namespace nvtx3; + + { + std::cout << "Default attributes:\n"; + event_attributes attr; + if (verbose) std::cout << attr << '\n'; + } + if (verbose) std::cout << "-------------------------------------\n"; + + { + std::cout << "Set a message (ascii), payload, color, and category:\n"; + event_attributes attr{ + message{"Hello"}, + category{11}, + payload{5.0f}, + rgb{0,255,0}}; + if (verbose) std::cout << attr << '\n'; + } + if (verbose) std::cout << "-------------------------------------\n"; + + { + std::cout << "Set a message with different string types:\n"; + + event_attributes a{message{"Hello"}}; + if (verbose) std::cout << a << '\n'; + + event_attributes wa{message{L"Hello"}}; + if (verbose) std::cout << wa << '\n'; + + std::string hello{"Hello"}; + event_attributes b{message{hello}}; + if (verbose) std::cout << b << '\n'; + + std::wstring whello{L"Hello"}; + event_attributes wb{message{whello}}; + if (verbose) std::cout << wb << '\n'; + + // Important! Neither of the following will compile: + // + // event_attributes c{message{std::string{"foo"}}}; + // std::cout << c; + // + // std::string foo{"foo"}; + // event_attributes d{message{foo + "bar"}}; + // std::cout << d; + // + // Both of those usages fail with: + // "error C2280: 'message::message(std::string &&)': + // attempting to reference a deleted function" + // + // message is a "view" class, not an owning class. + // It cannot take ownership of a temporary string and + // destroy it when it goes out of scope. Similarly, + // event_attributes is not an owning class, so it cannot take + // ownership of a message either. + // + // TODO: Could we add implicit support for this? 
+ } + if (verbose) std::cout << "-------------------------------------\n"; + + { + std::cout << "Set a message (registered):\n"; + auto hTacobell = reinterpret_cast(0x7ac0be11); + event_attributes attr{message{hTacobell}}; + if (verbose) std::cout << attr << '\n'; + } + if (verbose) std::cout << "-------------------------------------\n"; + + { + std::cout << "Convenience: Set a message without the helper type:\n"; + + event_attributes a{"Hello"}; + if (verbose) std::cout << a << '\n'; + + std::string hello{"Hello"}; + event_attributes b{hello}; + if (verbose) std::cout << b << '\n'; + } + if (verbose) std::cout << "-------------------------------------\n"; + + { + std::cout << "Set a payload twice (first should win):\n"; + event_attributes attr{"test", payload{1.0f}, payload{2}}; + if (verbose) std::cout << attr << '\n'; + } + if (verbose) std::cout << "-------------------------------------\n"; + + { + std::cout << "Set a color twice (first should win):\n"; + event_attributes attr{"test", argb{127,0,0,255}, rgb{0,255,0}}; + if (verbose) std::cout << attr << '\n'; + } + if (verbose) std::cout << "-------------------------------------\n"; + + { + std::cout << "Set a message twice (first should win):\n"; + event_attributes attr{L"wide", "narrow"}; + if (verbose) std::cout << attr << '\n'; + } + if (verbose) std::cout << "-------------------------------------\n"; + + { + std::cout << "Set a category twice (first should win):\n"; + event_attributes attr{"test", category{1}, category{2}}; + if (verbose) std::cout << attr << '\n'; + } + if (verbose) std::cout << "-------------------------------------\n"; + + { + std::cout << "Markers\n"; + + // Global domain + event_attributes attr{ + message{"Hello1"}, + category{11}, + payload{5.0f}, + rgb{1,2,3}}; + mark(attr); + + mark(event_attributes{ + message{"Hello2"}, + category{11}, + payload{5.0f}, + rgb{0,255,0}}); + + mark( + message{"Hello3"}, + category{11}, + payload{5.0f}, + rgb{0,255,0}); + + // a_lib domain + 
event_attributes a_attr{ + message{"a: Hello1"}, + category{11}, + payload{5.0f}, + rgb{1,2,3}}; + mark_in(a_attr); + + mark_in(event_attributes{ + message{"a: Hello2"}, + category{11}, + payload{5.0f}, + rgb{0,255,0}}); + + mark_in( + message{"a: Hello3"}, + category{11}, + payload{5.0f}, + rgb{0,255,0}); + } + if (verbose) std::cout << "-------------------------------------\n"; + + { + std::cout << "Range start/end and range_handle\n"; + + // Global domain + event_attributes attr{ + message{"Hello1"}, + category{11}, + payload{5.0f}, + rgb{1,2,3}}; + auto h1 = start_range(attr); + + auto h2 = start_range(event_attributes{ + message{"Hello2"}, + category{11}, + payload{5.0f}, + rgb{0,255,0}}); + + auto h3 = start_range( + message{"Hello3"}, + category{11}, + payload{5.0f}, + rgb{0,255,0}); + + // a_lib domain (uses a_attr, not the global-domain attr) + event_attributes a_attr{ + message{"a: Hello1"}, + category{11}, + payload{5.0f}, + rgb{1,2,3}}; + auto h4 = start_range_in(a_attr); + + auto h5 = start_range_in(event_attributes{ + message{"a: Hello2"}, + category{11}, + payload{5.0f}, + rgb{0,255,0}}); + + auto h6 = start_range_in( + message{"a: Hello3"}, + category{11}, + payload{5.0f}, + rgb{0,255,0}); + + // range_handle operator ==, !=, and cast overloads + bool testEq = h1 == h2; + bool testNe = h3 != h4; + bool testCast = bool(h5); + if (verbose) std::cout << std::boolalpha + << testEq << "\n" + << testNe << "\n" + << testCast << "\n"; + + end_range(h1); + end_range(h2); + end_range(h3); + + end_range_in(h4); + end_range_in(h5); + end_range_in(h6); + } + if (verbose) std::cout << "-------------------------------------\n"; + + { + std::cout << "unique_range\n"; + + // Global domain + event_attributes attr{ + message{"Hello1"}, + category{11}, + payload{5.0f}, + rgb{1,2,3}}; + unique_range u1{attr}; + + unique_range u2{event_attributes{ + message{"Hello2"}, + category{11}, + payload{5.0f}, + rgb{0,255,0}}}; + + unique_range u3{ + message{"Hello3"}, + category{11}, + payload{5.0f}, + rgb{0,255,0}}; + + // 
a_lib domain (uses a_attr, not the global-domain attr) + event_attributes a_attr{ + message{"a: Hello1"}, + category{11}, + payload{5.0f}, + rgb{1,2,3}}; + unique_range_in u4{a_attr}; + + unique_range_in u5{event_attributes{ + message{"a: Hello2"}, + category{11}, + payload{5.0f}, + rgb{0,255,0}}}; + + unique_range_in u6{ + message{"a: Hello3"}, + category{11}, + payload{5.0f}, + rgb{0,255,0}}; + + // movability + auto move_in_out_global = [](unique_range u) { return u; }; + auto move_in_out_domain = [](unique_range_in u) { return u; }; + + auto u1moved = move_in_out_global(std::move(u1)); + auto u4moved = move_in_out_domain(std::move(u4)); + } + if (verbose) std::cout << "-------------------------------------\n"; + + { + std::cout << "scoped_range\n"; + + // Global domain + event_attributes attr{ + message{"Hello1"}, + category{11}, + payload{5.0f}, + rgb{1,2,3}}; + scoped_range s1{attr}; + + scoped_range s2{event_attributes{ + message{"Hello2"}, + category{11}, + payload{5.0f}, + rgb{0,255,0}}}; + + scoped_range s3{ + message{"Hello3"}, + category{11}, + payload{5.0f}, + rgb{0,255,0}}; + + // a_lib domain (uses a_attr, not the global-domain attr) + event_attributes a_attr{ + message{"a: Hello1"}, + category{11}, + payload{5.0f}, + rgb{1,2,3}}; + scoped_range_in s4{a_attr}; + + scoped_range_in s5{event_attributes{ + message{"a: Hello2"}, + category{11}, + payload{5.0f}, + rgb{0,255,0}}}; + + scoped_range_in s6{ + message{"a: Hello3"}, + category{11}, + payload{5.0f}, + rgb{0,255,0}}; + } + if (verbose) std::cout << "-------------------------------------\n"; + + { + std::cout << "named_category\n"; + + // Global domain + mark ("Cat", named_category::get ()); + mark_in<> ("Cat", named_category_in<>::get ()); + mark_in("Cat", named_category_in::get()); + + // a_lib domain + mark_in("Cat", named_category_in::get()); + } + if (verbose) std::cout << "-------------------------------------\n"; + + { + std::cout << "registered_string\n"; + + // Global domain + mark ("RegStr", registered_string::get ()); + mark_in<> ("RegStr", registered_string_in<>::get ()); 
+ mark_in("RegStr", registered_string_in::get()); + + // a_lib domain + mark_in("RegStr", registered_string_in::get()); + } + if (verbose) std::cout << "-------------------------------------\n"; + + { + std::cout << "Macros:\n"; + TestFuncRange(); + TestFuncRangeV(); + TestFuncRangeIfDyn(argc == 1001); + TestFuncRangeIfDyn(argc != 1001); + TestFuncRangeIfDynV(argc == 1002); + TestFuncRangeIfDynV(argc != 1002); + TestFuncRangeIfStat(true); + TestFuncRangeIfStat(false); + TestFuncRangeIfStatV(true); + TestFuncRangeIfStatV(false); + + TestFuncRangeIn(); + TestFuncRangeInV(); + TestFuncRangeIfInDyn(argc == 1003); + TestFuncRangeIfInDyn(argc != 1003); + TestFuncRangeIfInDynV(argc == 1004); + TestFuncRangeIfInDynV(argc != 1004); + TestFuncRangeIfInStat(true); + TestFuncRangeIfInStat(false); + TestFuncRangeIfInStatV(true); + TestFuncRangeIfInStatV(false); + } + if (verbose) std::cout << "-------------------------------------\n"; + + return 0; +} diff --git a/tests/TestSelfInjection.cpp b/tests/TestSelfInjection.cpp new file mode 100644 index 0000000..97c2fbd --- /dev/null +++ b/tests/TestSelfInjection.cpp @@ -0,0 +1,271 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. 
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#include + +#include +#include + +#include +#include +#include + +#include "SelfInjection.h" + +struct S1 +{ + int i; + float f; +}; +bool operator==(S1 const& lhs, S1 const& rhs) +{ + return lhs.i == rhs.i && lhs.f == rhs.f; +} +std::ostream& operator<<(std::ostream& lhs, S1 const& rhs) +{ + return lhs << '{' << rhs.i << ',' << rhs.f << '}'; +} + +struct S2 +{ + int i; + float f; + const char* s; +}; + +static bool Same(S2 const& lhs, S2 const& rhs, SAME_COMMON_ARGS) +{ + bool same = + Same(lhs.i, rhs.i, deep, verbose, "i", oss, depth + 1) && + Same(lhs.f, rhs.f, deep, verbose, "f", oss, depth + 1) && + Same(lhs.s, rhs.s, deep, verbose, "s", oss, depth + 1); + if (verbose && !same) oss << std::string(depth, ' ') << "'" << name << "' members different\n"; + return same; +} + +static bool TestSame(bool verbose, bool deep) +{ + std::cout << std::boolalpha; + + std::cout << "--- Simple ints:\n"; + { + int xL = 5, xR = 5; + bool result = Same(xL, xR, deep, verbose, "x"); + std::cout << "> == ints: " << result << '\n'; + } + { + int xL = 5, xR = 6; + bool result = Same(xL, xR, deep, verbose, "x"); + std::cout << "> != ints: " << result << '\n'; + } + + std::cout << "--- C-style strings:\n"; + { + const char* str = "String"; + bool result = Same(str, str, deep, verbose, "str"); + std::cout << "> char string w/itself: " << result << '\n'; + } + { + const char* strL = "String"; + const char* strR = "String"; + bool result = Same(strL, strR, deep, verbose, "str"); + std::cout << "> == char strings: " << result << '\n'; + } + { + const char* strL = "StringA"; + const char* strR = "StringB"; + bool result = Same(strL, strR, deep, verbose, "str"); + std::cout << "> != char strings: " << result << '\n'; + } + + std::cout << "--- Structs with == and << operators:\n"; + { + S1 sL{5, 3.125f}; + S1 sR{5, 3.125f}; + bool result = Same(sL, sR, deep, verbose, "S1"); + std::cout << "> == S1s: " << result 
<< '\n'; + } + { + S1 sL{5, 3.125f}; + S1 sR{5, 3.14159f}; + bool result = Same(sL, sR, deep, verbose, "S1"); + std::cout << "> != S1s: " << result << '\n'; + } + + std::cout << "--- Pointers to structs with == and << operators:\n"; + { + S1 sL{5, 3.125f}; + S1* psL = &sL; + bool result = Same(psL, psL, deep, verbose, "S1 ptr"); + std::cout << "> same ptr to an S1: " << result << '\n'; + } + { + S1 sL{5, 3.125f}; + S1 sR{5, 3.125f}; + S1* psL = &sL; + S1* psR = &sR; + bool result = Same(psL, psR, deep, verbose, "S1 ptr"); + std::cout << "> different ptrs to == S1s: " << result << '\n'; + } + { + S1 sL{5, 3.125f}; + S1 sR{5, 3.14159f}; + S1* psL = &sL; + S1* psR = &sR; + bool result = Same(psL, psR, deep, verbose, "S1 ptr"); + std::cout << "> different ptrs to != S1s: " << result << '\n'; + } + + std::cout << "--- Structs with Same function defined:\n"; + { + S2 sL{5, 3.125f, "An S2"}; + S2 sR{5, 3.125f, "An S2"}; + bool result = Same(sL, sR, deep, verbose, "S2"); + std::cout << "> == S2s: " << result << '\n'; + } + { + S2 sL{5, 3.125f, "An S2"}; + S2 sR{5, 3.14159f, "An S2"}; + bool result = Same(sL, sR, deep, verbose, "S2"); + std::cout << "> !=f in S2s: " << result << '\n'; + } + { + S2 sL{5, 3.125f, "An S2"}; + S2 sR{5, 3.125f, "Another S2"}; + bool result = Same(sL, sR, deep, verbose, "S2"); + std::cout << "> !=s in S2s: " << result << '\n'; + } + + std::cout << "--- NVTX handles - pointers to incomplete types:\n"; + { + auto hL = reinterpret_cast(1024); + auto hR = reinterpret_cast(1024); + bool result = Same(hL, hR, deep, verbose, "nvtxDomainHandle_t"); + std::cout << "> == domain handles: " << result << '\n'; + } + { + auto hL = reinterpret_cast(1024); + auto hR = reinterpret_cast(2048); + bool result = Same(hL, hR, deep, verbose, "nvtxDomainHandle_t"); + std::cout << "> != domain handles: " << result << '\n'; + } + + std::cout << "--- NVTX event attributes - struct with tagged union:\n"; + { + char buf1[]{"Test message"}; + char buf2[]{"Test message"}; + + 
nvtxEventAttributes_t aL{}; + aL.version = NVTX_VERSION; + aL.size = sizeof(nvtxEventAttributes_t); + aL.category = 5; + aL.colorType = NVTX_COLOR_ARGB; + aL.color = 0xFF446688; + aL.payloadType = NVTX_PAYLOAD_TYPE_DOUBLE; + aL.payload.dValue = 3.125; + aL.messageType = NVTX_MESSAGE_TYPE_ASCII; + aL.message.ascii = buf1; + aL.reserved0 = 1; + + auto aR = aL; + + auto* paL = &aL; + auto* paR = &aR; + + bool result = Same(aL, aR, deep, verbose, "nvtxEventAttributes_t"); + std::cout << "> == attrs: " << result << '\n'; + + aR = aL; + aR.reserved0 = 2; + result = Same(aL, aR, deep, verbose, "nvtxEventAttributes_t"); + std::cout << "> == attrs with different padding: " << result << '\n'; + + aR = aL; + aR.category = 6; + result = Same(aL, aR, deep, verbose, "nvtxEventAttributes_t"); + std::cout << "> != attrs, category: " << result << '\n'; + + aR = aL; + aR.message.ascii = buf2; + result = Same(aL, aR, deep, verbose, "nvtxEventAttributes_t"); + std::cout << "> == attrs with same message in different buffers: " << result << '\n'; + + aR = aL; + aR.message.ascii = "Different message"; + result = Same(aL, aR, deep, verbose, "nvtxEventAttributes_t"); + std::cout << "> != attrs, message: " << result << '\n'; + + aR = aL; + aR.payloadType = NVTX_PAYLOAD_TYPE_FLOAT; + result = Same(aL, aR, deep, verbose, "nvtxEventAttributes_t"); + std::cout << "> != attrs, payloadType: " << result << '\n'; + + aR = aL; + aR.payload.dValue = -3.125; + result = Same(aL, aR, deep, verbose, "nvtxEventAttributes_t"); + std::cout << "> != attrs, payload union value: " << result << '\n'; + + aR = aL; + result = Same(paL, paL, deep, verbose, "nvtxEventAttributes_t by pointer"); + std::cout << "> == attr pointers: " << result << '\n'; + + result = Same(paL, paR, deep, verbose, "nvtxEventAttributes_t by pointer"); + std::cout << "> == attr values, different pointers: " << result << '\n'; + + aR.payload.dValue = -3.125; + result = Same(paL, paR, deep, verbose, "nvtxEventAttributes_t by pointer"); + 
std::cout << "> != attr values, payload union value: " << result << '\n'; + } + + return true; +} + +extern "C" NVTX_DYNAMIC_EXPORT +int RunTest(int argc, const char** argv) +{ + NVTX_EXPORT_UNMANGLED_FUNCTION_NAME + + // Always verbose -- tests both verbose and non-verbose modes + + { + std::cout << "\n------- Non-verbose, non-deep:\n"; + bool success = TestSame(false, false); + if (!success) { std::cout << "TestSame returned false!\n"; return 1; } + } + { + std::cout << "\n------- Non-verbose, deep:\n"; + bool success = TestSame(false, true); + if (!success) { std::cout << "TestSame returned false!\n"; return 1; } + } + + { + std::cout << "\n------- Verbose, non-deep:\n"; + bool success = TestSame(true, false); + if (!success) { std::cout << "TestSame returned false!\n"; return 1; } + } + { + std::cout << "\n------- Verbose, deep:\n"; + bool success = TestSame(true, true); + if (!success) { std::cout << "TestSame returned false!\n"; return 1; } + } + + std::cout << "\n--------- Success!\n"; + return 0; +} diff --git a/tests/UseExportedApi.cpp b/tests/UseExportedApi.cpp new file mode 100644 index 0000000..c33ff2f --- /dev/null +++ b/tests/UseExportedApi.cpp @@ -0,0 +1,210 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. 
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. + */ + +#include "PathHelper.h" + +#include +#include +#include +#include +#include + +#include +#include + +// Use an X-macro to allow doing arbitrary operations to all exported API functions. +// An easy way to generate this list is to use Linux and dump the exports from libexport-api.so. +// I recommend having a bash script to do this, e.g. "exports": +// +// #!/bin/bash +// nm -D "$@" | perl -ne 'print if s/^\S+ T //' +// +// Then typing "exports libexport-api.so" will dump a plain list of the exported symbols. +// That can be piped into perl or sed again to add the X-macro stuff, e.g.: +// +// exports libexport-api.so | perl -ne 'chomp; print " func($_) \\\n"' +// +// Running that command would produce the exact text you can use for the implementation of this +// macro. Don't forget to leave at least one blank line after the macro so the backslash on the +// last line doesn't connect the macro to the next line of code afterwards. +// +// Double-check when generating the list of exports from libexport-api.so that it does in fact +// contain the expected number of exported functions!!! If you automate generating this macro +// as part of the build, then failure to export some symbols would result in failure to include +// them in this list of symbols to test! 
+// +#define FOR_EACH_EXPORT(func) \ + func(nvtxDomainCreateA) \ + func(nvtxDomainCreateW) \ + func(nvtxDomainDestroy) \ + func(nvtxDomainMarkEx) \ + func(nvtxDomainNameCategoryA) \ + func(nvtxDomainNameCategoryW) \ + func(nvtxDomainRangeEnd) \ + func(nvtxDomainRangePop) \ + func(nvtxDomainRangePushEx) \ + func(nvtxDomainRangeStartEx) \ + func(nvtxDomainRegisterStringA) \ + func(nvtxDomainRegisterStringW) \ + func(nvtxDomainResourceCreate) \ + func(nvtxDomainResourceDestroy) \ + func(nvtxDomainSyncUserAcquireFailed) \ + func(nvtxDomainSyncUserAcquireStart) \ + func(nvtxDomainSyncUserAcquireSuccess) \ + func(nvtxDomainSyncUserCreate) \ + func(nvtxDomainSyncUserDestroy) \ + func(nvtxDomainSyncUserReleasing) \ + func(nvtxInitialize) \ + func(nvtxMarkA) \ + func(nvtxMarkEx) \ + func(nvtxMarkW) \ + func(nvtxNameCategoryA) \ + func(nvtxNameCategoryW) \ + func(nvtxNameClCommandQueueA) \ + func(nvtxNameClCommandQueueW) \ + func(nvtxNameClContextA) \ + func(nvtxNameClContextW) \ + func(nvtxNameClDeviceA) \ + func(nvtxNameClDeviceW) \ + func(nvtxNameClEventA) \ + func(nvtxNameClEventW) \ + func(nvtxNameClMemObjectA) \ + func(nvtxNameClMemObjectW) \ + func(nvtxNameClProgramA) \ + func(nvtxNameClProgramW) \ + func(nvtxNameClSamplerA) \ + func(nvtxNameClSamplerW) \ + func(nvtxNameCuContextA) \ + func(nvtxNameCuContextW) \ + func(nvtxNameCuDeviceA) \ + func(nvtxNameCuDeviceW) \ + func(nvtxNameCuEventA) \ + func(nvtxNameCuEventW) \ + func(nvtxNameCuStreamA) \ + func(nvtxNameCuStreamW) \ + func(nvtxNameCudaDeviceA) \ + func(nvtxNameCudaDeviceW) \ + func(nvtxNameCudaEventA) \ + func(nvtxNameCudaEventW) \ + func(nvtxNameCudaStreamA) \ + func(nvtxNameCudaStreamW) \ + func(nvtxNameOsThreadA) \ + func(nvtxNameOsThreadW) \ + func(nvtxRangeEnd) \ + func(nvtxRangePop) \ + func(nvtxRangePushA) \ + func(nvtxRangePushEx) \ + func(nvtxRangePushW) \ + func(nvtxRangeStartA) \ + func(nvtxRangeStartEx) \ + func(nvtxRangeStartW) \ + +// ^ Above line must be left blank, since last line of macro 
ends with a backslash + +template +FnPtr GetExport( + DLL_HANDLE hDll, + const char* fnName, + std::vector& found, + std::vector& missing) +{ + FnPtr pfn = (FnPtr)GET_DLL_FUNC(hDll, fnName); + if (pfn) + { + found.push_back(fnName); + } + else + { + missing.push_back(fnName); + } + return pfn; +} + +#define DEFINE_AND_GET_FN_PTR_FOR_EXPORT(fn) \ + auto pfn_##fn = GetExport(hDll, #fn, foundFuncs, missingFuncs); + +extern "C" NVTX_DYNAMIC_EXPORT +int RunTest(int argc, const char** argv) +{ + NVTX_EXPORT_UNMANGLED_FUNCTION_NAME + + bool verbose = false; + const std::string verboseArg = "-v"; + for (; *argv; ++argv) + { + if (*argv == verboseArg) verbose = true; + } + + if (verbose) std::cout << "-------------------------------------\n"; + + // Construct abs path to export-api library + std::string exportApiLib = AbsolutePathToLibraryInCurrentProcessPath("export-api"); + + // Load export-api library + DLL_HANDLE hDll = DLL_OPEN(exportApiLib.c_str()); + if (!hDll) return 201; + + std::vector foundFuncs, missingFuncs; + + // For each export, try to GET_DLL_FUNC for it + // - Don't early-out, print list of all failed exports + + //auto pfn_nvtxMarkA = GetExport(hDll, "nvtxMarkA", foundFuncs, missingFuncs); + //auto pfn_nvtxDomainCreateA = GetExport(hDll, "nvtxDomainCreateA", foundFuncs, missingFuncs); + // ... + FOR_EACH_EXPORT(DEFINE_AND_GET_FN_PTR_FOR_EXPORT) + + if (verbose) std::cout << " - Got non-zero pointers for " << foundFuncs.size() << " NVTX functions.\n"; + + if (verbose) std::cout << " - Trying to call some NVTX functions through the exports...\n"; + + // For a few simple functions, try calling them through function pointers with + // harmless args. If the calling conventions are wrong, these calls will crash. + // If they are working, the NVTX injection should load and print something. 
+ if (pfn_nvtxMarkA) + { + pfn_nvtxMarkA("Testing nvtxMarkA"); + } + + if (pfn_nvtxDomainCreateA) + { + auto hDomain = pfn_nvtxDomainCreateA("Testing nvtxDomainCreateA"); + (void)hDomain; + } + + if (verbose) std::cout << " - Survived calling NVTX functions.\n"; + + if (!missingFuncs.empty()) + { + if (verbose) + { + std::cout << "Missing exports:\n"; + for (auto fnName : missingFuncs) + { + std::cout << " " << fnName << "\n"; + } + } + return 202; + } + + if (verbose) std::cout << "-------------------------------------\n"; + + return 0; +} diff --git a/tests/mingw-w64-x86_64.cmake b/tests/mingw-w64-x86_64.cmake new file mode 100644 index 0000000..39c3be5 --- /dev/null +++ b/tests/mingw-w64-x86_64.cmake @@ -0,0 +1,15 @@ +set(CMAKE_SYSTEM_NAME Windows) +set(CMAKE_SYSTEM_PROCESSOR x86_64) + +set(TOOLCHAIN_PREFIX x86_64-w64-mingw32) + +set(CMAKE_C_COMPILER ${TOOLCHAIN_PREFIX}-gcc) +set(CMAKE_CXX_COMPILER ${TOOLCHAIN_PREFIX}-g++) +set(CMAKE_Fortran_COMPILER ${TOOLCHAIN_PREFIX}-gfortran) +set(CMAKE_RC_COMPILER ${TOOLCHAIN_PREFIX}-windres) + +set(CMAKE_FIND_ROOT_PATH /usr/${TOOLCHAIN_PREFIX}) + +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)