Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more version support in integration-tests.yml #2

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 121 additions & 15 deletions .github/workflows/integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,26 +23,25 @@ jobs:
matrix-required: ${{ steps.set-matrix.outputs.matrix-required }}
matrix-optional: ${{ steps.set-matrix.outputs.matrix-optional }}
steps:
- name: Prepare runner matrix
- name: Prepare matrix
id: set-matrix
run: |
if [ x"${{ github.repository }}" == x"openai/triton" ]; then
echo '::set-output name=matrix-required::[["self-hosted", "A100"], ["self-hosted", "H100"]]'
echo '::set-output name=matrix-optional::[["self-hosted", "gfx908"], ["self-hosted", "arc770"]]'
echo 'matrix-required={"runner": [["self-hosted", "A100"], ["self-hosted", "H100"]], "python-version": ["3.11"], "cuda-version": ["12.1"], "cc": ["clang"]}' >> "$GITHUB_OUTPUT"
echo 'matrix-optional={"runner": [["self-hosted", "gfx908"], ["self-hosted", "arc770"]], "python-version": ["3.11"], "cuda-version": ["12.1"], "cc": ["clang"]}' >> "$GITHUB_OUTPUT"
else
echo '::set-output name=matrix-required::["ubuntu-latest"]'
echo '::set-output name=matrix-optional::["ubuntu-latest"]'
echo 'matrix-required={"runner":["ubuntu-latest", "windows-latest"], "python-version": ["3.8", "3.9", "3.10", "3.11"], "cuda-version": ["11.8.89", "12.1.1"], "cc": ["clang"]}' >> "$GITHUB_OUTPUT"
echo 'matrix-optional={"runner":["ubuntu-latest", "windows-latest"], "python-version": ["3.8", "3.9", "3.10", "3.11"], "cuda-version": ["11.8.89", "12.1.1"], "cc": ["clang"]}' >> "$GITHUB_OUTPUT"
fi

Integration-Tests:
needs: Runner-Preparation

runs-on: ${{ matrix.runner }}
timeout-minutes: 20
timeout-minutes: 60

strategy:
matrix:
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-required)}}
matrix: ${{fromJson(needs.Runner-Preparation.outputs.matrix-required)}}

steps:
- name: Checkout
Expand All @@ -55,15 +54,99 @@ jobs:
echo "BACKEND=CUDA" >> "${GITHUB_ENV}"
echo "TRITON_DISABLE_LINE_INFO=1" >> "${GITHUB_ENV}"

# Install the interpreter version selected by the job matrix.
# setup-python v5 runs on a currently supported Node runtime; v3 targets the
# retired Node 16 runtime.
- name: Set up Python ${{ matrix.python-version }}
  uses: actions/setup-python@v5
  with:
    python-version: ${{ matrix.python-version }}

# Windows runners only: run the msvc-dev-cmd action for the amd64 architecture.
- name: Set up MSVC
  uses: ilammy/msvc-dev-cmd@v1.12.1
  if: ${{ matrix.runner == 'windows-latest' }}
  with:
    arch: 'amd64'

# Create the "triton-env" micromamba environment on runners that are not
# self-hosted.
- name: Setup Micromamba
  if: ${{ matrix.runner[0] != 'self-hosted' }}
  uses: mamba-org/setup-micromamba@v1
  with:
    environment-name: triton-env
    init-shell: bash
    # Package specs; the folded scalar joins them into one space-separated
    # string.
    create-args: >-
      typer
      ca-certificates
      certifi
      openssl
      zlib
      zstd
      llvm>=17.0
    # Strict channel priority, with the CUDA-version-specific nvidia label
    # listed first.
    condarc: |
      channels:
        - nvidia/label/cuda-${{ matrix.cuda-version }}
        - conda-forge
        - pytorch
      channel_priority: strict

# Windows only: download the prebuilt LLVM bundle into ./LLVM, export
# LLVM_SYSPATH, and export CC/CXX according to matrix.cc.
- name: set Environment Variables (Windows)
  if: ${{ matrix.runner == 'windows-latest' }}
  shell: bash -el {0}
  run: |
    llvm_rev=4017f04e
    release_url="https://github.com/wkpark/triton/releases/download/llvm-${llvm_rev}-windows"
    bundle="llvm-${llvm_rev}-windows-x64"

    # Fetch the archive and its companion patch, unpack, rename to LLVM, patch.
    curl -L -O "${release_url}/${bundle}.tar.gz"
    curl -L -O "${release_url}/llvm-fix.patch"
    tar xvf "${bundle}.tar.gz"
    mv "${bundle}" LLVM
    patch -p0 < llvm-fix.patch
    echo "LLVM_SYSPATH=${{ github.workspace }}\\LLVM" >> "$GITHUB_ENV"
    rm -f "${bundle}.tar.gz"

    # A disabled alternative derived the LLVM location from the first 8
    # characters of cmake/llvm-hash.txt under ~/.triton/llvm, relying on
    # setup.py to download and extract it:
    ### LLVM_SHORTHASH="$(cat cmake/llvm-hash.txt | cut -c1-8)"
    ### echo "~/.triton/llvm/llvm-$LLVM_SHORTHASH-windows-x64/bin" >> "$GITHUB_PATH"
    #echo "LLVM_SYSPATH=~/.triton/llvm/llvm-$LLVM_SHORTHASH-windows-x64" >> "$GITHUB_ENV"

    # Export the compiler pair matching matrix.cc; other values export nothing.
    case "${{matrix.cc}}" in
      cl)
        echo "CC=cl" >> "${GITHUB_ENV}"
        echo "CXX=cl" >> "${GITHUB_ENV}"
        ;;
      clang)
        echo "CC=clang" >> "${GITHUB_ENV}"
        echo "CXX=clang++" >> "${GITHUB_ENV}"
        ;;
    esac

# Export CUDA_HOME and CUDA_PATH for later steps, pointing at the micromamba
# environment directory. The directory name must match the environment-name
# passed to the "Setup Micromamba" step (triton-env).
- name: CUDA Setup ${{ matrix.cuda-version }}
  if: matrix.runner[0] != 'self-hosted'
  shell: bash -el {0}
  run: |
    CUDA_HOME="${{ env.MAMBA_ROOT_PREFIX }}/envs/triton-env"
    # Quote the expansions so a prefix containing whitespace is written intact.
    echo "CUDA_HOME=${CUDA_HOME}" >> "$GITHUB_ENV"
    echo "CUDA_PATH=${CUDA_HOME}" >> "$GITHUB_ENV"

# Runners that are not self-hosted: export the backend-related variables for
# all subsequent steps.
- name: Update environment
  if: ${{ matrix.runner[0] != 'self-hosted' }}
  shell: bash
  run: |
    {
      echo "BACKEND=CUDA"
      echo "ENABLE_TMA=0"
      echo "TRITON_DISABLE_LINE_INFO=1"
    } >> "${GITHUB_ENV}"

# Publish the build directory as the step output "build-output-dir" of step id
# "strings", so other steps can reference it instead of repeating the path.
- name: Set reusable strings
  id: strings
  shell: bash
  run: |
    printf 'build-output-dir=%s\n' "${{ github.workspace }}/build" >> "$GITHUB_OUTPUT"

# Delete the ~/.triton directory before the following steps run.
- name: Clear cache
  shell: bash
  run: rm -rf ~/.triton

# Self-hosted runners: put the user-local bin directory at the front of PATH
# for later steps.
- name: Update PATH
  if: ${{ matrix.runner[0] == 'self-hosted' }}
  run: |
    printf 'PATH=%s\n' "${HOME}/.local/bin:${PATH}" >> "${GITHUB_ENV}"

- name: Check pre-commit
shell: bash
run: |
python3 -m pip install --upgrade pre-commit
# TODO: ignore the first yapf failure until https://github.com/google/yapf/issues/1164 is fixed
Expand All @@ -73,17 +156,29 @@ jobs:
python3 -m pre_commit run --all-files --verbose

- name: Install Triton
if: ${{ env.BACKEND == 'CUDA'}}
if: matrix.runner != 'windows-latest'
run: |
cd python
python3 -m pip install --upgrade pip
python3 -m pip install cmake==3.24 ninja pytest-xdist
python3 -m pip install cmake==3.24 ninja pytest-xdist wheel
sudo apt-get update -y
sudo apt-get install -y ccache clang lld
TRITON_BUILD_WITH_CLANG_LLD=true TRITON_BUILD_WITH_CCACHE=true python3 -m pip install --no-build-isolation -vvv '.[tests]'
if [ "${{ matrix.runner }}" = 'ubuntu-latest' ]; then
python3 setup.py bdist_wheel
fi

# Windows counterpart of the "Install Triton" step: upgrades pip, installs the
# build tooling, installs the package from ./python without build isolation,
# then builds a wheel with setup.py.
# NOTE(review): no `shell:` is set, so the runner's default shell is used.
# Confirm that a failing `pip install` aborts the step before `bdist_wheel`
# runs; not every default shell stops on the first failing command.
# NOTE(review): unlike the non-Windows step, this installs `.` rather than
# `.[tests]` — confirm the test extras are intentionally omitted.
- name: Install Triton (Windows)
if: matrix.runner == 'windows-latest'
run: |
cd python
python -m pip install --upgrade pip
python -m pip install cmake==3.24 ninja pytest-xdist wheel
python -m pip install --no-build-isolation -vvv .
python setup.py bdist_wheel

- name: Run lit tests
if: ${{ env.BACKEND == 'CUDA'}}
if: matrix.runner[0] == 'self-hosted' && env.BACKEND == 'CUDA'
run: |
python3 -m pip install lit
cd python
Expand All @@ -94,7 +189,7 @@ jobs:
lit -v "${LIT_TEST_DIR}"

- name: Run python tests on CUDA
if: ${{ env.BACKEND == 'CUDA' }}
if: ${{ (matrix.runner[0] == 'self-hosted') && env.BACKEND == 'CUDA' }}
run: |
cd python/test/unit
python3 -m pytest -vvv -n 8 --ignore=runtime --ignore=operators --ignore=language/test_line_info.py --ignore=language/test_subprocess.py
Expand All @@ -107,10 +202,12 @@ jobs:
python3 -m pytest -vvv hopper/test_flashattention.py

# Delete the ~/.triton directory again before the remaining test steps.
- name: Clear cache
  shell: bash
  run: rm -rf ~/.triton

# Runs only on self-hosted runners. The interpreter switch below is commented
# out, so the step currently sets a single environment variable.
- name: Run interpreter tests
if: matrix.runner[0] == 'self-hosted'
env:
# TRITON_INTERPRET: "1"
# NOTE(review): "CUA_VISIBLE_DEVICES" looks like a misspelling of
# CUDA_VISIBLE_DEVICES. As spelled it presumably has no effect on device
# visibility. Correcting it would hide all GPUs from the tests, so confirm
# the intent (together with TRITON_INTERPRET above) before changing it.
CUA_VISIBLE_DEVICES: ""
Expand All @@ -119,11 +216,19 @@ jobs:
python3 -m pytest -vvv -s operators/test_flash_attention.py

- name: Run partial tests on CUDA
if: ${{ env.BACKEND == 'CUDA' }}
if: ${{ (matrix.runner[0] == 'self-hosted') && env.BACKEND == 'CUDA' }}
run: |
cd python/test/unit
python3 -m pytest -vvv -n 8 operators

# Upload the contents of python/dist from runners that are not self-hosted.
# upload-artifact v3 has been retired by GitHub. v4 requires artifact names to
# be unique within a workflow run, which the runner/python/cuda suffix in the
# name below already provides for each matrix cell.
- name: Upload Build artifacts
  if: matrix.runner[0] != 'self-hosted'
  uses: actions/upload-artifact@v4
  with:
    name: triton-dist ${{ matrix.runner }} python-${{ matrix.python-version }} cuda-${{ matrix.cuda-version }}
    path: |
      ${{ github.workspace }}/python/dist/

- name: Create artifacts archive
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'V100' || matrix.runner[1] == 'A100' || matrix.runner[1] == 'H100')}}
run: |
Expand All @@ -132,20 +237,21 @@ jobs:

- name: Upload artifacts archive
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'V100' || matrix.runner[1] == 'A100' || matrix.runner[1] == 'H100')}}
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v3
with:
name: artifacts ${{ matrix.runner[1] }}
path: ~/.triton/artifacts.tar.gz

- name: Run CXX unittests
if: ${{ env.BACKEND == 'CUDA'}}
if: ${{(matrix.runner[0] == 'self-hosted') && env.BACKEND == 'CUDA'}}
run: |
cd python
cd "build/$(ls build | grep -i cmake)"
ctest


Compare-artifacts:
if: ${{(github.repository == 'openai/triton')}}
needs: Integration-Tests
timeout-minutes: 5

Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ python/build/
python/triton.egg-info/
python/triton/_C/libtriton.pyd
python/triton/_C/libtriton.so
python/triton/_C/triton.dll

# Backends copied from submodules
python/triton/backends/
Expand Down
38 changes: 34 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,17 @@ set(TRITON_CODEGEN_BACKENDS "" CACHE STRING "Enable different codegen backends")
# used conditionally in this file and by lit tests

# Customized release build type with assertions: TritonRelBuildWithAsserts
set(CMAKE_C_FLAGS_TRITONRELBUILDWITHASSERTS "-O2 -g")
set(CMAKE_CXX_FLAGS_TRITONRELBUILDWITHASSERTS "-O2 -g")
# Per-configuration compiler and linker flags for the TritonRelBuildWithAsserts
# build type, selected by toolchain.
if(MSVC)
  # NOTE(review): the MSVC branch uses /Od and /RTC1 where the other branch
  # uses -O2 — confirm an unoptimized configuration is intended here.
  foreach(_lang C CXX)
    set(CMAKE_${_lang}_FLAGS_TRITONRELBUILDWITHASSERTS "/Zi /Ob0 /Od /RTC1")
  endforeach()
  # The same linker flags are applied to every kind of link step.
  foreach(_linker EXE MODULE SHARED STATIC)
    set(CMAKE_${_linker}_LINKER_FLAGS_TRITONRELBUILDWITHASSERTS "/debug:fastlink /INCREMENTAL")
  endforeach()
else()
  foreach(_lang C CXX)
    set(CMAKE_${_lang}_FLAGS_TRITONRELBUILDWITHASSERTS "-O2 -g")
  endforeach()
endif()

# Default build type
if(NOT CMAKE_BUILD_TYPE)
Expand All @@ -45,7 +54,15 @@ endif()

# Compiler flags
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -D__STDC_FORMAT_MACROS -fPIC -std=gnu++17")
# CMAKE_CXX_FLAGS is rebuilt from CMAKE_C_FLAGS plus toolchain-specific options.
if(MSVC)
  # MSVC: define __STDC_FORMAT_MACROS and disable warnings 4244, 4624, 4715, 4530.
  set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -D__STDC_FORMAT_MACROS /wd4244 /wd4624 /wd4715 /wd4530")
elseif(WIN32)
  # Non-MSVC compiler on Windows: no -fPIC, and -Wno-deprecated is added.
  set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -D__STDC_FORMAT_MACROS -std=gnu++17 -Wno-deprecated")
else()
  # Every other platform.
  set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -D__STDC_FORMAT_MACROS -fPIC -std=gnu++17")
endif()

# Third-party
include_directories(${PYBIND11_INCLUDE_DIR})
Expand Down Expand Up @@ -103,7 +120,11 @@ endfunction()


# Disable warnings that show up in external code (gtest;pybind11)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-covered-switch-default -fvisibility=hidden")
# Append warning and visibility options to the C++ flags, per toolchain.
if(MSVC)
  # MSVC only appends /WX-.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX-")
else()
  # Other compilers: -Werror, with covered-switch-default suppressed and
  # hidden default symbol visibility.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-covered-switch-default -fvisibility=hidden")
endif()

include_directories(".")
include_directories(${MLIR_INCLUDE_DIRS})
Expand Down Expand Up @@ -137,6 +158,8 @@ if(TRITON_BUILD_PYTHON_MODULE)

if(PYTHON_INCLUDE_DIRS)
include_directories(${PYTHON_INCLUDE_DIRS})
message(STATUS "PYTHON_LIB_DIRS ${PYTHON_LIB_DIRS}")
link_directories(${PYTHON_LIB_DIRS})
else()
find_package(Python3 REQUIRED COMPONENTS Development Interpreter)
include_directories(${Python3_INCLUDE_DIRS})
Expand Down Expand Up @@ -203,6 +226,8 @@ if(TRITON_BUILD_PYTHON_MODULE)
target_link_libraries(triton PUBLIC ${TRITON_LIBRARIES})
if(WIN32)
target_link_libraries(triton PRIVATE ${CMAKE_DL_LIBS})
set_target_properties(triton PROPERTIES SUFFIX ".pyd")
set_target_properties(triton PROPERTIES PREFIX "lib")
else()
target_link_libraries(triton PRIVATE z)
endif()
Expand All @@ -220,6 +245,11 @@ if(TRITON_BUILD_PYTHON_MODULE AND NOT WIN32)
target_link_libraries(triton PRIVATE ${PYTHON_LDFLAGS})
endif()

# On Windows, declare two options that default to ON before the bin/test/unittest
# subdirectories are added.
if(WIN32)
# Declares CMAKE_USE_WIN32_THREADS_INIT as a user-visible option defaulting to ON.
option(CMAKE_USE_WIN32_THREADS_INIT "using WIN32 threads" ON)
# Presumably read by the bundled googletest build to skip pthreads — TODO confirm.
option(gtest_disable_pthreads "Disable uses of pthreads in gtest." ON)
endif()

add_subdirectory(bin)
add_subdirectory(test)
add_subdirectory(unittest)
1 change: 1 addition & 0 deletions bin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ mlir_check_all_link_libraries(triton-lsp)


add_llvm_executable(triton-llvm-opt
PARTIAL_SOURCES_INTENDED
triton-llvm-opt.cpp

DEPENDS
Expand Down
1 change: 1 addition & 0 deletions lib/Dialect/TritonGPU/IR/Dialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1433,6 +1433,7 @@ MfmaEncodingAttr::getShapePerCTATileForDotOperands(ArrayRef<int64_t> shape,
return {32, parentShapePerCTA[1]};
} else {
assert(0 && "DotOperandEncodingAttr opIdx must be 0 or 1");
return {};
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -634,8 +634,8 @@ void mlir::triton::asyncLaunchDots(scf::ForOp forOp) {
lastOp = op;
op = op->getBlock()->getParentOp();
}
return std::distance(lastOp->getBlock()->getParent()->begin(),
lastOp->getBlock()->getIterator());
return (long)std::distance(lastOp->getBlock()->getParent()->begin(),
lastOp->getBlock()->getIterator());
};
/// XXX(Keren): Clean up the following duplicate code with checkDotOp
/// dots to be pipelined
Expand Down
Loading
Loading