Skip to content

Improve performance of reduction for small number of elements to reduce for types where tree-reduction is needed #2880

Improve performance of reduction for small number of elements to reduce for types where tree-reduction is needed

Improve performance of reduction for small number of elements to reduce for types where tree-reduction is needed #2880

name: Build with Open Source LLVM SYCL compiler
on:
pull_request:
push:
branches: [master]
jobs:
install-compiler:
name: Build with nightly build of DPC++ toolchain
runs-on: ubuntu-22.04
env:
DOWNLOAD_URL_PREFIX: https://github.com/intel/llvm/releases/download
DRIVER_PATH: 2023-WW27
OCLCPUEXP_FN: oclcpuexp-2023.16.6.0.28_rel.tar.gz
FPGAEMU_FN: fpgaemu-2023.16.6.0.28_rel.tar.gz
TBB_URL: https://github.com/oneapi-src/oneTBB/releases/download/v2021.9.0/
TBB_INSTALL_DIR: oneapi-tbb-2021.9.0
TBB_FN: oneapi-tbb-2021.9.0-lin.tgz
steps:
- name: Cancel Previous Runs
uses: styfle/cancel-workflow-action@0.11.0
with:
access_token: ${{ github.token }}
- name: Cache sycl bundle
id: cache-sycl-bundle
uses: actions/cache@v3
with:
path: |
/home/runner/work/sycl_bundle
key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('/home/runner/work/sycl_bundle/bundle_id.txt') }}
restore-keys: |
${{ runner.os }}-build-${{ env.cache-name }}-
${{ runner.os }}-build-
${{ runner.os }}-
- name: Download and install nightly and components
env:
ARTIFACT_NAME: sycl_linux
USE_LATEST_SYCLOS: 1
shell: bash -l {0}
run: |
cd /home/runner/work
mkdir -p sycl_bundle
cd sycl_bundle
if [[ "${USE_LATEST_SYCLOS:-0}" -eq "1" ]]; then
# get list of shas and tags from remote, filter nightly tags and reverse order
export LLVM_TAGS=$(git -c 'versionsort.suffix=-' ls-remote --tags --sort='v:refname' https://github.com/intel/llvm.git | \
grep 'refs/tags/nightly-' | awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }')
# initialize
unset DEPLOY_NIGHTLY_TAG
unset DEPLOY_NIGHTLY_TAG_SHA
# go through tags and find the most recent one where nighly build binary is available
while IFS= read -r NEXT_LLVM_TAG; do
export NEXT_LLVM_TAG_SHA=$(echo ${NEXT_LLVM_TAG} | awk '{print $1}')
export NEXT_NIGHTLY_TAG=$(python3 -c "import sys, urllib.parse as ul; print (ul.quote_plus(sys.argv[1]))" \
$(echo ${NEXT_LLVM_TAG} | awk '{gsub(/^refs\/tags\//, "", $2)} {print $2}'))
if [[ `wget -S --spider ${DOWNLOAD_URL_PREFIX}/${NEXT_NIGHTLY_TAG}/${ARTIFACT_NAME}.tar.gz 2>&1 | grep 'HTTP/1.1 200 OK'` ]];
then
export DEPLOY_NIGHTLY_TAG=${NEXT_NIGHTLY_TAG}
export DEPLOY_LLVM_TAG_SHA=${NEXT_LLVM_TAG_SHA}
break
fi
done <<< "${LLVM_TAGS}"
else
# Use latest known to work tag instead
export DEPLOY_NIGHTLY_TAG="sycl-nightly%2F20230606"
export DEPLOY_LLVM_TAG_SHA=f44d0133d4b0077298f034697a1f3818ff1d6134
fi
[[ -n "${DEPLOY_NIGHTLY_TAG}" ]] || exit 1
[[ -n "${DEPLOY_LLVM_TAG_SHA}" ]] || exit 1
echo "Using ${DEPLOY_NIGHTLY_TAG} corresponding to intel/llvm at ${DEPLOY_LLVM_TAG_SHA}"
if [[ -f bundle_id.txt && ( "$(cat bundle_id.txt)" == "${DEPLOY_LLVM_TAG_SHA}" ) ]]; then
echo "Using cached download of ${DEPLOY_LLVM_TAG_SHA}"
else
rm -rf ${ARTIFACT_NAME}.tar.gz
wget ${DOWNLOAD_URL_PREFIX}/${DEPLOY_NIGHTLY_TAG}/${ARTIFACT_NAME}.tar.gz && echo ${DEPLOY_LLVM_TAG_SHA} > bundle_id.txt || rm -rf bundle_id.txt
[ -f ${OCLCPUEXP_FN} ] || wget ${DOWNLOAD_URL_PREFIX}/${DRIVER_PATH}/${OCLCPUEXP_FN} || rm -rf bundle_id.txt
[ -f ${FPGAEMU_FN} ] || wget ${DOWNLOAD_URL_PREFIX}/${DRIVER_PATH}/${FPGAEMU_FN} || rm -rf bundle_id.txt
[ -f ${TBB_FN} ] || wget ${TBB_URL}/${TBB_FN} || rm -rf bundle_id.txt
rm -rf dpcpp_compiler
mkdir -p dpcpp_compiler
tar xf ${ARTIFACT_NAME}.tar.gz -C dpcpp_compiler
mkdir -p oclcpuexp
mkdir -p fpgaemu
[ -d oclcpuexp/x64 ] || tar xf ${OCLCPUEXP_FN} -C oclcpuexp
[ -d fpgaemu/x64 ] || tar xf ${FPGAEMU_FN} -C fpgaemu
[ -d ${TBB_INSTALL_DIR}/lib ] || tar xf ${TBB_FN}
cp oclcpuexp/x64/libOpenCL.so* dpcpp_compiler/lib/
fi
- name: Install system components
shell: bash -l {0}
run: |
sudo apt-get install libtinfo5
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: '3.11'
architecture: x64
- name: Install dpctl dependencies
shell: bash -l {0}
run: |
pip install numpy"<1.26.0" cython setuptools pytest scikit-build cmake ninja
- name: Checkout repo
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Create set_allvars.sh
shell: bash -l {0}
run: |
cat << 'EOF' > set_allvars.sh
#!/usr/bin/bash
export SYCL_BUNDLE_FOLDER=/home/runner/work/sycl_bundle
export PATH=${SYCL_BUNDLE_FOLDER}/dpcpp_compiler/bin:${PATH}
export LD_LIBRARY_PATH=${SYCL_BUNDLE_FOLDER}/dpcpp_compiler/lib:${LD_LIBRARY_PATH}
export LD_LIBRARY_PATH=${SYCL_BUNDLE_FOLDER}/oclcpuexp/x64:${LD_LIBRARY_PATH}
export LD_LIBRARY_PATH=${SYCL_BUNDLE_FOLDER}/fpgaemu/x64:${LD_LIBRARY_PATH}
export LD_LIBRARY_PATH=${SYCL_BUNDLE_FOLDER}/${TBB_INSTALL_DIR}/lib/intel64/gcc4.8:${LD_LIBRARY_PATH}
export OCL_ICD_VENDORS=
export OCL_ICD_FILENAMES=libintelocl.so:libintelocl_emu.so
EOF
chmod +x set_allvars.sh
cat set_allvars.sh
- name: Report compiler version
shell: bash -l {0}
run: |
source set_allvars.sh
clang++ --version
- name: Run sycl-ls
shell: bash -l {0}
run: |
source set_allvars.sh
sycl-ls
- name: build dpctl
shell: bash -l {0}
run: |
source set_allvars.sh
CC=clang CXX=clang++ python setup.py develop -G Ninja
- name: Run lsplatforms
shell: bash -l {0}
run: |
source set_allvars.sh
python -m dpctl -f || exit 1
- name: Run dpctl/tests
shell: bash -l {0}
env:
SYCL_QUEUE_THREAD_POOL_SIZE: 6
run: |
source set_allvars.sh
# skip test due to https://github.com/intel/llvm/issues/9264
python -m pytest -v dpctl/tests -k "not test_event_backend"