Canary-GPU #476
name: Canary-GPU

on:
  schedule:
    - cron: '0 4 * * *'
  workflow_dispatch:
    inputs:
      repo-id:
        description: 'staging repository id to test'
        required: false
        default: ''
      djl-version:
        description: 'djl version to test'
        required: false
      pt-version:
        description: 'pytorch version to test'
        required: false
        default: ''
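# A minimal sketch of a manual run with the GitHub CLI; the workflow file name
# canary_gpu.yml is an assumption, substitute the actual file name in this repository:
#   gh workflow run canary_gpu.yml -R deepjavalibrary/djl-demo \
#     -f repo-id=<staging-repo-id> -f djl-version=<djl-version> -f pt-version=<pytorch-version>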

jobs:
  canary-test-cuda112:
    if: github.repository == 'deepjavalibrary/djl-demo'
    runs-on: [ self-hosted, gpu ]
    container:
      image: nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu18.04
      options: --gpus all --runtime=nvidia
    env:
      AWS_REGION: us-east-1
      DJL_STAGING: ${{github.event.inputs.repo-id}}
      DJL_VERSION: ${{github.event.inputs.djl-version}}
      PT_VERSION: ${{github.event.inputs.pt-version}}
    timeout-minutes: 30
    needs: create-gpu-runner
    steps:
      - name: Setup Environment
        run: |
          apt-get update
          apt-get install -y software-properties-common wget libgomp1
      - uses: actions/checkout@v3
      - name: Set up JDK 11
        uses: actions/setup-java@v3
        with:
          java-version: 11
          distribution: corretto
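      # DJL caches downloaded native libraries under /root/.djl.ai/ inside the container;
      # each run below wipes that cache so every engine flavor is resolved and downloaded fresh.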
      - name: Test MXNet
        working-directory: canary
        run: |
          set -x
          DJL_ENGINE=mxnet-native-auto ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=mxnet-native-mkl ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=mxnet-native-cu112mkl ./gradlew clean run
          rm -rf /root/.djl.ai/
      - name: Test PyTorch
        working-directory: canary
        run: |
          set -x
          DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=$PT_VERSION ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=1.11.0 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=1.12.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=1.13.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=2.0.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=$PT_VERSION ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=1.11.0 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=1.12.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=1.13.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=2.0.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cpu ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cpu-precxx11 ./gradlew clean run
          rm -rf /root/.djl.ai/
          # Since 0.22.0, this lower CUDA version is no longer supported, so the GPU flavors below fall back to CPU
          DJL_ENGINE=pytorch-native-cu117 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu113 PT_VERSION=1.11.0 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu116 PT_VERSION=1.12.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu117 PT_VERSION=1.13.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu118 PT_VERSION=2.0.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu117-precxx11 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu113-precxx11 PT_VERSION=1.11.0 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu116-precxx11 PT_VERSION=1.12.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu117-precxx11 PT_VERSION=1.13.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu118-precxx11 PT_VERSION=2.0.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
      - name: Test Tensorflow
        working-directory: canary
        run: |
          set -x
          DJL_ENGINE=mxnet-native-auto ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=tensorflow-native-cpu ./gradlew clean run
          rm -rf /root/.djl.ai/
          # tensorflow-native-cu113 cannot run on CU112 since 0.22.0
      - name: Test Paddle
        working-directory: canary
        run: |
          set -x
          mkdir -p $HOME/.djl.ai/paddle/2.3.2-cu112-linux-x86_64
          DJL_CACHE_DIR=$HOME/.djl.ai/ DJL_ENGINE=paddlepaddle-native-auto \
            LD_LIBRARY_PATH=$DJL_CACHE_DIR/paddle/2.3.2-cu112-linux-x86_64 \
            ./gradlew clean run
          rm -rf /root/.djl.ai/
          mkdir -p $HOME/.djl.ai/paddle/2.3.2-20221103-cu112-linux-x86_64
          DJL_CACHE_DIR=$HOME/.djl.ai/ DJL_ENGINE=paddlepaddle-native-cu112 \
            LD_LIBRARY_PATH=$DJL_CACHE_DIR/paddle/2.3.2-20221103-cu112-linux-x86_64 \
            ./gradlew clean run
          rm -rf /root/.djl.ai/
      - name: Test Xgboost GPU
        working-directory: canary
        run: |
          DJL_ENGINE=xgboost-gpu ./gradlew clean run
          rm -rf /root/.djl.ai/
  canary-test-cuda113:
    if: github.repository == 'deepjavalibrary/djl-demo'
    runs-on: [ self-hosted, gpu ]
    container:
      image: nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu18.04
      options: --gpus all --runtime=nvidia
    env:
      AWS_REGION: us-east-1
      DJL_STAGING: ${{github.event.inputs.repo-id}}
      DJL_VERSION: ${{github.event.inputs.djl-version}}
      PT_VERSION: ${{github.event.inputs.pt-version}}
    timeout-minutes: 30
    needs: create-gpu-runner
    steps:
      - name: Setup Environment
        run: |
          apt-get update
          apt-get install -y software-properties-common wget libgomp1
      - uses: actions/checkout@v3
      - name: Set up JDK 11
        uses: actions/setup-java@v3
        with:
          java-version: 11
          distribution: corretto
      - name: Test MXNet
        working-directory: canary
        run: |
          set -x
          DJL_ENGINE=mxnet-native-auto ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=mxnet-native-mkl ./gradlew clean run
          rm -rf /root/.djl.ai/
      - name: Test PyTorch
        working-directory: canary
        run: |
          set -x
          DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=$PT_VERSION ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=2.0.1 PYTORCH_FLAVOR=cu118 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=2.0.1 PYTORCH_FLAVOR=cu118-precxx11 ./gradlew clean run
          rm -rf /root/.djl.ai/
      - name: Test Tensorflow
        working-directory: canary
        run: |
          set -x
          DJL_ENGINE=tensorflow-native-auto ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=tensorflow-native-cpu ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=tensorflow-native-cu113 ./gradlew clean run
          rm -rf /root/.djl.ai/
      - name: Test Paddle
        working-directory: canary
        run: |
          set -x
          DJL_ENGINE=paddlepaddle-native-auto ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=paddlepaddle-native-cpu ./gradlew clean run
          rm -rf /root/.djl.ai/
      - name: Test Xgboost GPU
        working-directory: canary
        run: |
          set -x
          DJL_ENGINE=xgboost-gpu ./gradlew clean run
          rm -rf /root/.djl.ai/
  canary-test-cuda118:
    if: github.repository == 'deepjavalibrary/djl-demo'
    runs-on: [ self-hosted, gpu ]
    container:
      image: nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu18.04
      options: --gpus all --runtime=nvidia
    env:
      AWS_REGION: us-east-1
      DJL_STAGING: ${{github.event.inputs.repo-id}}
      DJL_VERSION: ${{github.event.inputs.djl-version}}
      PT_VERSION: ${{github.event.inputs.pt-version}}
    timeout-minutes: 30
    needs: create-gpu-runner
    steps:
      - name: Setup Environment
        run: |
          apt-get update
          apt-get install -y software-properties-common wget libgomp1
      - uses: actions/checkout@v3
      - name: Set up JDK 11
        uses: actions/setup-java@v3
        with:
          java-version: 11
          distribution: corretto
      - name: Test MXNet
        working-directory: canary
        run: |
          set -x
          DJL_ENGINE=mxnet-native-auto ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=mxnet-native-mkl ./gradlew clean run
          rm -rf /root/.djl.ai/
      - name: Test PyTorch
        working-directory: canary
        run: |
          set -x
          DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=$PT_VERSION ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=1.11.0 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=1.12.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=1.13.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=2.0.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=$PT_VERSION ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=1.11.0 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=1.12.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=1.13.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=2.0.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cpu ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cpu-precxx11 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu117 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu113 PT_VERSION=1.11.0 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu116 PT_VERSION=1.12.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu117 PT_VERSION=1.13.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu118 PT_VERSION=2.0.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu117-precxx11 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu113-precxx11 PT_VERSION=1.11.0 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu116-precxx11 PT_VERSION=1.12.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu117-precxx11 PT_VERSION=1.13.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=pytorch-native-cu118-precxx11 PT_VERSION=2.0.1 ./gradlew clean run
          rm -rf /root/.djl.ai/
      - name: Test Tensorflow
        working-directory: canary
        run: |
          set -x
          DJL_ENGINE=tensorflow-native-auto ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=tensorflow-native-cpu ./gradlew clean run
          rm -rf /root/.djl.ai/
      - name: Test Paddle
        working-directory: canary
        run: |
          set -x
          DJL_ENGINE=paddlepaddle-native-auto ./gradlew clean run
          rm -rf /root/.djl.ai/
          DJL_ENGINE=paddlepaddle-native-cpu ./gradlew clean run
          rm -rf /root/.djl.ai/
      - name: Test Xgboost GPU
        working-directory: canary
        run: |
          set -x
          DJL_ENGINE=xgboost-gpu ./gradlew clean run
          rm -rf /root/.djl.ai/
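  # create-gpu-runner obtains a self-hosted runner registration token from the GitHub API
  # and launches the GPU instance (start_instance.sh) that the canary jobs above run on.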
  create-gpu-runner:
    if: github.repository == 'deepjavalibrary/djl-demo'
    runs-on: [ self-hosted, scheduler ]
    steps:
      - name: Create new GPU instance
        id: create_gpu
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
            https://api.github.com/repos/deepjavalibrary/djl-demo/actions/runners/registration-token \
            --fail \
            | jq '.token' | tr -d '"' )
          ./start_instance.sh action_gpu $token djl-demo
    outputs:
      gpu_instance_id: ${{ steps.create_gpu.outputs.action_gpu_instance_id }}
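  # stop-runners uses "if: always()" so the GPU instance started by create-gpu-runner is
  # stopped even when one of the canary jobs fails or times out.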
  stop-runners:
    if: always()
    runs-on: [ self-hosted, scheduler ]
    needs: [ create-gpu-runner, canary-test-cuda112, canary-test-cuda113, canary-test-cuda118 ]
    steps:
      - name: Stop all instances
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          instance_id=${{ needs.create-gpu-runner.outputs.gpu_instance_id }}
          ./stop_instance.sh $instance_id