LMI No-Code tests #293
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: LMI No-Code tests | |
on: | |
workflow_dispatch: | |
inputs: | |
djl-version: | |
description: 'The released version of DJL' | |
required: false | |
default: '' | |
schedule: | |
- cron: '0 8 * * *' | |
jobs: | |
create-runners: | |
runs-on: [self-hosted, scheduler] | |
steps: | |
- name: Create new G5.12xl instance | |
id: create_gpu_g512_1 | |
run: | | |
cd /home/ubuntu/djl_benchmark_script/scripts | |
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ | |
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ | |
--fail \ | |
| jq '.token' | tr -d '"' ) | |
./start_instance.sh action_g5 $token djl-serving | |
- name: Create new G5.12xl instance | |
id: create_gpu_g512_2 | |
run: | | |
cd /home/ubuntu/djl_benchmark_script/scripts | |
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ | |
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ | |
--fail \ | |
| jq '.token' | tr -d '"' ) | |
./start_instance.sh action_g5 $token djl-serving | |
- name: Create new G6.12xl instance | |
id: create_gpu_g612_1 | |
run: | | |
cd /home/ubuntu/djl_benchmark_script/scripts | |
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ | |
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ | |
--fail \ | |
| jq '.token' | tr -d '"' ) | |
./start_instance.sh action_g6 $token djl-serving | |
- name: Create new G6.12xl instance | |
id: create_gpu_g612_2 | |
run: | | |
cd /home/ubuntu/djl_benchmark_script/scripts | |
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ | |
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ | |
--fail \ | |
| jq '.token' | tr -d '"' ) | |
./start_instance.sh action_g6 $token djl-serving | |
outputs: | |
g512_instance_id_1: ${{ steps.create_gpu_g512_1.outputs.action_g5_instance_id }} | |
g512_instance_id_2: ${{ steps.create_gpu_g512_2.outputs.action_g5_instance_id }} | |
g612_instance_id_1: ${{ steps.create_gpu_g612_1.outputs.action_g6_instance_id }} | |
g612_instance_id_2: ${{ steps.create_gpu_g612_2.outputs.action_g6_instance_id }} | |
create-runners-p4d: | |
runs-on: [ self-hosted, scheduler ] | |
steps: | |
- name: Create new P4d instance | |
id: create_gpu_p4d | |
run: | | |
cd /home/ubuntu/djl_benchmark_script/scripts | |
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ | |
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ | |
--fail \ | |
| jq '.token' | tr -d '"' ) | |
./start_instance.sh action_lmic_p4d $token djl-serving | |
outputs: | |
p4d_instance_id: ${{ steps.create_gpu_p4d.outputs.action_lmic_p4d_instance_id }} | |
create-runners-inf: | |
runs-on: [ self-hosted, scheduler ] | |
steps: | |
- name: Create new Inf2.24x instance | |
id: create_inf2_24x | |
run: | | |
cd /home/ubuntu/djl_benchmark_script/scripts | |
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ | |
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ | |
--fail \ | |
| jq '.token' | tr -d '"' ) | |
./start_instance.sh action_inf2 $token djl-serving | |
outputs: | |
inf2_24x_instance_id: ${{ steps.create_inf2_24x.outputs.action_inf2_instance_id }} | |
p4d-no-code-tests: | |
runs-on: [self-hosted, p4d] | |
timeout-minutes: 240 | |
needs: create-runners-p4d | |
strategy: | |
# Limit to 1 so we don't steal a p4d from another test that may be running | |
max-parallel: 1 | |
fail-fast: false | |
matrix: | |
container: [tensorrt-llm, lmi] | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Clean env | |
run: | | |
yes | docker system prune -a --volumes | |
sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/ | |
echo "wait dpkg lock..." | |
while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done | |
- name: Set up Python3 | |
uses: actions/setup-python@v5 | |
with: | |
python-version: '3.10.x' | |
- name: Install pip dependencies | |
run: pip3 install requests "numpy<2" pillow | |
- name: Install s5cmd | |
working-directory: serving/docker | |
run: sudo scripts/install_s5cmd.sh x64 | |
- name: Build container name | |
run: ./serving/docker/scripts/docker_name_builder.sh ${{ matrix.container }} ${{ github.event.inputs.djl-version }} | |
- name: Download models and dockers | |
working-directory: serving/docker | |
run: | | |
docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG | |
- name: Llama3-70b lmi container | |
working-directory: tests/integration | |
run: | | |
rm -rf models | |
echo -en "HF_MODEL_ID=s3://djl-llm/llama-3-70b-hf/" > docker_env | |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \ | |
serve | |
python3 llm/client.py no_code llama-70b | |
./remove_container.sh | |
- name: CodeLlama lmi container | |
working-directory: tests/integration | |
if: ${{ matrix.container == 'lmi' }} | |
run: | | |
rm -rf models | |
echo -en "HF_MODEL_ID=s3://djl-llm/CodeLlama-34b-Instruct-hf/" > docker_env | |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \ | |
serve | |
python3 llm/client.py no_code codellama | |
./remove_container.sh | |
- name: Mixtral-8x7b | |
working-directory: tests/integration | |
run: | | |
rm -rf models | |
echo -en "HF_MODEL_ID=s3://djl-llm/mixtral-8x7b/" > docker_env | |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \ | |
serve | |
python3 llm/client.py no_code mixtral-8x7b | |
./remove_container.sh | |
- name: DBRX lmi container | |
working-directory: tests/integration | |
if: ${{ matrix.container == 'lmi' }} | |
run: | | |
rm -rf models | |
echo -e "HF_MODEL_ID=s3://djl-llm/dbrx-instruct/" > docker_env | |
echo -e "HF_MODEL_TRUST_REMOTE_CODE=true" >> docker_env | |
echo -e "MODEL_LOADING_TIMEOUT=3600" >> docker_env | |
echo -e "OPTION_GPU_MEMORY_UTILIZATION=0.95" >> docker_env | |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \ | |
serve | |
python3 llm/client.py no_code dbrx | |
./remove_container.sh | |
- name: On fail step | |
if: ${{ failure() }} | |
working-directory: tests/integration | |
run: | | |
./remove_container.sh || true | |
cat logs/serving.log | |
- name: Upload test logs | |
uses: actions/upload-artifact@v4 | |
with: | |
name: no-code-p4d-${{ matrix.container }}-logs | |
path: tests/integration/logs/ | |
g-series-no-code-tests: | |
runs-on: [self-hosted, "${{ matrix.machine }}"] | |
timeout-minutes: 240 | |
needs: create-runners | |
strategy: | |
fail-fast: false | |
matrix: | |
container: [tensorrt-llm, lmi] | |
machine: [g5, g6] | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Clean env | |
run: | | |
yes | docker system prune -a --volumes | |
sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/ | |
echo "wait dpkg lock..." | |
while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done | |
- name: Set up Python3 | |
uses: actions/setup-python@v5 | |
with: | |
python-version: '3.10.x' | |
- name: Install pip dependencies | |
run: pip3 install requests "numpy<2" huggingface_hub pillow | |
- name: Install s5cmd | |
working-directory: serving/docker | |
run: sudo scripts/install_s5cmd.sh x64 | |
- name: Build container name | |
run: ./serving/docker/scripts/docker_name_builder.sh ${{ matrix.container }} ${{ github.event.inputs.djl-version }} | |
- name: Download models and dockers | |
working-directory: serving/docker | |
run: | | |
docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG | |
- name: Llama2-7b with tgi compat | |
working-directory: tests/integration | |
run: | | |
rm -rf models | |
echo -en "HF_MODEL_ID=s3://djl-llm/llama-2-7b-hf/\nOPTION_TGI_COMPAT=true" > docker_env | |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \ | |
serve | |
python3 llm/tgi_client.py | |
./remove_container.sh | |
- name: Llama3-8b lmi container | |
working-directory: tests/integration | |
run: | | |
rm -rf models | |
echo -en "HF_MODEL_ID=s3://djl-llm/llama-3-8b-hf/" > docker_env | |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \ | |
serve | |
python3 llm/client.py no_code llama-7b | |
./remove_container.sh | |
- name: Llama2-13b lmi container | |
working-directory: tests/integration | |
run: | | |
rm -rf models | |
echo -en "HF_MODEL_ID=s3://djl-llm/llama-2-13b-hf/" > docker_env | |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \ | |
serve | |
python3 llm/client.py no_code llama-13b | |
./remove_container.sh | |
- name: Gemma-2b lmi container | |
working-directory: tests/integration | |
run: | | |
rm -rf models | |
# TODO: Fix tp back to gemma 7b once TRTLLM problem | |
# https://github.com/NVIDIA/TensorRT-LLM/issues/2058 fixed | |
echo -en "HF_MODEL_ID=s3://djl-llm/gemma-2b/\nTENSOR_PARALLEL_DEGREE=1" > docker_env | |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \ | |
serve | |
python3 llm/client.py no_code gemma-7b | |
./remove_container.sh | |
- name: Mistral-7b lmi container | |
working-directory: tests/integration | |
run: | | |
rm -rf models | |
echo -en "HF_MODEL_ID=s3://djl-llm/mistral-7b/" > docker_env | |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \ | |
serve | |
python3 llm/client.py no_code mistral-7b | |
./remove_container.sh | |
- name: GPTNeox lmi container | |
working-directory: tests/integration | |
run: | | |
rm -rf models | |
echo -en "HF_MODEL_ID=s3://djl-llm/gpt-neox-20b/" > docker_env | |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \ | |
serve | |
python3 llm/client.py no_code gpt-neox | |
./remove_container.sh | |
- name: Phi2 lmi container | |
working-directory: tests/integration | |
run: | | |
rm -rf models | |
echo -en "HF_MODEL_ID=s3://djl-llm/phi-2/\nTENSOR_PARALLEL_DEGREE=1" > docker_env | |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \ | |
serve | |
python3 llm/client.py no_code phi-2 | |
./remove_container.sh | |
- name: Baichuan lmi container | |
working-directory: tests/integration | |
run: | | |
rm -rf models | |
echo -en "HF_MODEL_ID=s3://djl-llm/baichuan2-13b/\nHF_MODEL_TRUST_REMOTE_CODE=true" > docker_env | |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \ | |
serve | |
python3 llm/client.py no_code baichuan-13b | |
./remove_container.sh | |
- name: Qwen-1.5 lmi container | |
working-directory: tests/integration | |
if: ${{ matrix.container == 'lmi' }} | |
run: | | |
rm -rf models | |
echo -en "HF_MODEL_ID=Qwen/Qwen1.5-14B" > docker_env | |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \ | |
serve | |
python3 llm/client.py no_code qwen-1.5-14b | |
./remove_container.sh | |
- name: Starcoder2 lmi container | |
working-directory: tests/integration | |
if: ${{ matrix.container == 'lmi' }} | |
run: | | |
rm -rf models | |
echo -en "HF_MODEL_ID=s3://djl-llm/bigcode-starcoder2/" > docker_env | |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \ | |
serve | |
python3 llm/client.py no_code starcoder | |
./remove_container.sh | |
- name: On fail step | |
if: ${{ failure() }} | |
working-directory: tests/integration | |
run: | | |
cat logs/serving.log | |
- name: Upload test logs | |
uses: actions/upload-artifact@v4 | |
with: | |
name: no-code-${{ matrix.machine }}-${{ matrix.container }}-logs | |
path: tests/integration/logs/ | |
inf2-series-no-code-tests: | |
runs-on: [self-hosted, "${{ matrix.machine }}"] | |
timeout-minutes: 240 | |
needs: create-runners | |
strategy: | |
fail-fast: false | |
matrix: | |
container: [pytorch-inf2] | |
machine: [inf2] | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Clean env | |
run: | | |
yes | docker system prune -a --volumes | |
sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/ | |
echo "wait dpkg lock..." | |
while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done | |
- name: Set up Python3 | |
uses: actions/setup-python@v5 | |
with: | |
python-version: '3.10.x' | |
- name: Install pip dependencies | |
run: pip3 install requests "numpy<2" huggingface_hub pillow | |
- name: Install s5cmd | |
working-directory: serving/docker | |
run: sudo scripts/install_s5cmd.sh x64 | |
- name: Build container name | |
run: ./serving/docker/scripts/docker_name_builder.sh ${{ matrix.container }} ${{ github.event.inputs.djl-version }} | |
- name: Download models and dockers | |
working-directory: serving/docker | |
run: | | |
docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG | |
- name: Llama2-7b with tgi compat | |
working-directory: tests/integration | |
run: | | |
rm -rf models | |
echo -en "HF_MODEL_ID=s3://djl-llm/llama-2-7b-hf/\nOPTION_TGI_COMPAT=true" > docker_env | |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }}-2 \ | |
serve | |
python3 llm/tgi_client.py | |
./remove_container.sh | |
- name: Llama3-8b lmi container | |
working-directory: tests/integration | |
run: | | |
rm -rf models | |
echo -en "HF_MODEL_ID=s3://djl-llm/llama-3-8b-hf/" > docker_env | |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }}-2 \ | |
serve | |
python3 llm/client.py no_code llama-7b | |
./remove_container.sh | |
- name: Mistral-7b lmi container | |
working-directory: tests/integration | |
run: | | |
rm -rf models | |
echo -en "HF_MODEL_ID=s3://djl-llm/mistral-7b/" > docker_env | |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }}-2 \ | |
serve | |
python3 llm/client.py no_code mistral-7b | |
./remove_container.sh | |
- name: On fail step | |
if: ${{ failure() }} | |
working-directory: tests/integration | |
run: | | |
cat logs/serving.log | |
- name: Upload test logs | |
uses: actions/upload-artifact@v4 | |
with: | |
name: no-code-${{ matrix.machine }}-${{ matrix.container }}-logs | |
path: tests/integration/logs/ | |
stop-runners-gseries: | |
if: always() | |
runs-on: [self-hosted, scheduler] | |
needs: [create-runners, g-series-no-code-tests] | |
steps: | |
- name: Stop all instances | |
run: | | |
cd /home/ubuntu/djl_benchmark_script/scripts | |
instance_id=${{ needs.create-runners.outputs.g512_instance_id_1 }} | |
./stop_instance.sh $instance_id | |
instance_id=${{ needs.create-runners.outputs.g512_instance_id_2 }} | |
./stop_instance.sh $instance_id | |
instance_id=${{ needs.create-runners.outputs.g612_instance_id_1 }} | |
./stop_instance.sh $instance_id | |
instance_id=${{ needs.create-runners.outputs.g612_instance_id_2 }} | |
./stop_instance.sh $instance_id | |
stop-runners-p4d: | |
if: always() | |
runs-on: [self-hosted, scheduler] | |
needs: [create-runners-p4d, p4d-no-code-tests] | |
steps: | |
- name: Stop all instances | |
run: | | |
cd /home/ubuntu/djl_benchmark_script/scripts | |
instance_id=${{ needs.create-runners-p4d.outputs.p4d_instance_id }} | |
./stop_instance.sh $instance_id | |
stop-runners-inf2: | |
if: always() | |
runs-on: [self-hosted, scheduler] | |
needs: [create-runners-inf, inf2-series-no-code-tests] | |
steps: | |
- name: Stop all instances | |
run: | | |
cd /home/ubuntu/djl_benchmark_script/scripts | |
instance_id=${{ needs.create-runners-inf.outputs.inf2_24x_instance_id }} | |
./stop_instance.sh $instance_id |