forked from huggingface/text-generation-inference
-
Notifications
You must be signed in to change notification settings - Fork 3
/
test.sh
executable file
·21 lines (18 loc) · 901 Bytes
/
test.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!/bin/bash
# Launch a text-generation-inference (TGI) 1.4 server for CodeLlama-7B,
# sharded across 2 GPUs, with dynamic RoPE scaling (factor 8) to extend
# the usable context window to 32768 total tokens.
#
# The text_generation_server volume mount overlays the package installed
# inside the image with the local checkout, so server-side code changes
# take effect without rebuilding the image.
#
# Overridable via environment; defaults reproduce the original behavior:
#   MODEL_ID - model path inside the container (default /models/codellama-7b)
#   PORT     - host port published to the container's port 80 (default 9123)
#   GPUS     - device list passed to docker --gpus (default '"device=0,1"')
set -euo pipefail

# Alternative invocation kept for reference: overrides the entrypoint and
# mounts the full TGI source tree (useful when hacking on TGI itself):
# docker run --entrypoint ./entrypoint.sh \
# --gpus '"device=0,1"' --shm-size 1g --publish 9123:80 \
# --volume /shd/zzr/models:/models \
# --volume /nvme/zzr/text-generation-inference:/usr/src \
# ghcr.nju.edu.cn/huggingface/text-generation-inference:1.4 \

MODEL_ID=${MODEL_ID:-/models/codellama-7b}
PORT=${PORT:-9123}
# Note: the inner double quotes are intentional — docker expects the literal
# string "device=0,1" (quotes included) for a comma-separated device list.
GPUS=${GPUS:-'"device=0,1"'}

docker run --gpus "$GPUS" --shm-size 1g --publish "${PORT}:80" \
  --volume /nvme/zzr/text-generation-inference/server/text_generation_server:/opt/conda/lib/python3.10/site-packages/text_generation_server \
  --volume /shd/zzr/models:/models \
  ghcr.nju.edu.cn/huggingface/text-generation-inference:1.4 \
  --model-id "$MODEL_ID" \
  --num-shard 2 \
  --rope-scaling dynamic \
  --rope-factor 8 \
  --max-input-length 31000 \
  --max-total-tokens 32768 \
  --max-batch-prefill-tokens 31000 \
  --max-stop-sequences 12