#!/bin/bash
#
# Launch a HuggingFace text-generation-inference (TGI) server in Docker.
#
# Usage:
#   ./launch-tgi.sh MODEL_ID MAX_BATCH_TOTAL_TOKENS
#
# Arguments:
#   $1 - model id to serve (passed to --model-id)
#   $2 - value for --max-batch-total-tokens
#
# Environment:
#   HF_TOKEN - HuggingFace access token, forwarded into the container
#              (may be empty for public models)
#   PORT     - host port to expose the server on (default: 8000)

set -euo pipefail

# Host port; overridable via environment, defaults to the original 8000.
PORT=${PORT:-8000}

# Fail fast with a usage message if either required argument is missing.
MODEL=${1:?usage: $0 MODEL_ID MAX_BATCH_TOTAL_TOKENS}

TOKENS=${2:?usage: $0 MODEL_ID MAX_BATCH_TOTAL_TOKENS}

# ${HF_TOKEN:-} keeps 'set -u' from aborting when the token is unset
# (public models need no token; the container sees an empty value, as before).
docker run -e "HF_TOKEN=${HF_TOKEN:-}" --gpus all --shm-size 1g -p "$PORT":80 \
  -v "$PWD/data:/data" \
  ghcr.io/huggingface/text-generation-inference:2.2.0 \
  --model-id "$MODEL" \
  --sharded false \
  --max-input-length 1024 \
  --max-total-tokens 2048 \
  --max-best-of 5 \
  --max-concurrent-requests 5000 \
  --max-batch-total-tokens "$TOKENS"