# File viewer metadata: 120 lines, 3.1 KiB, YAML
# Pod-level settings shared by every Kubernetes-backed step below; each step
# merges them into its `podSpec` through the `*common_pod_spec` alias.
common_pod_spec: &common_pod_spec
  priorityClassName: perf-benchmark
  # Restrict scheduling to nodes labelled with this GPU product.
  nodeSelector:
    nvidia.com/gpu.product: NVIDIA-A100-SXM4-80GB
  volumes:
  # Memory-backed emptyDir; containers mount it at /dev/shm.
  - name: devshm
    emptyDir:
      medium: Memory
  # Host directory shared with containers. `type: Directory` means the path
  # must already exist on the node.
  - name: hf-cache
    hostPath:
      path: /root/.cache/huggingface
      type: Directory

# Container-level settings shared by the benchmark containers; each one merges
# them next to its own `image` through the `*common_container_settings` alias.
common_container_settings: &common_container_settings
  command:
  - bash .buildkite/nightly-benchmarks/run-nightly-suite.sh
  resources:
    limits:
      nvidia.com/gpu: 8
  # Volume names refer to the volumes declared in `common_pod_spec`.
  volumeMounts:
  - name: devshm
    mountPath: /dev/shm
  - name: hf-cache
    mountPath: /root/.cache/huggingface
  env:
  - name: VLLM_USAGE_SOURCE
    value: ci-test
  # Same path as the hf-cache mount above.
  - name: HF_HOME
    value: /root/.cache/huggingface
  - name: VLLM_SOURCE_CODE_LOC
    value: /workspace/build/buildkite/vllm/performance-benchmark
  # Taken from a Kubernetes secret instead of being written in this file.
  - name: HF_TOKEN
    valueFrom:
      secretKeyRef:
        name: hf-token-secret
        key: token

# Pipeline steps. Sequence items are written flush with their parent key
# throughout this file.
steps:
# Manual gate: a block step pauses the build until someone unblocks it.
- block: ":rocket: Ready for comparing vllm against alternatives? This will take 4 hours."
# Runs the shared nightly-suite command inside the Triton / TensorRT-LLM image.
- label: "A100 trt benchmark"
  priority: 100
  agents:
    queue: A100
  plugins:
  - kubernetes:
      podSpec:
        # Shared pod settings (scheduling, volumes).
        <<: *common_pod_spec
        containers:
        - image: nvcr.io/nvidia/tritonserver:24.04-trtllm-python-py3
          # Shared command, GPU limit, mounts and environment.
          <<: *common_container_settings

# Runs the shared nightly-suite command inside the lmdeploy image.
- label: "A100 lmdeploy benchmark"
  priority: 100
  agents:
    queue: A100
  plugins:
  - kubernetes:
      podSpec:
        # Shared pod settings (scheduling, volumes).
        <<: *common_pod_spec
        containers:
        - image: openmmlab/lmdeploy:v0.5.0
          # Shared command, GPU limit, mounts and environment.
          <<: *common_container_settings

# Runs the shared nightly-suite command inside the vLLM OpenAI-server image.
- label: "A100 vllm benchmark"
  priority: 100
  agents:
    queue: A100
  plugins:
  - kubernetes:
      podSpec:
        # Shared pod settings (scheduling, volumes).
        <<: *common_pod_spec
        containers:
        # NOTE(review): `latest` is a mutable tag, while the other images in
        # this pipeline are pinned to a version - confirm the floating tag is
        # intended here.
        - image: vllm/vllm-openai:latest
          # Shared command, GPU limit, mounts and environment.
          <<: *common_container_settings

# Runs the shared nightly-suite command inside the text-generation-inference
# image.
- label: "A100 tgi benchmark"
  priority: 100
  agents:
    queue: A100
  plugins:
  - kubernetes:
      podSpec:
        # Shared pod settings (scheduling, volumes).
        <<: *common_pod_spec
        containers:
        - image: ghcr.io/huggingface/text-generation-inference:2.1
          # Shared command, GPU limit, mounts and environment.
          <<: *common_container_settings

# Barrier: a wait step holds the following steps until every earlier step has
# finished.
- wait

# Runs the annotate script. The container settings are spelled out here
# instead of merging `*common_container_settings`: the command differs, and
# there is no hf-cache mount and no HF_HOME variable.
- label: "Plot"
  priority: 100
  agents:
    queue: A100
  plugins:
  - kubernetes:
      podSpec:
        # Shared pod settings (scheduling, volumes).
        <<: *common_pod_spec
        containers:
        - image: vllm/vllm-openai:v0.5.0.post1
          command:
          - bash .buildkite/nightly-benchmarks/scripts/nightly-annotate.sh
          resources:
            limits:
              # NOTE(review): this step reserves the same 8 GPUs as the
              # benchmark steps - confirm the annotate script needs them.
              nvidia.com/gpu: 8
          volumeMounts:
          - name: devshm
            mountPath: /dev/shm
          env:
          - name: VLLM_USAGE_SOURCE
            value: ci-test
          - name: VLLM_SOURCE_CODE_LOC
            value: /workspace/build/buildkite/vllm/performance-benchmark
          # Taken from a Kubernetes secret instead of being written in this
          # file.
          - name: HF_TOKEN
            valueFrom:
              secretKeyRef:
                name: hf-token-secret
                key: token
- wait