# Buildkite pipeline that benchmarks vLLM against alternative serving engines
# (TensorRT-LLM, lmdeploy, TGI) and then runs an annotate/plot step.
# Every command step is scheduled through the `kubernetes` plugin; the two
# top-level anchors below hold the settings those steps share.

# Pod-level settings, pulled into each step's `podSpec` with a merge key.
common_pod_spec: &common_pod_spec
  priorityClassName: perf-benchmark
  # Restrict scheduling to nodes whose GPU product label matches.
  nodeSelector:
    nvidia.com/gpu.product: NVIDIA-A100-SXM4-80GB
  volumes:
    # Memory-backed emptyDir; containers mount it at /dev/shm.
    - name: devshm
      emptyDir:
        medium: Memory
    # Hugging Face cache directory taken from the node's filesystem.
    - name: hf-cache
      hostPath:
        path: /root/.cache/huggingface
        type: Directory

# Container-level settings shared by the four benchmark steps; each step
# supplies only its own `image` next to the merge key.
common_container_settings: &common_container_settings
  command:
    - bash .buildkite/nightly-benchmarks/run-nightly-suite.sh
  resources:
    limits:
      nvidia.com/gpu: 8
  volumeMounts:
    - name: devshm
      mountPath: /dev/shm
    - name: hf-cache
      mountPath: /root/.cache/huggingface
  env:
    - name: VLLM_USAGE_SOURCE
      value: ci-test
    # Same path as the hf-cache mount above.
    - name: HF_HOME
      value: /root/.cache/huggingface
    - name: VLLM_SOURCE_CODE_LOC
      value: /workspace/build/buildkite/vllm/performance-benchmark
    # Token is read from a Kubernetes secret rather than stored in this file.
    - name: HF_TOKEN
      valueFrom:
        secretKeyRef:
          name: hf-token-secret
          key: token

steps:
  # Block step placed ahead of the benchmark steps.
  - block: ':rocket: Ready for comparing vllm against alternatives? This will take 4 hours.'

  # --- Benchmark steps: identical apart from label and container image ---

  - label: 'A100 trt benchmark'
    priority: 100
    agents:
      queue: A100
    plugins:
      - kubernetes:
          podSpec:
            <<: *common_pod_spec
            containers:
              - image: nvcr.io/nvidia/tritonserver:24.04-trtllm-python-py3
                <<: *common_container_settings

  - label: 'A100 lmdeploy benchmark'
    priority: 100
    agents:
      queue: A100
    plugins:
      - kubernetes:
          podSpec:
            <<: *common_pod_spec
            containers:
              - image: openmmlab/lmdeploy:v0.5.0
                <<: *common_container_settings

  - label: 'A100 vllm benchmark'
    priority: 100
    agents:
      queue: A100
    plugins:
      - kubernetes:
          podSpec:
            <<: *common_pod_spec
            containers:
              # NOTE(review): floating `latest` tag here, while the Plot step
              # below pins v0.5.0.post1 - confirm this difference is intended.
              - image: vllm/vllm-openai:latest
                <<: *common_container_settings

  - label: 'A100 tgi benchmark'
    priority: 100
    agents:
      queue: A100
    plugins:
      - kubernetes:
          podSpec:
            <<: *common_pod_spec
            containers:
              - image: ghcr.io/huggingface/text-generation-inference:2.1
                <<: *common_container_settings

  # Wait step separating the benchmark steps from the Plot step.
  - wait

  # Runs nightly-annotate.sh. This container spells out its own settings
  # instead of merging common_container_settings: it has no hf-cache mount
  # and no HF_HOME variable. The hf-cache volume is still declared on the pod
  # through common_pod_spec.
  - label: 'Plot'
    priority: 100
    agents:
      queue: A100
    plugins:
      - kubernetes:
          podSpec:
            <<: *common_pod_spec
            containers:
              - image: vllm/vllm-openai:v0.5.0.post1
                command:
                  - bash .buildkite/nightly-benchmarks/scripts/nightly-annotate.sh
                resources:
                  limits:
                    nvidia.com/gpu: 8
                volumeMounts:
                  - name: devshm
                    mountPath: /dev/shm
                env:
                  - name: VLLM_USAGE_SOURCE
                    value: ci-test
                  - name: VLLM_SOURCE_CODE_LOC
                    value: /workspace/build/buildkite/vllm/performance-benchmark
                  - name: HF_TOKEN
                    valueFrom:
                      secretKeyRef:
                        name: hf-token-secret
                        key: token

  - wait