{#
  Buildkite pipeline template (Jinja2 -> YAML).

  Input: a `steps` list. Each item may provide:
    label            - display name of the test step
    command          - a single shell command, OR
    commands         - a list of shell commands (joined when `command` is unset)
    working_dir      - directory to cd into (falls back to default_working_dir)
    mirror_hardwares - list; when it contains "amd" the step is also emitted
                       in the "AMD Tests" group
    soft_fail        - rendered as the step's soft_fail (falls back to false)
    parallelism      - optional, emitted only when truthy
    num_gpus         - GPU request/limit and priority-class suffix
                       (falls back to default_num_gpu for the resource counts)
    no_gpu           - when truthy, no GPU resources are requested

  Output: one image build step, a wait, the AMD mirror group, the Neuron and
  Intel steps, then one Kubernetes-scheduled step per input item.
#}
{# $BUILDKITE_COMMIT is emitted verbatim; presumably substituted by Buildkite at upload time. #}
{% set docker_image = "us-central1-docker.pkg.dev/vllm-405802/vllm-ci-test-repo/vllm-test:$BUILDKITE_COMMIT" %}
{% set default_num_gpu = 1 %}
{% set default_working_dir = "/vllm-workspace/tests" %}

steps:

  {# Build the test image once and push it; every Kubernetes step below pulls this tag. #}
  - label: ":docker: build image"
    commands:
      - "docker build --build-arg max_jobs=16 --tag {{ docker_image }} --target test --progress plain ."
      - "docker push {{ docker_image }}"
    env:
      DOCKER_BUILDKIT: "1"
    retry:
      automatic:
        - exit_status: -1  # Agent was lost
          limit: 5
        - exit_status: -10  # Agent was lost
          limit: 5
  - wait

  {#
    Hardware-specific steps carry an explicit null depends_on. Per the Buildkite
    dependency docs this declares "no dependencies"; presumably the intent is
    that they do not wait on the image build above - confirm.
  #}
  - group: "AMD Tests"
    depends_on: null
    steps:
    {% for step in steps %}
    {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
      {#
        The whole `command or joined commands` expression is parenthesized before
        `| safe`: filters bind tighter than `or`, so without the parentheses only
        the joined list would be marked safe.
      #}
      - label: "AMD: {{ step.label }}"
        agents:
          queue: amd
        command: bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ (step.command or (step.commands | join(" ; "))) | safe }}"
        env:
          DOCKER_BUILDKIT: "1"
        soft_fail: true
    {% endif %}
    {% endfor %}

  - label: "Neuron Test"
    depends_on: null
    agents:
      queue: neuron
    command: bash .buildkite/run-neuron-test.sh
    soft_fail: false

  - label: "Intel Test"
    depends_on: null
    agents:
      queue: intel
    command: bash .buildkite/run-cpu-test.sh

  {# One Kubernetes-scheduled step per input item. #}
  {% for step in steps %}
  - label: "{{ step.label }}"
    agents:
      queue: kubernetes
    soft_fail: {{ step.soft_fail or false }}
    {% if step.parallelism %}
    parallelism: {{ step.parallelism }}
    {% endif %}
    retry:
      automatic:
        - exit_status: -1  # Agent was lost
          limit: 5
        - exit_status: -10  # Agent was lost
          limit: 5
    plugins:
      - kubernetes:
          podSpec:
            {# Priority class is only set when the step states its GPU count. #}
            {% if step.num_gpus %}
            priorityClassName: gpu-priority-cls-{{ step.num_gpus }}
            {% endif %}
            volumes:
              {# Memory-backed emptyDir, mounted at /dev/shm in the container below. #}
              - name: dshm
                emptyDir:
                  medium: Memory
            containers:
              - image: "{{ docker_image }}"
                command: ["bash"]
                args:
                  - '-c'
                  {# Same parenthesization as the AMD command so `safe` covers both the single command and the joined list. #}
                  - "'cd {{ (step.working_dir or default_working_dir) | safe }} && {{ (step.command or (step.commands | join(' && '))) | safe }}'"
                {# GPU request and limit are identical; skipped entirely for no_gpu steps. #}
                {% if not step.no_gpu %}
                resources:
                  requests:
                    nvidia.com/gpu: "{{ step.num_gpus or default_num_gpu }}"
                  limits:
                    nvidia.com/gpu: "{{ step.num_gpus or default_num_gpu }}"
                {% endif %}
                env:
                  - name: VLLM_USAGE_SOURCE
                    value: ci-test
                  {# HF_TOKEN is read from key `token` of the `hf-token-secret` secret, not inlined here. #}
                  - name: HF_TOKEN
                    valueFrom:
                      secretKeyRef:
                        name: hf-token-secret
                        key: token
                volumeMounts:
                  - mountPath: /dev/shm
                    name: dshm
  {% endfor %}