{# Jinja template for the Buildkite CI pipeline: one image-build step, a wait, then one test step per entry in `steps`. #}
{# Image tag shared by the build step (which pushes it) and every test step
   (which pulls it). Jinja leaves $BUILDKITE_COMMIT as literal text in the
   rendered pipeline. #}
{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %}
{# Directory a test step changes into when it does not set its own working_dir. #}
{% set default_working_dir = "/vllm-workspace/tests" %}

steps:
  # Build the test image for this commit and push it under the docker_image
  # tag so the test steps further down can pull the same image.
  - label: ":docker: build image"
    agents:
      queue: cpu_queue
    commands:
      - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
      - "docker build --build-arg max_jobs=16 --tag {{ docker_image }} --target test --progress plain ."
      - "docker push {{ docker_image }}"
    env:
      DOCKER_BUILDKIT: "1"
    # Automatic retries, up to 5 per listed exit status.
    retry:
      automatic:
        - exit_status: -1  # Agent was lost
          limit: 5
        - exit_status: -10  # Agent was lost
          limit: 5
  # Barrier separating the image build from the test steps that follow.
  - wait

{# Emit one test step per entry of the `steps` list passed to the template.
   Fields read from each entry: label, no_gpu, num_gpus, parallelism,
   working_dir, and either command or commands. #}
{% for step in steps %}
  - label: "{{ step.label }}"
    agents:
      {# Queue choice: CPU-only steps first, then 2- or 4-GPU steps, otherwise
         the single-GPU queue. #}
      {% if step.no_gpu %}
      queue: cpu_queue
      {% elif step.num_gpus == 2 or step.num_gpus == 4 %}
      queue: gpu_4_queue
      {% else %}
      queue: gpu_1_queue
      {% endif %}
    soft_fail: true
    {% if step.parallelism %}
    parallelism: {{ step.parallelism }}
    {% endif %}
    retry:
      automatic:
        - exit_status: -1  # Agent was lost
          limit: 5
        - exit_status: -10  # Agent was lost
          limit: 5
    plugins:
      - docker#v5.2.0:
          image: "{{ docker_image }}"
          always-pull: true
          propagate-environment: true
          {% if not step.no_gpu %}
          gpus: all
          {% endif %}
          {# A filter binds tighter than `or`, so the command alternatives are
             parenthesised before `safe` is applied to the chosen one. A single
             `command` wins; otherwise `commands` are chained with &&. #}
          command: ["bash", "-c", "cd {{ (step.working_dir or default_working_dir) | safe }} && {{ (step.command or (step.commands | join(' && '))) | safe }}"]
          environment:
            - VLLM_USAGE_SOURCE=ci-test
            - HF_TOKEN
            {# Only the step with this exact label gets the XFORMERS backend. #}
            {% if step.label == "Speculative decoding tests" %}
            - VLLM_ATTENTION_BACKEND=XFORMERS
            {% endif %}
          volumes:
            - /dev/shm:/dev/shm
{% endfor %}