From 8b82a89997826af8e0e4ecfaaed60f3b28b1baed Mon Sep 17 00:00:00 2001
From: "Kevin H. Luu"
Date: Wed, 12 Jun 2024 14:00:18 -0700
Subject: [PATCH] [ci] Add AMD, Neuron, Intel tests for AWS CI and turn off default soft fail for GPU tests (#5464)

Signed-off-by: kevin
---
Reviewer notes (placed after the "---" separator, so they are commentary on
the patch and not part of the commit message):

  * Adds an "AMD Tests" group. It loops over `steps` and emits one
    "AMD: <label>" step for every entry whose `mirror_hardwares` contains
    "amd"; each runs through .buildkite/run-amd-test.sh on the `amd` queue
    with `soft_fail: true`.
  * Adds a standalone "Neuron Test" step (queue `neuron`, `soft_fail: false`)
    and a standalone "Intel Test" step (queue `intel`, runs
    .buildkite/run-cpu-test.sh).
  * All three new entries set `depends_on: ~` (YAML null).
  * In the existing per-step loop, `soft_fail` changes from a hard-coded
    `true` to `step.soft_fail`, falling back to `false` when it is unset.
  * NOTE(review): this patch was recovered from a copy whose line breaks and
    leading whitespace had been collapsed. The indentation inside the hunks
    below was reconstructed from the YAML nesting -- verify it against the
    upstream commit before applying.

 .buildkite/test-template-aws.j2 | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/.buildkite/test-template-aws.j2 b/.buildkite/test-template-aws.j2
index 3b5d36b2..645747dd 100644
--- a/.buildkite/test-template-aws.j2
+++ b/.buildkite/test-template-aws.j2
@@ -19,6 +19,34 @@ steps:
           limit: 5
   - wait
 
+  - group: "AMD Tests"
+    depends_on: ~
+    steps:
+    {% for step in steps %}
+    {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
+      - label: "AMD: {{ step.label }}"
+        agents:
+          queue: amd
+        command: bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" ; ")) | safe }}"
+        env:
+          DOCKER_BUILDKIT: "1"
+        soft_fail: true
+    {% endif %}
+    {% endfor %}
+
+  - label: "Neuron Test"
+    depends_on: ~
+    agents:
+      queue: neuron
+    command: bash .buildkite/run-neuron-test.sh
+    soft_fail: false
+
+  - label: "Intel Test"
+    depends_on: ~
+    agents:
+      queue: intel
+    command: bash .buildkite/run-cpu-test.sh
+
   {% for step in steps %}
   - label: "{{ step.label }}"
     agents:
@@ -31,7 +59,7 @@ steps:
       {% else %}
       queue: gpu_1_queue
       {% endif %}
-    soft_fail: true
+    soft_fail: {{ step.soft_fail or false }}
     {% if step.parallelism %}
     parallelism: {{ step.parallelism }}
     {% endif %}