diff --git a/.buildkite/test-template-aws.j2 b/.buildkite/test-template-aws.j2
index fb34b787..1a7fb44c 100644
--- a/.buildkite/test-template-aws.j2
+++ b/.buildkite/test-template-aws.j2
@@ -42,12 +42,18 @@ steps:
     command: bash .buildkite/run-neuron-test.sh
     soft_fail: false
 
-  - label: "Intel Test"
+  - label: "Intel CPU Test"
     depends_on: ~
     agents:
-      queue: intel
+      queue: intel-cpu
     command: bash .buildkite/run-cpu-test.sh
 
+  - label: "Intel GPU Test"
+    depends_on: ~
+    agents:
+      queue: intel-gpu
+    command: bash .buildkite/run-xpu-test.sh
+
 {% for step in steps %}
 {% if step.gpu == "a100" %}
   - label: "{{ step.label }}"
diff --git a/README.md b/README.md
index c24768bf..3e0da945 100644
--- a/README.md
+++ b/README.md
@@ -59,7 +59,7 @@ vLLM is flexible and easy to use with:
 - Tensor parallelism support for distributed inference
 - Streaming outputs
 - OpenAI-compatible API server
-- Support NVIDIA GPUs, AMD GPUs, and Intel CPUs
+- Support NVIDIA GPUs, AMD GPUs, Intel CPUs and GPUs
 - (Experimental) Prefix caching support
 - (Experimental) Multi-lora support
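
Note: the new "Intel GPU Test" step invokes .buildkite/run-xpu-test.sh, which is not shown in this diff. A minimal sketch of what such a script might look like, modeled on the sibling per-backend test scripts; the Dockerfile.xpu path, the xpu-test image name, and the smoke-test command are assumptions for illustration, not the actual file:

    #!/bin/bash
    # Hypothetical sketch: build an Intel GPU (XPU) image and run a
    # smoke test inside it. Fail fast and echo commands for CI logs.
    set -ex

    # Build the docker image (the Dockerfile.xpu path is an assumption).
    docker build -t xpu-test -f Dockerfile.xpu .

    # Remove any leftover container from a previous run, and clean up
    # again when the script exits, even on failure.
    remove_docker_container() { docker rm -f xpu-test || true; }
    trap remove_docker_container EXIT
    remove_docker_container

    # Run a basic offline-inference smoke test on the Intel GPU;
    # /dev/dri exposes the GPU device to the container.
    docker run --device /dev/dri --name xpu-test xpu-test \
        python3 examples/offline_inference.py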