[CI/Build] AMD CI pipeline with extended set of tests. (#4267)

Co-authored-by: simon-mo <simon.mo@hey.com>
2024-05-02 14:29:07 -05:00 · 2024-05-02 14:29:07 -05:00 · 9b5c9f9484
commit 9b5c9f9484
parent 32881f3f31
5 changed files with 67 additions and 45 deletions
--- a/.buildkite/run-amd-test.sh
+++ b/.buildkite/run-amd-test.sh
@ -1,10 +1,11 @@
-# This script build the ROCm docker image and run the API server inside the container.
-# It serves a sanity check for compilation and basic model usage.
+# This script builds the ROCm docker image and runs tests inside it.
 set -ex

 # Print ROCm version
+echo "--- ROCm info"
 rocminfo

+echo "--- Resetting GPUs"

 echo "reset" > /opt/amdgpu/etc/gpu_state

@ -16,37 +17,28 @@ while true; do
        fi
 done

+echo "--- Building container"
+# Name the image after the short commit hash of HEAD; the same name is reused
+# for the container and for cleanup later in this script.
+sha=$(git rev-parse --short HEAD)
+container_name="rocm_${sha}"
+docker build \
+        -t "${container_name}" \
+        -f Dockerfile.rocm \
+        --progress plain \
+        .

-
-# Try building the docker image
-docker build -t rocm -f Dockerfile.rocm .
-
-# Setup cleanup
-remove_docker_container() { docker rm -f rocm || true; }
-trap remove_docker_container EXIT
-remove_docker_container
-
-# Run the image
-export HIP_VISIBLE_DEVICES=1
-docker run --device /dev/kfd --device /dev/dri --network host -e HIP_VISIBLE_DEVICES --name rocm rocm python3 -m vllm.entrypoints.api_server &
-
-# Wait for the server to start
-wait_for_server_to_start() {
-    timeout=300
-    counter=0
-
-    while [ "$(curl -s -o /dev/null -w ''%{http_code}'' localhost:8000/health)" != "200" ]; do
-        sleep 1
-        counter=$((counter + 1))
-        if [ $counter -ge $timeout ]; then
-            echo "Timeout after $timeout seconds"
-            break
-        fi
-    done
+# Best-effort cleanup run from the EXIT trap: remove the test container and
+# the per-commit image. The two removals are independent so that the outcome
+# of the first never decides whether the second runs; failures are ignored
+# on purpose so cleanup cannot change the script's exit path.
+remove_docker_container() {
+   docker rm -f "${container_name}" || true
+   docker image rm -f "${container_name}" || true
 }
-wait_for_server_to_start
+trap remove_docker_container EXIT
+
+echo "--- Running container"
+
+# $1 is the test command, which the pipeline template wraps in literal single
+# quotes. Strip that wrapper and hand the result to `bash -c` as ONE quoted
+# argument: if the substitution is left unquoted it is word-split, bash takes
+# only the first word ("cd") as the command string, and the remaining words
+# (including a literal "&&") become positional parameters, so no test runs.
+docker run \
+        --device /dev/kfd --device /dev/dri \
+        --network host \
+        --rm \
+        -e HF_TOKEN \
+        --name "${container_name}" \
+        "${container_name}" \
+        /bin/bash -c "$(printf '%s\n' "$1" | sed -e "s/^'//" -e "s/'\$//")"

-# Test a simple prompt
-curl -X POST -H "Content-Type: application/json" \
-    localhost:8000/generate \
-    -d '{"prompt": "San Francisco is a"}'
--- a/.buildkite/run-benchmarks.sh
+++ b/.buildkite/run-benchmarks.sh
@ -53,6 +53,11 @@ echo '```' >> benchmark_results.md
 tail -n 20 benchmark_serving.txt >> benchmark_results.md # last 20 lines
 echo '```' >> benchmark_results.md

+# if the agent binary is not found, skip uploading the results, exit 0
+if [ ! -f /workspace/buildkite-agent ]; then
+    exit 0
+fi
+
 # upload the results to buildkite
 /workspace/buildkite-agent annotate --style "info" --context "benchmark-results" < benchmark_results.md

--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@ -20,6 +20,7 @@ steps:
  - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py

 - label: Core Test
+  mirror_hardwares: [amd]
  command: pytest -v -s core

 - label: Distributed Comm Ops Test
@ -29,7 +30,10 @@ steps:

 - label: Distributed Tests
  working_dir: "/vllm-workspace/tests/distributed"
-  num_gpus: 2
+
+  num_gpus: 2 # only support 1 or 2 for now.
+  mirror_hardwares: [amd]
+
  commands:
  - pytest -v -s test_pynccl_library.py
  - TEST_DIST_MODEL=facebook/opt-125m pytest -v -s test_basic_distributed_correctness.py
@ -44,6 +48,7 @@ steps:
  - pytest -v -s test_pynccl.py

 - label: Engine Test
+  mirror_hardwares: [amd]
  command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py

 - label: Entrypoints Test
@ -54,6 +59,7 @@ steps:

 - label: Examples Test
  working_dir: "/vllm-workspace/examples"
+  mirror_hardwares: [amd]
  commands:
    # install aws cli for llava_example.py
    - pip install awscli
@ -67,16 +73,19 @@ steps:
  parallelism: 4

 - label: Models Test
+  mirror_hardwares: [amd]
  commands:
    - bash ../.buildkite/download-images.sh
    - pytest -v -s models --ignore=models/test_llava.py --ignore=models/test_mistral.py

 - label: Llava Test
+  mirror_hardwares: [amd]
  commands:
    - bash ../.buildkite/download-images.sh
    - pytest -v -s models/test_llava.py

 - label: Prefix Caching Test
+  mirror_hardwares: [amd]
  commands:
    - pytest -v -s prefix_caching

@ -84,12 +93,15 @@ steps:
  command: pytest -v -s samplers

 - label: LogitsProcessor Test
+  mirror_hardwares: [amd]
  command: pytest -v -s test_logits_processor.py

 - label: Worker Test
+  mirror_hardwares: [amd]
  command: pytest -v -s worker

 - label: Speculative decoding tests
+  mirror_hardwares: [amd]
  command: pytest -v -s spec_decode

 - label: LoRA Test %N
@ -107,6 +119,7 @@ steps:

 - label: Benchmarks
  working_dir: "/vllm-workspace/.buildkite"
+  mirror_hardwares: [amd]
  commands:
  - pip install aiohttp
  - bash run-benchmarks.sh
--- a/.buildkite/test-template.j2
+++ b/.buildkite/test-template.j2
@ -16,18 +16,29 @@ steps:
          limit: 5
  - wait

-  - label: "AMD Test"
-    agents:
-      queue: amd
-    command: bash .buildkite/run-amd-test.sh
+  - group: "AMD Tests"
+    depends_on: ~
+    steps:
+    {% for step in steps %}
+    {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
+      - label: "AMD: {{ step.label }}"
+        agents:
+          queue: amd
+        command: bash .buildkite/run-amd-test.sh "'cd {{ (step.working_dir or default_working_dir) | safe  }} && {{ step.command  or (step.commands | join(' && ')) | safe }}'"
+        env:
+          DOCKER_BUILDKIT: "1"
+    {% endif %}
+    {% endfor %}

  - label: "Neuron Test"
+    depends_on: ~
    agents:
      queue: neuron
    command: bash .buildkite/run-neuron-test.sh
    soft_fail: true

-  - label: "CPU Test"
+  - label: "Intel Test"
+    depends_on: ~
    command: bash .buildkite/run-cpu-test.sh

  {% for step in steps %}
--- a/Dockerfile.rocm
+++ b/Dockerfile.rocm
@ -46,7 +46,7 @@ RUN apt-get update && apt-get install -y \

 ### Mount Point ###
 # When launching the container, mount the code directory to /app
-ARG APP_MOUNT=/app
+ARG APP_MOUNT=/vllm-workspace
 VOLUME [ ${APP_MOUNT} ]
 WORKDIR ${APP_MOUNT}

@ -89,15 +89,16 @@ RUN if [ "$BUILD_TRITON" = "1" ]; then \
    && cd ../..; \
    fi

-COPY ./ /app/vllm
+WORKDIR /vllm-workspace
+COPY . .

 RUN python3 -m pip install --upgrade pip numba

-RUN cd /app \
-    && cd vllm \
-    && pip install -U -r requirements-rocm.txt \
-    && patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h /app/vllm/rocm_patch/rocm_bf16.patch \
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -U -r requirements-rocm.txt \
+    && patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h ./rocm_patch/rocm_bf16.patch \
    && python3 setup.py install \
+    && cp build/lib.linux-x86_64-cpython-39/vllm/_C.cpython-39-x86_64-linux-gnu.so vllm/ \
    && cd ..

 RUN python3 -m pip install --upgrade pip