[CI/Build] AMD CI pipeline with extended set of tests. (#4267)

Co-authored-by: simon-mo <simon.mo@hey.com>
2024-05-02 14:29:07 -05:00 · 2024-05-02 14:29:07 -05:00 · 9b5c9f9484
commit 9b5c9f9484
parent 32881f3f31
5 changed files with 67 additions and 45 deletions
--- a/.buildkite/run-amd-test.sh
+++ b/.buildkite/run-amd-test.sh
@ -1,10 +1,11 @@
-# This script build the ROCm docker image and run the API server inside the container.
+# This script builds the ROCm docker image and runs tests inside it.
 # It serves as a sanity check for compilation and basic model usage.
 set -ex
 # Print ROCm version
 echo "--- ROCm info"
 rocminfo
 echo "--- Resetting GPUs"
 echo "reset" > /opt/amdgpu/etc/gpu_state
@ -16,37 +17,28 @@ while true; do
        fi
 done
 echo "--- Building container"
 sha=$(git rev-parse --short HEAD)
 container_name=rocm_${sha}
 docker build \
        -t ${container_name} \
        -f Dockerfile.rocm \
        --progress plain \
        .
-
+remove_docker_container() {
-# Try building the docker image
+   docker rm -f ${container_name} || docker image rm -f ${container_name} || true
 docker build -t rocm -f Dockerfile.rocm .
 # Setup cleanup
 remove_docker_container() { docker rm -f rocm || true; }
 trap remove_docker_container EXIT
 remove_docker_container
 # Run the image
 export HIP_VISIBLE_DEVICES=1
 docker run --device /dev/kfd --device /dev/dri --network host -e HIP_VISIBLE_DEVICES --name rocm rocm python3 -m vllm.entrypoints.api_server &
 # Wait for the server to start
 wait_for_server_to_start() {
    timeout=300
    counter=0
    while [ "$(curl -s -o /dev/null -w ''%{http_code}'' localhost:8000/health)" != "200" ]; do
        sleep 1
        counter=$((counter + 1))
        if [ $counter -ge $timeout ]; then
            echo "Timeout after $timeout seconds"
            break
        fi
    done
 }
-wait_for_server_to_start
+trap remove_docker_container EXIT
 echo "--- Running container"
 docker run \
        --device /dev/kfd --device /dev/dri \
        --network host \
        --rm \
        -e HF_TOKEN \
        --name ${container_name} \
        ${container_name} \
        /bin/bash -c $(echo $1 | sed "s/^'//" | sed "s/'$//")
 # Test a simple prompt
 curl -X POST -H "Content-Type: application/json" \
    localhost:8000/generate \
    -d '{"prompt": "San Francisco is a"}'
--- a/.buildkite/run-benchmarks.sh
+++ b/.buildkite/run-benchmarks.sh
@ -53,6 +53,11 @@ echo '```' >> benchmark_results.md
 tail -n 20 benchmark_serving.txt >> benchmark_results.md # last 20 lines
 echo '```' >> benchmark_results.md
 # if the agent binary is not found, skip uploading the results, exit 0
 if [ ! -f /workspace/buildkite-agent ]; then
    exit 0
 fi
 # upload the results to buildkite
 /workspace/buildkite-agent annotate --style "info" --context "benchmark-results" < benchmark_results.md
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@ -20,6 +20,7 @@ steps:
  - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
 - label: Core Test
  mirror_hardwares: [amd]
  command: pytest -v -s core
 - label: Distributed Comm Ops Test
@ -29,7 +30,10 @@ steps:
 - label: Distributed Tests
  working_dir: "/vllm-workspace/tests/distributed"
-  num_gpus: 2
+
  num_gpus: 2 # only supports 1 or 2 for now.
  mirror_hardwares: [amd]
  commands:
  - pytest -v -s test_pynccl_library.py
  - TEST_DIST_MODEL=facebook/opt-125m pytest -v -s test_basic_distributed_correctness.py
@ -44,6 +48,7 @@ steps:
  - pytest -v -s test_pynccl.py
 - label: Engine Test
  mirror_hardwares: [amd]
  command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py
 - label: Entrypoints Test
@ -54,6 +59,7 @@ steps:
 - label: Examples Test
  working_dir: "/vllm-workspace/examples"
  mirror_hardwares: [amd]
  commands:
    # install aws cli for llava_example.py
    - pip install awscli
@ -67,16 +73,19 @@ steps:
  parallelism: 4
 - label: Models Test
  mirror_hardwares: [amd]
  commands:
    - bash ../.buildkite/download-images.sh
    - pytest -v -s models --ignore=models/test_llava.py --ignore=models/test_mistral.py
 - label: Llava Test
  mirror_hardwares: [amd]
  commands:
    - bash ../.buildkite/download-images.sh
    - pytest -v -s models/test_llava.py
 - label: Prefix Caching Test
  mirror_hardwares: [amd]
  commands:
    - pytest -v -s prefix_caching
@ -84,12 +93,15 @@ steps:
  command: pytest -v -s samplers
 - label: LogitsProcessor Test
  mirror_hardwares: [amd]
  command: pytest -v -s test_logits_processor.py
 - label: Worker Test
  mirror_hardwares: [amd]
  command: pytest -v -s worker
 - label: Speculative decoding tests
  mirror_hardwares: [amd]
  command: pytest -v -s spec_decode
 - label: LoRA Test %N
@ -107,6 +119,7 @@ steps:
 - label: Benchmarks
  working_dir: "/vllm-workspace/.buildkite"
  mirror_hardwares: [amd]
  commands:
  - pip install aiohttp
  - bash run-benchmarks.sh
--- a/.buildkite/test-template.j2
+++ b/.buildkite/test-template.j2
@ -16,18 +16,29 @@ steps:
          limit: 5
  - wait
-  - label: "AMD Test"
+  - group: "AMD Tests"
    depends_on: ~
    steps:
    {% for step in steps %}
    {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
      - label: "AMD: {{ step.label }}"
        agents:
          queue: amd
-    command: bash .buildkite/run-amd-test.sh
+        command: bash .buildkite/run-amd-test.sh "'cd {{ (step.working_dir or default_working_dir) | safe  }} && {{ step.command  or (step.commands | join(' && ')) | safe }}'"
        env:
          DOCKER_BUILDKIT: "1"
    {% endif %}
    {% endfor %}
  - label: "Neuron Test"
    depends_on: ~
    agents:
      queue: neuron
    command: bash .buildkite/run-neuron-test.sh
    soft_fail: true
-  - label: "CPU Test"
+  - label: "Intel Test"
    depends_on: ~
    command: bash .buildkite/run-cpu-test.sh
  {% for step in steps %}
--- a/Dockerfile.rocm
+++ b/Dockerfile.rocm
@ -46,7 +46,7 @@ RUN apt-get update && apt-get install -y \
 ### Mount Point ###
 # When launching the container, mount the code directory to /app
-ARG APP_MOUNT=/app
+ARG APP_MOUNT=/vllm-workspace
 VOLUME [ ${APP_MOUNT} ]
 WORKDIR ${APP_MOUNT}
@ -89,15 +89,16 @@ RUN if [ "$BUILD_TRITON" = "1" ]; then \
    && cd ../..; \
    fi
-COPY ./ /app/vllm
+WORKDIR /vllm-workspace
 COPY . .
 RUN python3 -m pip install --upgrade pip numba
-RUN cd /app \
+RUN --mount=type=cache,target=/root/.cache/pip \
-    && cd vllm \
+    pip install -U -r requirements-rocm.txt \
-    && pip install -U -r requirements-rocm.txt \
+    && patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h ./rocm_patch/rocm_bf16.patch \
    && patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h /app/vllm/rocm_patch/rocm_bf16.patch \
    && python3 setup.py install \
    && cp build/lib.linux-x86_64-cpython-39/vllm/_C.cpython-39-x86_64-linux-gnu.so vllm/ \
    && cd ..
 RUN python3 -m pip install --upgrade pip