diff --git a/.buildkite/run-amd-test.sh b/.buildkite/run-amd-test.sh index 38aff57a..c04e05a9 100644 --- a/.buildkite/run-amd-test.sh +++ b/.buildkite/run-amd-test.sh @@ -1,10 +1,11 @@ -# This script build the ROCm docker image and run the API server inside the container. -# It serves a sanity check for compilation and basic model usage. +# This script builds the ROCm docker image and runs tests inside it. set -ex # Print ROCm version +echo "--- ROCm info" rocminfo +echo "--- Resetting GPUs" echo "reset" > /opt/amdgpu/etc/gpu_state @@ -16,37 +17,28 @@ while true; do fi done +echo "--- Building container" +sha=$(git rev-parse --short HEAD) +container_name=rocm_${sha} +docker build \ + -t ${container_name} \ + -f Dockerfile.rocm \ + --progress plain \ + . - -# Try building the docker image -docker build -t rocm -f Dockerfile.rocm . - -# Setup cleanup -remove_docker_container() { docker rm -f rocm || true; } -trap remove_docker_container EXIT -remove_docker_container - -# Run the image -export HIP_VISIBLE_DEVICES=1 -docker run --device /dev/kfd --device /dev/dri --network host -e HIP_VISIBLE_DEVICES --name rocm rocm python3 -m vllm.entrypoints.api_server & - -# Wait for the server to start -wait_for_server_to_start() { - timeout=300 - counter=0 - - while [ "$(curl -s -o /dev/null -w ''%{http_code}'' localhost:8000/health)" != "200" ]; do - sleep 1 - counter=$((counter + 1)) - if [ $counter -ge $timeout ]; then - echo "Timeout after $timeout seconds" - break - fi - done +remove_docker_container() { + docker rm -f ${container_name} || docker image rm -f ${container_name} || true } -wait_for_server_to_start +trap remove_docker_container EXIT + +echo "--- Running container" + +docker run \ + --device /dev/kfd --device /dev/dri \ + --network host \ + --rm \ + -e HF_TOKEN \ + --name ${container_name} \ + ${container_name} \ + /bin/bash -c $(echo $1 | sed "s/^'//" | sed "s/'$//") -# Test a simple prompt -curl -X POST -H "Content-Type: application/json" \ - 
localhost:8000/generate \ - -d '{"prompt": "San Francisco is a"}' diff --git a/.buildkite/run-benchmarks.sh b/.buildkite/run-benchmarks.sh index f6a542af..7fbad1c4 100644 --- a/.buildkite/run-benchmarks.sh +++ b/.buildkite/run-benchmarks.sh @@ -53,6 +53,11 @@ echo '```' >> benchmark_results.md tail -n 20 benchmark_serving.txt >> benchmark_results.md # last 20 lines echo '```' >> benchmark_results.md +# If the buildkite-agent binary is not found, skip uploading the results and exit 0. +if [ ! -f /workspace/buildkite-agent ]; then + exit 0 +fi + # upload the results to buildkite /workspace/buildkite-agent annotate --style "info" --context "benchmark-results" < benchmark_results.md diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index d518fb9c..e49a5650 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -20,6 +20,7 @@ steps: - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py - label: Core Test + mirror_hardwares: [amd] command: pytest -v -s core - label: Distributed Comm Ops Test @@ -29,7 +30,10 @@ steps: - label: Distributed Tests working_dir: "/vllm-workspace/tests/distributed" - num_gpus: 2 + + num_gpus: 2 # only supports 1 or 2 for now. 
+ mirror_hardwares: [amd] + commands: - pytest -v -s test_pynccl_library.py - TEST_DIST_MODEL=facebook/opt-125m pytest -v -s test_basic_distributed_correctness.py @@ -44,6 +48,7 @@ steps: - pytest -v -s test_pynccl.py - label: Engine Test + mirror_hardwares: [amd] command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py - label: Entrypoints Test @@ -54,6 +59,7 @@ steps: - label: Examples Test working_dir: "/vllm-workspace/examples" + mirror_hardwares: [amd] commands: # install aws cli for llava_example.py - pip install awscli @@ -67,16 +73,19 @@ steps: parallelism: 4 - label: Models Test + mirror_hardwares: [amd] commands: - bash ../.buildkite/download-images.sh - pytest -v -s models --ignore=models/test_llava.py --ignore=models/test_mistral.py - label: Llava Test + mirror_hardwares: [amd] commands: - bash ../.buildkite/download-images.sh - pytest -v -s models/test_llava.py - label: Prefix Caching Test + mirror_hardwares: [amd] commands: - pytest -v -s prefix_caching @@ -84,12 +93,15 @@ steps: command: pytest -v -s samplers - label: LogitsProcessor Test + mirror_hardwares: [amd] command: pytest -v -s test_logits_processor.py - label: Worker Test + mirror_hardwares: [amd] command: pytest -v -s worker - label: Speculative decoding tests + mirror_hardwares: [amd] command: pytest -v -s spec_decode - label: LoRA Test %N @@ -107,6 +119,7 @@ steps: - label: Benchmarks working_dir: "/vllm-workspace/.buildkite" + mirror_hardwares: [amd] commands: - pip install aiohttp - bash run-benchmarks.sh diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2 index 2cb21cac..ea02b6b1 100644 --- a/.buildkite/test-template.j2 +++ b/.buildkite/test-template.j2 @@ -16,18 +16,29 @@ steps: limit: 5 - wait - - label: "AMD Test" - agents: - queue: amd - command: bash .buildkite/run-amd-test.sh + - group: "AMD Tests" + depends_on: ~ + steps: + {% for step in steps %} + {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %} + - label: 
"AMD: {{ step.label }}" + agents: + queue: amd + command: bash .buildkite/run-amd-test.sh "'cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}'" + env: + DOCKER_BUILDKIT: "1" + {% endif %} + {% endfor %} - label: "Neuron Test" + depends_on: ~ agents: queue: neuron command: bash .buildkite/run-neuron-test.sh soft_fail: true - - label: "CPU Test" + - label: "Intel Test" + depends_on: ~ command: bash .buildkite/run-cpu-test.sh {% for step in steps %} diff --git a/Dockerfile.rocm b/Dockerfile.rocm index 3f84b949..d04bb991 100644 --- a/Dockerfile.rocm +++ b/Dockerfile.rocm @@ -46,7 +46,7 @@ RUN apt-get update && apt-get install -y \ ### Mount Point ### # When launching the container, mount the code directory to /app -ARG APP_MOUNT=/app +ARG APP_MOUNT=/vllm-workspace VOLUME [ ${APP_MOUNT} ] WORKDIR ${APP_MOUNT} @@ -89,15 +89,16 @@ RUN if [ "$BUILD_TRITON" = "1" ]; then \ && cd ../..; \ fi -COPY ./ /app/vllm +WORKDIR /vllm-workspace +COPY . . RUN python3 -m pip install --upgrade pip numba -RUN cd /app \ - && cd vllm \ - && pip install -U -r requirements-rocm.txt \ - && patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h /app/vllm/rocm_patch/rocm_bf16.patch \ +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install -U -r requirements-rocm.txt \ + && patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h ./rocm_patch/rocm_bf16.patch \ && python3 setup.py install \ + && cp build/lib.linux-x86_64-cpython-39/vllm/_C.cpython-39-x86_64-linux-gnu.so vllm/ \ && cd .. RUN python3 -m pip install --upgrade pip