[CI/Build] AMD CI pipeline with extended set of tests. (#4267)
Co-authored-by: simon-mo <simon.mo@hey.com>
This commit is contained in:
parent
32881f3f31
commit
9b5c9f9484
@ -1,10 +1,11 @@
|
|||||||
# This script builds the ROCm docker image and runs the API server inside the container.
|
# This script builds the ROCm docker image and runs tests inside it.
|
||||||
# It serves as a sanity check for compilation and basic model usage.
|
|
||||||
set -ex
|
set -ex
|
||||||
|
|
||||||
# Print ROCm version
|
# Print ROCm version
|
||||||
|
echo "--- ROCm info"
|
||||||
rocminfo
|
rocminfo
|
||||||
|
|
||||||
|
echo "--- Resetting GPUs"
|
||||||
|
|
||||||
echo "reset" > /opt/amdgpu/etc/gpu_state
|
echo "reset" > /opt/amdgpu/etc/gpu_state
|
||||||
|
|
||||||
@ -16,37 +17,28 @@ while true; do
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
|
echo "--- Building container"
|
||||||
|
sha=$(git rev-parse --short HEAD)
|
||||||
|
container_name=rocm_${sha}
|
||||||
|
docker build \
|
||||||
|
-t ${container_name} \
|
||||||
|
-f Dockerfile.rocm \
|
||||||
|
--progress plain \
|
||||||
|
.
|
||||||
|
|
||||||
|
remove_docker_container() {
|
||||||
# Try building the docker image
|
docker rm -f ${container_name} || docker image rm -f ${container_name} || true
|
||||||
docker build -t rocm -f Dockerfile.rocm .
|
|
||||||
|
|
||||||
# Setup cleanup
|
|
||||||
remove_docker_container() { docker rm -f rocm || true; }
|
|
||||||
trap remove_docker_container EXIT
|
|
||||||
remove_docker_container
|
|
||||||
|
|
||||||
# Run the image
|
|
||||||
export HIP_VISIBLE_DEVICES=1
|
|
||||||
docker run --device /dev/kfd --device /dev/dri --network host -e HIP_VISIBLE_DEVICES --name rocm rocm python3 -m vllm.entrypoints.api_server &
|
|
||||||
|
|
||||||
# Wait for the server to start
|
|
||||||
wait_for_server_to_start() {
|
|
||||||
timeout=300
|
|
||||||
counter=0
|
|
||||||
|
|
||||||
while [ "$(curl -s -o /dev/null -w ''%{http_code}'' localhost:8000/health)" != "200" ]; do
|
|
||||||
sleep 1
|
|
||||||
counter=$((counter + 1))
|
|
||||||
if [ $counter -ge $timeout ]; then
|
|
||||||
echo "Timeout after $timeout seconds"
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
}
|
}
|
||||||
wait_for_server_to_start
|
trap remove_docker_container EXIT
|
||||||
|
|
||||||
|
echo "--- Running container"
|
||||||
|
|
||||||
|
docker run \
|
||||||
|
--device /dev/kfd --device /dev/dri \
|
||||||
|
--network host \
|
||||||
|
--rm \
|
||||||
|
-e HF_TOKEN \
|
||||||
|
--name ${container_name} \
|
||||||
|
${container_name} \
|
||||||
|
/bin/bash -c $(echo $1 | sed "s/^'//" | sed "s/'$//")
|
||||||
|
|
||||||
# Test a simple prompt
|
|
||||||
curl -X POST -H "Content-Type: application/json" \
|
|
||||||
localhost:8000/generate \
|
|
||||||
-d '{"prompt": "San Francisco is a"}'
|
|
||||||
|
|||||||
@ -53,6 +53,11 @@ echo '```' >> benchmark_results.md
|
|||||||
tail -n 20 benchmark_serving.txt >> benchmark_results.md # last 20 lines
|
tail -n 20 benchmark_serving.txt >> benchmark_results.md # last 20 lines
|
||||||
echo '```' >> benchmark_results.md
|
echo '```' >> benchmark_results.md
|
||||||
|
|
||||||
|
# if the agent binary is not found, skip uploading the results, exit 0
|
||||||
|
if [ ! -f /workspace/buildkite-agent ]; then
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
# upload the results to buildkite
|
# upload the results to buildkite
|
||||||
/workspace/buildkite-agent annotate --style "info" --context "benchmark-results" < benchmark_results.md
|
/workspace/buildkite-agent annotate --style "info" --context "benchmark-results" < benchmark_results.md
|
||||||
|
|
||||||
|
|||||||
@ -20,6 +20,7 @@ steps:
|
|||||||
- VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
|
- VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
|
||||||
|
|
||||||
- label: Core Test
|
- label: Core Test
|
||||||
|
mirror_hardwares: [amd]
|
||||||
command: pytest -v -s core
|
command: pytest -v -s core
|
||||||
|
|
||||||
- label: Distributed Comm Ops Test
|
- label: Distributed Comm Ops Test
|
||||||
@ -29,7 +30,10 @@ steps:
|
|||||||
|
|
||||||
- label: Distributed Tests
|
- label: Distributed Tests
|
||||||
working_dir: "/vllm-workspace/tests/distributed"
|
working_dir: "/vllm-workspace/tests/distributed"
|
||||||
num_gpus: 2
|
|
||||||
|
num_gpus: 2 # only support 1 or 2 for now.
|
||||||
|
mirror_hardwares: [amd]
|
||||||
|
|
||||||
commands:
|
commands:
|
||||||
- pytest -v -s test_pynccl_library.py
|
- pytest -v -s test_pynccl_library.py
|
||||||
- TEST_DIST_MODEL=facebook/opt-125m pytest -v -s test_basic_distributed_correctness.py
|
- TEST_DIST_MODEL=facebook/opt-125m pytest -v -s test_basic_distributed_correctness.py
|
||||||
@ -44,6 +48,7 @@ steps:
|
|||||||
- pytest -v -s test_pynccl.py
|
- pytest -v -s test_pynccl.py
|
||||||
|
|
||||||
- label: Engine Test
|
- label: Engine Test
|
||||||
|
mirror_hardwares: [amd]
|
||||||
command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py
|
command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py
|
||||||
|
|
||||||
- label: Entrypoints Test
|
- label: Entrypoints Test
|
||||||
@ -54,6 +59,7 @@ steps:
|
|||||||
|
|
||||||
- label: Examples Test
|
- label: Examples Test
|
||||||
working_dir: "/vllm-workspace/examples"
|
working_dir: "/vllm-workspace/examples"
|
||||||
|
mirror_hardwares: [amd]
|
||||||
commands:
|
commands:
|
||||||
# install aws cli for llava_example.py
|
# install aws cli for llava_example.py
|
||||||
- pip install awscli
|
- pip install awscli
|
||||||
@ -67,16 +73,19 @@ steps:
|
|||||||
parallelism: 4
|
parallelism: 4
|
||||||
|
|
||||||
- label: Models Test
|
- label: Models Test
|
||||||
|
mirror_hardwares: [amd]
|
||||||
commands:
|
commands:
|
||||||
- bash ../.buildkite/download-images.sh
|
- bash ../.buildkite/download-images.sh
|
||||||
- pytest -v -s models --ignore=models/test_llava.py --ignore=models/test_mistral.py
|
- pytest -v -s models --ignore=models/test_llava.py --ignore=models/test_mistral.py
|
||||||
|
|
||||||
- label: Llava Test
|
- label: Llava Test
|
||||||
|
mirror_hardwares: [amd]
|
||||||
commands:
|
commands:
|
||||||
- bash ../.buildkite/download-images.sh
|
- bash ../.buildkite/download-images.sh
|
||||||
- pytest -v -s models/test_llava.py
|
- pytest -v -s models/test_llava.py
|
||||||
|
|
||||||
- label: Prefix Caching Test
|
- label: Prefix Caching Test
|
||||||
|
mirror_hardwares: [amd]
|
||||||
commands:
|
commands:
|
||||||
- pytest -v -s prefix_caching
|
- pytest -v -s prefix_caching
|
||||||
|
|
||||||
@ -84,12 +93,15 @@ steps:
|
|||||||
command: pytest -v -s samplers
|
command: pytest -v -s samplers
|
||||||
|
|
||||||
- label: LogitsProcessor Test
|
- label: LogitsProcessor Test
|
||||||
|
mirror_hardwares: [amd]
|
||||||
command: pytest -v -s test_logits_processor.py
|
command: pytest -v -s test_logits_processor.py
|
||||||
|
|
||||||
- label: Worker Test
|
- label: Worker Test
|
||||||
|
mirror_hardwares: [amd]
|
||||||
command: pytest -v -s worker
|
command: pytest -v -s worker
|
||||||
|
|
||||||
- label: Speculative decoding tests
|
- label: Speculative decoding tests
|
||||||
|
mirror_hardwares: [amd]
|
||||||
command: pytest -v -s spec_decode
|
command: pytest -v -s spec_decode
|
||||||
|
|
||||||
- label: LoRA Test %N
|
- label: LoRA Test %N
|
||||||
@ -107,6 +119,7 @@ steps:
|
|||||||
|
|
||||||
- label: Benchmarks
|
- label: Benchmarks
|
||||||
working_dir: "/vllm-workspace/.buildkite"
|
working_dir: "/vllm-workspace/.buildkite"
|
||||||
|
mirror_hardwares: [amd]
|
||||||
commands:
|
commands:
|
||||||
- pip install aiohttp
|
- pip install aiohttp
|
||||||
- bash run-benchmarks.sh
|
- bash run-benchmarks.sh
|
||||||
|
|||||||
@ -16,18 +16,29 @@ steps:
|
|||||||
limit: 5
|
limit: 5
|
||||||
- wait
|
- wait
|
||||||
|
|
||||||
- label: "AMD Test"
|
- group: "AMD Tests"
|
||||||
|
depends_on: ~
|
||||||
|
steps:
|
||||||
|
{% for step in steps %}
|
||||||
|
{% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
|
||||||
|
- label: "AMD: {{ step.label }}"
|
||||||
agents:
|
agents:
|
||||||
queue: amd
|
queue: amd
|
||||||
command: bash .buildkite/run-amd-test.sh
|
command: bash .buildkite/run-amd-test.sh "'cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}'"
|
||||||
|
env:
|
||||||
|
DOCKER_BUILDKIT: "1"
|
||||||
|
{% endif %}
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
- label: "Neuron Test"
|
- label: "Neuron Test"
|
||||||
|
depends_on: ~
|
||||||
agents:
|
agents:
|
||||||
queue: neuron
|
queue: neuron
|
||||||
command: bash .buildkite/run-neuron-test.sh
|
command: bash .buildkite/run-neuron-test.sh
|
||||||
soft_fail: true
|
soft_fail: true
|
||||||
|
|
||||||
- label: "CPU Test"
|
- label: "Intel Test"
|
||||||
|
depends_on: ~
|
||||||
command: bash .buildkite/run-cpu-test.sh
|
command: bash .buildkite/run-cpu-test.sh
|
||||||
|
|
||||||
{% for step in steps %}
|
{% for step in steps %}
|
||||||
|
|||||||
@ -46,7 +46,7 @@ RUN apt-get update && apt-get install -y \
|
|||||||
|
|
||||||
### Mount Point ###
|
### Mount Point ###
|
||||||
# When launching the container, mount the code directory to /app
|
# When launching the container, mount the code directory to /vllm-workspace
|
||||||
ARG APP_MOUNT=/app
|
ARG APP_MOUNT=/vllm-workspace
|
||||||
VOLUME [ ${APP_MOUNT} ]
|
VOLUME [ ${APP_MOUNT} ]
|
||||||
WORKDIR ${APP_MOUNT}
|
WORKDIR ${APP_MOUNT}
|
||||||
|
|
||||||
@ -89,15 +89,16 @@ RUN if [ "$BUILD_TRITON" = "1" ]; then \
|
|||||||
&& cd ../..; \
|
&& cd ../..; \
|
||||||
fi
|
fi
|
||||||
|
|
||||||
COPY ./ /app/vllm
|
WORKDIR /vllm-workspace
|
||||||
|
COPY . .
|
||||||
|
|
||||||
RUN python3 -m pip install --upgrade pip numba
|
RUN python3 -m pip install --upgrade pip numba
|
||||||
|
|
||||||
RUN cd /app \
|
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||||
&& cd vllm \
|
pip install -U -r requirements-rocm.txt \
|
||||||
&& pip install -U -r requirements-rocm.txt \
|
&& patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h ./rocm_patch/rocm_bf16.patch \
|
||||||
&& patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h /app/vllm/rocm_patch/rocm_bf16.patch \
|
|
||||||
&& python3 setup.py install \
|
&& python3 setup.py install \
|
||||||
|
&& cp build/lib.linux-x86_64-cpython-39/vllm/_C.cpython-39-x86_64-linux-gnu.so vllm/ \
|
||||||
&& cd ..
|
&& cd ..
|
||||||
|
|
||||||
RUN python3 -m pip install --upgrade pip
|
RUN python3 -m pip install --upgrade pip
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user