vllm/.buildkite/run-amd-test.sh
Alexei-V-Ivanov-AMD 9b5c9f9484
[CI/Build] AMD CI pipeline with extended set of tests. (#4267)
Co-authored-by: simon-mo <simon.mo@hey.com>
2024-05-02 12:29:07 -07:00

45 lines
999 B
Bash

# This script build the ROCm docker image and runs test inside it.
set -ex
# Print ROCm version
echo "--- ROCm info"
rocminfo
echo "--- Resetting GPUs"
echo "reset" > /opt/amdgpu/etc/gpu_state
while true; do
sleep 3
if grep -q clean /opt/amdgpu/etc/gpu_state; then
echo "GPUs state is \"clean\""
break
fi
done
echo "--- Building container"
sha=$(git rev-parse --short HEAD)
container_name=rocm_${sha}
docker build \
-t ${container_name} \
-f Dockerfile.rocm \
--progress plain \
.
remove_docker_container() {
docker rm -f ${container_name} || docker image rm -f ${container_name} || true
}
trap remove_docker_container EXIT
echo "--- Running container"
docker run \
--device /dev/kfd --device /dev/dri \
--network host \
--rm \
-e HF_TOKEN \
--name ${container_name} \
${container_name} \
/bin/bash -c $(echo $1 | sed "s/^'//" | sed "s/'$//")