From 7e0861bd0bb25ea5ceaa3a513da4133fb828b5fe Mon Sep 17 00:00:00 2001
From: Sage Moore
Date: Thu, 1 Aug 2024 11:11:24 -0700
Subject: [PATCH] [CI/Build] Update PyTorch to 2.4.0 (#6951)

Co-authored-by: Michael Goin
---
 .buildkite/test-pipeline.yaml            | 6 +++---
 .github/workflows/publish.yml            | 2 +-
 CMakeLists.txt                           | 2 +-
 Dockerfile                               | 2 +-
 pyproject.toml                           | 2 +-
 requirements-build.txt                   | 2 +-
 requirements-cuda.txt                    | 8 ++++----
 vllm/model_executor/layers/ops/sample.py | 2 +-
 8 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 9ec9ec12..573c3740 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -44,7 +44,7 @@ steps:
   fast_check: true
   commands:
   # This flashinfer installation will fail on AMD ROCm, so it is set as optional.
-  - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl || true
+  - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.2/flashinfer-0.1.2+cu121torch2.4-cp310-cp310-linux_x86_64.whl || true
   - pytest -v -s basic_correctness/test_basic_correctness.py
   - pytest -v -s basic_correctness/test_cpu_offload.py
   - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
@@ -164,7 +164,7 @@ steps:
 - label: Models Test
   #mirror_hardwares: [amd]
   commands:
-  - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.2/flashinfer-0.1.2+cu121torch2.4-cp310-cp310-linux_x86_64.whl
+  - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.2/flashinfer-0.1.2+cu121torch2.4-cp310-cp310-linux_x86_64.whl
   - pytest -v -s models -m \"not vlm\"

 - label: Vision Language Models Test
@@ -281,7 +281,7 @@ steps:
   - pytest -v -s distributed/test_custom_all_reduce.py
   - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
   - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py
-  - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl
+  - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.2/flashinfer-0.1.2+cu121torch2.4-cp310-cp310-linux_x86_64.whl
   - VLLM_ATTENTION_BACKEND=FLASHINFER TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
   - VLLM_ATTENTION_BACKEND=FLASHINFER TEST_DIST_MODEL=meta-llama/Meta-Llama-3-8B DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
   - pytest -v -s -x lora/test_mixtral.py
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 15c2ec05..607fda75 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -49,7 +49,7 @@ jobs:
       matrix:
           os: ['ubuntu-20.04']
           python-version: ['3.8', '3.9', '3.10', '3.11']
-          pytorch-version: ['2.3.1']  # Must be the most recent version that meets requirements-cuda.txt.
+          pytorch-version: ['2.4.0']  # Must be the most recent version that meets requirements-cuda.txt.
           cuda-version: ['11.8', '12.1']

     steps:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0d599c54..9c2cb360 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -32,7 +32,7 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx11
 # requirements.txt files and should be kept consistent. The ROCm torch
 # versions are derived from Dockerfile.rocm
 #
-set(TORCH_SUPPORTED_VERSION_CUDA "2.3.1")
+set(TORCH_SUPPORTED_VERSION_CUDA "2.4.0")
 set(TORCH_SUPPORTED_VERSION_ROCM "2.5.0")

 #
diff --git a/Dockerfile b/Dockerfile
index db4453ab..72947070 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -192,7 +192,7 @@ RUN --mount=type=bind,from=mamba-builder,src=/usr/src/mamba,target=/usr/src/mamb
     python3 -m pip install /usr/src/mamba/*.whl --no-cache-dir

 RUN --mount=type=cache,target=/root/.cache/pip \
-    python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.9/flashinfer-0.0.9+cu121torch2.3-cp310-cp310-linux_x86_64.whl
+    python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.2/flashinfer-0.1.2+cu121torch2.4-cp310-cp310-linux_x86_64.whl
 #################### vLLM installation IMAGE ####################
diff --git a/pyproject.toml b/pyproject.toml
index b0d115a0..26d963aa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ requires = [
     "ninja",
     "packaging",
     "setuptools >= 49.4.0",
-    "torch == 2.3.1",
+    "torch == 2.4.0",
     "wheel",
 ]
 build-backend = "setuptools.build_meta"
diff --git a/requirements-build.txt b/requirements-build.txt
index b05f38a0..d0f677fd 100644
--- a/requirements-build.txt
+++ b/requirements-build.txt
@@ -3,5 +3,5 @@ cmake>=3.21
 ninja
 packaging
 setuptools>=49.4.0
-torch==2.3.1
+torch==2.4.0
 wheel
diff --git a/requirements-cuda.txt b/requirements-cuda.txt
index 3eb91212..1f60d540 100644
--- a/requirements-cuda.txt
+++ b/requirements-cuda.txt
@@ -4,8 +4,8 @@
 # Dependencies for NVIDIA GPUs
 ray >= 2.9
 nvidia-ml-py # for pynvml package
-torch == 2.3.1
+torch == 2.4.0
 # These must be updated alongside torch
-torchvision == 0.18.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
-xformers == 0.0.27 # Requires PyTorch 2.3.1
-vllm-flash-attn == 2.5.9.post1 # Requires PyTorch 2.3.1
+torchvision == 0.19 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+xformers == 0.0.27.post2 # Requires PyTorch 2.4.0
+vllm-flash-attn == 2.6.0 # Requires PyTorch 2.4.0
diff --git a/vllm/model_executor/layers/ops/sample.py b/vllm/model_executor/layers/ops/sample.py
index bdb577da..fb88a05d 100644
--- a/vllm/model_executor/layers/ops/sample.py
+++ b/vllm/model_executor/layers/ops/sample.py
@@ -7,7 +7,7 @@ import triton.language as tl
 from vllm.model_executor.layers.ops.rand import seeded_uniform
 from vllm.triton_utils.sample import get_num_triton_sampler_splits

-_EPS = 1e-6
+_EPS: tl.constexpr = 1e-6


 def _multi_split_sample(
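
Note on the one source-level change above: PyTorch 2.4 bundles Triton 3.0, whose JIT no longer accepts a plain module-level Python global being read from inside a @triton.jit kernel; any such global must be declared tl.constexpr, which is why _EPS in vllm/model_executor/layers/ops/sample.py gains the annotation. The snippet below is a minimal, hypothetical sketch of that rule, not vLLM's sampler code; the kernel name, sizes, and launch parameters are invented for illustration, and running it requires a CUDA GPU with triton installed.

# Minimal sketch (hypothetical kernel, not vLLM code): with the Triton 3.0 that
# ships alongside PyTorch 2.4, a module-level constant used inside a
# @triton.jit function must be declared tl.constexpr, mirroring the _EPS
# change in sample.py above.
import torch
import triton
import triton.language as tl

_EPS: tl.constexpr = 1e-6  # a bare `_EPS = 1e-6` is rejected by Triton 3.0's JIT when read in the kernel


@triton.jit
def _add_eps_kernel(x_ptr, out_ptr, n, BLOCK: tl.constexpr):
    # Each program instance handles one BLOCK-sized slice of the input.
    offs = tl.program_id(0) * BLOCK + tl.arange(0, BLOCK)
    mask = offs < n
    x = tl.load(x_ptr + offs, mask=mask)
    # The kernel reads the constexpr global _EPS defined at module scope.
    tl.store(out_ptr + offs, x + _EPS, mask=mask)


x = torch.rand(1024, device="cuda")
out = torch.empty_like(x)
_add_eps_kernel[(triton.cdiv(x.numel(), 256),)](x, out, x.numel(), BLOCK=256)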