[CI/Build] Update PyTorch to 2.4.0 (#6951)
Co-authored-by: Michael Goin <michael@neuralmagic.com>
parent a72a424b3e · commit 7e0861bd0b
@@ -44,7 +44,7 @@ steps:
   fast_check: true
   commands:
   # This flashinfer installation will fail on AMD ROCm, so it is set as optional.
-  - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl || true
+  - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.2/flashinfer-0.1.2+cu121torch2.4-cp310-cp310-linux_x86_64.whl || true
   - pytest -v -s basic_correctness/test_basic_correctness.py
   - pytest -v -s basic_correctness/test_cpu_offload.py
   - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
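The wheel above is CUDA-only, so the trailing `|| true` keeps the pipeline green on AMD ROCm, where the install fails and the dependency is simply absent. A minimal sketch of probing for the optional package at runtime; the helper name `has_flashinfer` is illustrative, not part of the vLLM API:

```python
# Sketch: detect whether the optional flashinfer install succeeded.
# `has_flashinfer` is a hypothetical helper, not a vLLM API.
import importlib.util

def has_flashinfer() -> bool:
    # find_spec returns None when the package is absent, e.g. on ROCm
    # where the CUDA-only wheel install was skipped via `|| true`.
    return importlib.util.find_spec("flashinfer") is not None

if __name__ == "__main__":
    print("flashinfer available:", has_flashinfer())
```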
@@ -164,7 +164,7 @@ steps:
 - label: Models Test
   #mirror_hardwares: [amd]
   commands:
-  - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl
+  - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.2/flashinfer-0.1.2+cu121torch2.4-cp310-cp310-linux_x86_64.whl
   - pytest -v -s models -m \"not vlm\"

 - label: Vision Language Models Test
@@ -281,7 +281,7 @@ steps:
   - pytest -v -s distributed/test_custom_all_reduce.py
   - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
   - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py
-  - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl
+  - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.2/flashinfer-0.1.2+cu121torch2.4-cp310-cp310-linux_x86_64.whl
   - VLLM_ATTENTION_BACKEND=FLASHINFER TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
   - VLLM_ATTENTION_BACKEND=FLASHINFER TEST_DIST_MODEL=meta-llama/Meta-Llama-3-8B DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
   - pytest -v -s -x lora/test_mixtral.py
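Several of the commands above steer the attention backend through the `VLLM_ATTENTION_BACKEND` environment variable, which vLLM reads at engine initialization. A minimal sketch of the same mechanism outside CI (model name taken from the commands above; assumes a CUDA machine with vLLM installed):

```python
# Sketch: select the attention backend the way the CI commands do,
# by setting VLLM_ATTENTION_BACKEND before the engine starts.
import os

os.environ["VLLM_ATTENTION_BACKEND"] = "FLASHINFER"  # or "XFORMERS"

from vllm import LLM, SamplingParams

llm = LLM(model="facebook/opt-125m")  # same model the distributed tests use
outputs = llm.generate(["Hello, my name is"], SamplingParams(max_tokens=8))
print(outputs[0].outputs[0].text)
```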
.github/workflows/publish.yml (vendored)
@@ -49,7 +49,7 @@ jobs:
     matrix:
       os: ['ubuntu-20.04']
       python-version: ['3.8', '3.9', '3.10', '3.11']
-      pytorch-version: ['2.3.1'] # Must be the most recent version that meets requirements-cuda.txt.
+      pytorch-version: ['2.4.0'] # Must be the most recent version that meets requirements-cuda.txt.
       cuda-version: ['11.8', '12.1']

     steps:
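Per the comment in the matrix, this pin must track the `torch ==` pin in requirements-cuda.txt. A small sketch of a consistency check one could run locally; the script and its hard-coded expectation are illustrative, not part of this change:

```python
# Sketch: verify the publish.yml pytorch-version matches requirements-cuda.txt.
import pathlib
import re
import sys

EXPECTED = "2.4.0"  # the publish.yml matrix entry

pinned = None
for line in pathlib.Path("requirements-cuda.txt").read_text().splitlines():
    m = re.match(r"torch\s*==\s*([\w.]+)", line)  # matches "torch == 2.4.0"
    if m:
        pinned = m.group(1)
        break

if pinned != EXPECTED:
    sys.exit(f"pin mismatch: publish.yml has {EXPECTED}, "
             f"requirements-cuda.txt has {pinned}")
print(f"torch pins agree: {pinned}")
```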
@@ -32,7 +32,7 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx11
 # requirements.txt files and should be kept consistent. The ROCm torch
 # versions are derived from Dockerfile.rocm
 #
-set(TORCH_SUPPORTED_VERSION_CUDA "2.3.1")
+set(TORCH_SUPPORTED_VERSION_CUDA "2.4.0")
 set(TORCH_SUPPORTED_VERSION_ROCM "2.5.0")

 #
@@ -192,7 +192,7 @@ RUN --mount=type=bind,from=mamba-builder,src=/usr/src/mamba,target=/usr/src/mamb
     python3 -m pip install /usr/src/mamba/*.whl --no-cache-dir

 RUN --mount=type=cache,target=/root/.cache/pip \
-    python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.9/flashinfer-0.0.9+cu121torch2.3-cp310-cp310-linux_x86_64.whl
+    python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.2/flashinfer-0.1.2+cu121torch2.4-cp310-cp310-linux_x86_64.whl
 #################### vLLM installation IMAGE ####################

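The wheel filename encodes its compatibility envelope: `cu121torch2.4` (CUDA 12.1, torch 2.4), `cp310`/`cp310` (CPython 3.10 interpreter and ABI), and `linux_x86_64`, which is why the pin must change whenever the base image's torch or Python moves. A sketch, using the `packaging` library, of checking whether such a wheel fits the current interpreter and platform:

```python
# Sketch: check a pinned wheel's tags against the running environment.
from packaging.tags import sys_tags
from packaging.utils import parse_wheel_filename

WHEEL = "flashinfer-0.1.2+cu121torch2.4-cp310-cp310-linux_x86_64.whl"
name, version, build, tags = parse_wheel_filename(WHEEL)
supported = set(sys_tags())  # tags this interpreter/platform can install

# True only on CPython 3.10 / linux x86_64, matching the Docker image.
print(name, version, "installable here:", any(t in supported for t in tags))
```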
@@ -5,7 +5,7 @@ requires = [
     "ninja",
     "packaging",
     "setuptools >= 49.4.0",
-    "torch == 2.3.1",
+    "torch == 2.4.0",
     "wheel",
 ]
 build-backend = "setuptools.build_meta"
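Under PEP 517, this `requires` list is installed into an isolated build environment, so the build-time torch used to compile vLLM's CUDA extensions must match the runtime pin in requirements-cuda.txt, or the extension ABI breaks. A sketch of reading the pin programmatically (Python 3.11+ for stdlib `tomllib`):

```python
# Sketch: read the build-time torch pin from pyproject.toml.
import tomllib  # stdlib since Python 3.11

with open("pyproject.toml", "rb") as f:
    requires = tomllib.load(f)["build-system"]["requires"]

# Expect ['torch == 2.4.0'] after this change.
print([r for r in requires if r.startswith("torch")])
```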
@@ -3,5 +3,5 @@ cmake>=3.21
 ninja
 packaging
 setuptools>=49.4.0
-torch==2.3.1
+torch==2.4.0
 wheel
@@ -4,8 +4,8 @@
 # Dependencies for NVIDIA GPUs
 ray >= 2.9
 nvidia-ml-py # for pynvml package
-torch == 2.3.1
+torch == 2.4.0
 # These must be updated alongside torch
-torchvision == 0.18.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
-xformers == 0.0.27 # Requires PyTorch 2.3.1
-vllm-flash-attn == 2.5.9.post1 # Requires PyTorch 2.3.1
+torchvision == 0.19 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+xformers == 0.0.27.post2 # Requires PyTorch 2.4.0
+vllm-flash-attn == 2.6.0 # Requires PyTorch 2.4.0
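These packages ship binary extensions built against a specific torch release, so they must move in lockstep; a mismatch typically surfaces as an undefined-symbol ImportError. A quick sanity check one might run in the target environment (a sketch, assuming the pins above are installed):

```python
# Sketch: confirm the torch-coupled packages match the pinned versions.
import torch
import torchvision
import xformers

print("torch       ", torch.__version__)        # expect 2.4.0
print("torchvision ", torchvision.__version__)  # expect 0.19
print("xformers    ", xformers.__version__)     # expect 0.0.27.post2
```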
@@ -7,7 +7,7 @@ import triton.language as tl
 from vllm.model_executor.layers.ops.rand import seeded_uniform
 from vllm.triton_utils.sample import get_num_triton_sampler_splits

-_EPS = 1e-6
+_EPS: tl.constexpr = 1e-6


 def _multi_split_sample(
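The `tl.constexpr` annotation is the one source change the torch bump forces: the Triton release bundled with PyTorch 2.4 expects module-level globals referenced inside `@triton.jit` kernels to be declared `tl.constexpr`, so they are baked in at compile time. A minimal self-contained sketch of the pattern; the kernel below is illustrative, not vLLM's sampler:

```python
# Sketch: a module-level constant used inside a @triton.jit kernel is
# declared tl.constexpr, as in the _EPS change above.
import torch
import triton
import triton.language as tl

_EPS: tl.constexpr = 1e-6  # compile-time constant

@triton.jit
def _safe_log_kernel(x_ptr, out_ptr, n_elements, BLOCK_SIZE: tl.constexpr):
    offsets = tl.program_id(0) * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    mask = offsets < n_elements
    x = tl.load(x_ptr + offsets, mask=mask)
    # _EPS is folded into the generated kernel as a constant.
    tl.store(out_ptr + offsets, tl.log(x + _EPS), mask=mask)

x = torch.rand(1024, device="cuda")
out = torch.empty_like(x)
grid = (triton.cdiv(x.numel(), 128),)
_safe_log_kernel[grid](x, out, x.numel(), BLOCK_SIZE=128)
print(torch.allclose(out, torch.log(x + 1e-6)))
```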