[Core] manage nccl via a pypi package & upgrade to pt 2.2.1 (#3805)
Commit ca81ff5196 (parent b7782002e1)

.github/workflows/publish.yml
@@ -49,7 +49,7 @@ jobs:
       matrix:
           os: ['ubuntu-20.04']
           python-version: ['3.8', '3.9', '3.10', '3.11']
-          pytorch-version: ['2.1.2'] # Must be the most recent version that meets requirements.txt.
+          pytorch-version: ['2.2.1'] # Must be the most recent version that meets requirements.txt.
           cuda-version: ['11.8', '12.1']
 
     steps:

CMakeLists.txt
@@ -31,7 +31,7 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx11
 # requirements.txt files and should be kept consistent. The ROCm torch
 # versions are derived from Dockerfile.rocm
 #
-set(TORCH_SUPPORTED_VERSION_CUDA "2.1.2")
+set(TORCH_SUPPORTED_VERSION_CUDA "2.2.1")
 set(TORCH_SUPPORTED_VERSION_ROCM_5X "2.0.1")
 set(TORCH_SUPPORTED_VERSION_ROCM_6X "2.1.1")

Dockerfile
@@ -24,6 +24,13 @@ RUN --mount=type=cache,target=/root/.cache/pip \
 COPY requirements-dev.txt requirements-dev.txt
 RUN --mount=type=cache,target=/root/.cache/pip \
     pip install -r requirements-dev.txt
 
+# cuda arch list used by torch
+# can be useful for both `dev` and `test`
+# explicitly set the list to avoid issues with torch 2.2
+# see https://github.com/pytorch/pytorch/pull/123243
+ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
+ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
+
 #################### BASE BUILD IMAGE ####################
@@ -47,9 +54,6 @@ COPY requirements.txt requirements.txt
 COPY pyproject.toml pyproject.toml
 COPY vllm/__init__.py vllm/__init__.py
 
-# cuda arch list used by torch
-ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
-ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
 # max jobs used by Ninja to build extensions
 ARG max_jobs=2
 ENV MAX_JOBS=${max_jobs}
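Moving the arch list into the base build stage means every derived stage (including `dev` and `test`) compiles extensions for an explicit set of GPU architectures instead of relying on torch 2.2's detection. The ARG/ENV pair keeps it overridable per build, e.g. `docker build --build-arg torch_cuda_arch_list='8.0+PTX' .` (an illustrative invocation, not one the repo prescribes).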

pyproject.toml
@@ -5,7 +5,7 @@ requires = [
     "ninja",
     "packaging",
     "setuptools >= 49.4.0",
-    "torch == 2.1.2",
+    "torch == 2.2.1",
     "wheel",
 ]
 build-backend = "setuptools.build_meta"

requirements-build.txt
@@ -3,5 +3,5 @@ cmake>=3.21
 ninja
 packaging
 setuptools>=49.4.0
-torch==2.1.2
+torch==2.2.1
 wheel

requirements.txt
@@ -4,11 +4,11 @@ psutil
 ray >= 2.9
 sentencepiece # Required for LLaMA tokenizer.
 numpy
-torch == 2.1.2
+torch == 2.2.1
 requests
 py-cpuinfo
 transformers >= 4.39.1 # Required for StarCoder2 & Llava.
-xformers == 0.0.23.post1 # Required for CUDA 12.1.
+xformers == 0.0.25 # Requires PyTorch 2.2.1.
 fastapi
 uvicorn[standard]
 pydantic >= 2.0 # Required for OpenAI server.
@@ -17,3 +17,4 @@ pynvml == 11.5.0
 triton >= 2.1.0
 outlines == 0.0.34
 tiktoken == 0.6.0 # Required for DBRX tokenizer
+vllm-nccl-cu12>=2.18,<2.19 # for downloading nccl library
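The new pin keeps the downloaded NCCL inside the 2.18 series while allowing patch releases. A minimal sketch of how pip reads that specifier pair, using the `packaging` library that pip relies on internally (the version strings are made up for illustration):

```python
from packaging.specifiers import SpecifierSet

# The specifier added above: any 2.18.x release, nothing newer.
spec = SpecifierSet(">=2.18,<2.19")
print("2.18.3" in spec)  # True
print("2.19.0" in spec)  # False
```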

setup.py
@@ -328,6 +328,16 @@ def get_requirements() -> List[str]:
     if _is_cuda():
         with open(get_path("requirements.txt")) as f:
             requirements = f.read().strip().split("\n")
+        cuda_major = torch.version.cuda.split(".")[0]
+        modified_requirements = []
+        for req in requirements:
+            if "vllm-nccl-cu12" in req:
+                modified_requirements.append(
+                    req.replace("vllm-nccl-cu12",
+                                f"vllm-nccl-cu{cuda_major}"))
+            else:
+                modified_requirements.append(req)
+        requirements = modified_requirements
     elif _is_hip():
         with open(get_path("requirements-rocm.txt")) as f:
             requirements = f.read().strip().split("\n")
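The hunk above makes the built package depend on the vllm-nccl wheel matching the CUDA major version torch was compiled against. A self-contained sketch of that rewrite (the helper name is hypothetical, not part of vLLM):

```python
# Hypothetical standalone version of the rewrite in get_requirements().
def rewrite_nccl_requirement(req: str, cuda_version: str) -> str:
    cuda_major = cuda_version.split(".")[0]
    return req.replace("vllm-nccl-cu12", f"vllm-nccl-cu{cuda_major}")

# On a CUDA 11.8 torch build, the CUDA-12 default becomes the cu11 package;
# on CUDA 12.x the replacement is a no-op.
assert (rewrite_nccl_requirement("vllm-nccl-cu12>=2.18,<2.19", "11.8")
        == "vllm-nccl-cu11>=2.18,<2.19")
```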

vllm/model_executor/parallel_utils/pynccl.py
@@ -21,6 +21,7 @@
 
 import ctypes
 import datetime
+import glob
 import os
 
 # ===================== import region =====================
@@ -34,18 +35,27 @@ logger = init_logger(__name__)
 
 so_file = os.environ.get("VLLM_NCCL_SO_PATH", "")
 
+# check if we have vllm-managed nccl
+vllm_nccl_path = None
+if torch.version.cuda is not None:
+    cuda_major = torch.version.cuda.split(".")[0]
+    path = os.path.expanduser(
+        f"~/.config/vllm/nccl/cu{cuda_major}/libnccl.so.*")
+    files = glob.glob(path)
+    vllm_nccl_path = files[0] if files else None
+
 # manually load the nccl library
 if so_file:
     logger.info(
         f"Loading nccl from environment variable VLLM_NCCL_SO_PATH={so_file}")
 else:
     if torch.version.cuda is not None:
-        so_file = "libnccl.so.2"
+        so_file = vllm_nccl_path or "libnccl.so.2"
     elif torch.version.hip is not None:
         so_file = "librccl.so.1"
     else:
         raise ValueError("NCCL only supports CUDA and ROCm backends.")
-    logger.debug(f"Loading nccl from library {so_file}")
+    logger.info(f"Loading nccl from library {so_file}")
 
 try:
     nccl = ctypes.CDLL(so_file)
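Taken together, the loader now resolves the NCCL shared object in three steps: the `VLLM_NCCL_SO_PATH` override, then the vllm-managed copy that the vllm-nccl package places under `~/.config/vllm/nccl`, then the system `libnccl.so.2`. A condensed, illustrative restatement of that lookup order (the function itself is hypothetical, not part of vLLM):

```python
import glob
import os
from typing import Optional

def resolve_nccl_so(cuda_version: Optional[str]) -> str:
    # 1. An explicit user override always wins.
    so_file = os.environ.get("VLLM_NCCL_SO_PATH", "")
    if so_file:
        return so_file
    # 2. A vllm-managed copy installed by the vllm-nccl-cuXX package.
    if cuda_version is not None:
        cuda_major = cuda_version.split(".")[0]
        files = glob.glob(os.path.expanduser(
            f"~/.config/vllm/nccl/cu{cuda_major}/libnccl.so.*"))
        if files:
            return files[0]
    # 3. Fall back to whatever the dynamic linker finds on the system.
    return "libnccl.so.2"
```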