[Bugfix] use AF_INET6 for OpenAI Compatible Server with ipv6 (#9583)

Signed-off-by: xiaozijin <xiaozijin@bytedance.com>
This commit is contained in:
Zijin Xiao 2024-11-15 08:38:53 +08:00 committed by GitHub
parent b2e0ad3b59
commit 554af9228d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -12,7 +12,7 @@ from argparse import Namespace
from contextlib import asynccontextmanager
from functools import partial
from http import HTTPStatus
from typing import AsyncIterator, Optional, Set
from typing import AsyncIterator, Optional, Set, Tuple
import uvloop
from fastapi import APIRouter, FastAPI, Request
@ -57,7 +57,8 @@ from vllm.entrypoints.openai.serving_tokenization import (
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
from vllm.logger import init_logger
from vllm.usage.usage_lib import UsageContext
from vllm.utils import FlexibleArgumentParser, get_open_zmq_ipc_path
from vllm.utils import (FlexibleArgumentParser, get_open_zmq_ipc_path,
is_valid_ipv6_address)
from vllm.version import __version__ as VLLM_VERSION
if envs.VLLM_USE_V1:
@ -568,6 +569,18 @@ def init_app_state(
)
def create_server_socket(addr: Tuple[str, int]) -> socket.socket:
    """Create a TCP socket bound to ``addr``.

    The socket uses ``AF_INET6`` when the host part of ``addr`` is reported
    as an IPv6 address by ``is_valid_ipv6_address``; otherwise it uses
    ``AF_INET``. ``SO_REUSEADDR`` is enabled before binding.

    Args:
        addr: ``(host, port)`` pair handed to ``socket.bind``.

    Returns:
        The bound socket. ``listen()`` is not called here.

    Raises:
        OSError: If setting the socket option or binding fails. The socket
            is closed before the error propagates, so no file descriptor
            is leaked on failure.
    """
    family = socket.AF_INET
    if is_valid_ipv6_address(addr[0]):
        family = socket.AF_INET6

    sock = socket.socket(family=family, type=socket.SOCK_STREAM)
    try:
        # Enable address reuse before bind().
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sock.bind(addr)
    except Exception:
        # Do not leave a half-initialised socket open if binding fails
        # (e.g. port already in use, or an invalid address).
        sock.close()
        raise

    return sock
async def run_server(args, **uvicorn_kwargs) -> None:
logger.info("vLLM API server version %s", VLLM_VERSION)
logger.info("args: %s", args)
@ -584,9 +597,8 @@ async def run_server(args, **uvicorn_kwargs) -> None:
# workaround to make sure that we bind the port before the engine is set up.
# This avoids race conditions with ray.
# see https://github.com/vllm-project/vllm/issues/8204
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.bind((args.host or "", args.port))
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock_addr = (args.host or "", args.port)
sock = create_server_socket(sock_addr)
def signal_handler(*_) -> None:
# Interrupt server on sigterm while initializing