From 5952d811398d3a22f30d72d2d2943787a78f66ea Mon Sep 17 00:00:00 2001
From: Russell Bryant
Date: Tue, 5 Nov 2024 10:50:57 -0500
Subject: [PATCH] [Frontend] Fix tcp port reservation for api server (#10012)

Signed-off-by: Russell Bryant
---
 vllm/entrypoints/openai/api_server.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index bef36ffd..917b347f 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -569,7 +569,8 @@ async def run_server(args, **uvicorn_kwargs) -> None:
     # This avoids race conditions with ray.
     # see https://github.com/vllm-project/vllm/issues/8204
     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-    sock.bind(("", args.port))
+    sock.bind((args.host or "", args.port))
+    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
 
     def signal_handler(*_) -> None:
         # Interrupt server on sigterm while initializing
@@ -593,13 +594,14 @@ async def run_server(args, **uvicorn_kwargs) -> None:
             ssl_certfile=args.ssl_certfile,
             ssl_ca_certs=args.ssl_ca_certs,
             ssl_cert_reqs=args.ssl_cert_reqs,
-            fd=sock.fileno(),
             **uvicorn_kwargs,
         )
 
     # NB: Await server shutdown only after the backend context is exited
     await shutdown_task
 
+    sock.close()
+
 
 if __name__ == "__main__":
     # NOTE(simon):