From 84c8b5163198e515dd8684c4e4f9df134e5b68cb Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Tue, 5 Nov 2024 10:50:57 -0500 Subject: [PATCH] [Frontend] Fix tcp port reservation for api server (#10012) Signed-off-by: Russell Bryant Signed-off-by: Tyler Michael Smith --- vllm/entrypoints/openai/api_server.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index bef36ffdbfcd3..917b347ff1161 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -569,7 +569,8 @@ async def run_server(args, **uvicorn_kwargs) -> None: # This avoids race conditions with ray. # see https://github.com/vllm-project/vllm/issues/8204 sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.bind(("", args.port)) + sock.bind((args.host or "", args.port)) + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) def signal_handler(*_) -> None: # Interrupt server on sigterm while initializing @@ -593,13 +594,14 @@ def signal_handler(*_) -> None: ssl_certfile=args.ssl_certfile, ssl_ca_certs=args.ssl_ca_certs, ssl_cert_reqs=args.ssl_cert_reqs, - fd=sock.fileno(), **uvicorn_kwargs, ) # NB: Await server shutdown only after the backend context is exited await shutdown_task + sock.close() + if __name__ == "__main__": # NOTE(simon):