[Bugfix] Bind api server port before starting engine (#8491)

This commit is contained in:
Kevin Lin 2024-09-16 15:56:28 -05:00 committed by GitHub
parent 2759a43a26
commit 47f5e03b5b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -5,6 +5,7 @@ import multiprocessing
import os import os
import re import re
import signal import signal
import socket
import tempfile import tempfile
from argparse import Namespace from argparse import Namespace
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
@@ -525,6 +526,9 @@ async def run_server(args, **uvicorn_kwargs) -> None:
logger.info("vLLM API server version %s", VLLM_VERSION) logger.info("vLLM API server version %s", VLLM_VERSION)
logger.info("args: %s", args) logger.info("args: %s", args)
temp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
temp_socket.bind(("", args.port))
def signal_handler(*_) -> None: def signal_handler(*_) -> None:
# Interrupt server on sigterm while initializing # Interrupt server on sigterm while initializing
raise KeyboardInterrupt("terminated") raise KeyboardInterrupt("terminated")
@@ -541,6 +545,8 @@ async def run_server(args, **uvicorn_kwargs) -> None:
model_config = await async_engine_client.get_model_config() model_config = await async_engine_client.get_model_config()
init_app_state(async_engine_client, model_config, app.state, args) init_app_state(async_engine_client, model_config, app.state, args)
temp_socket.close()
shutdown_task = await serve_http( shutdown_task = await serve_http(
app, app,
limit_concurrency=async_engine_client.limit_concurrency, limit_concurrency=async_engine_client.limit_concurrency,