mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 08:04:58 +08:00
[Bugfix] Bind api server port before starting engine (#8491)
This commit is contained in:
parent
2759a43a26
commit
47f5e03b5b
@ -5,6 +5,7 @@ import multiprocessing
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
import socket
|
||||
import tempfile
|
||||
from argparse import Namespace
|
||||
from contextlib import asynccontextmanager
|
||||
@ -525,6 +526,9 @@ async def run_server(args, **uvicorn_kwargs) -> None:
|
||||
logger.info("vLLM API server version %s", VLLM_VERSION)
|
||||
logger.info("args: %s", args)
|
||||
|
||||
temp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
temp_socket.bind(("", args.port))
|
||||
|
||||
def signal_handler(*_) -> None:
|
||||
# Interrupt server on sigterm while initializing
|
||||
raise KeyboardInterrupt("terminated")
|
||||
@ -541,6 +545,8 @@ async def run_server(args, **uvicorn_kwargs) -> None:
|
||||
model_config = await async_engine_client.get_model_config()
|
||||
init_app_state(async_engine_client, model_config, app.state, args)
|
||||
|
||||
temp_socket.close()
|
||||
|
||||
shutdown_task = await serve_http(
|
||||
app,
|
||||
limit_concurrency=async_engine_client.limit_concurrency,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user