[Bugfix] use AF_INET6 for OpenAI Compatible Server with ipv6 (#9583)

Signed-off-by: xiaozijin <xiaozijin@bytedance.com>
This commit is contained in:
Zijin Xiao 2024-11-15 08:38:53 +08:00 committed by GitHub
parent b2e0ad3b59
commit 554af9228d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -12,7 +12,7 @@ from argparse import Namespace
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from functools import partial from functools import partial
from http import HTTPStatus from http import HTTPStatus
from typing import AsyncIterator, Optional, Set from typing import AsyncIterator, Optional, Set, Tuple
import uvloop import uvloop
from fastapi import APIRouter, FastAPI, Request from fastapi import APIRouter, FastAPI, Request
@ -57,7 +57,8 @@ from vllm.entrypoints.openai.serving_tokenization import (
from vllm.entrypoints.openai.tool_parsers import ToolParserManager from vllm.entrypoints.openai.tool_parsers import ToolParserManager
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.usage.usage_lib import UsageContext from vllm.usage.usage_lib import UsageContext
from vllm.utils import FlexibleArgumentParser, get_open_zmq_ipc_path from vllm.utils import (FlexibleArgumentParser, get_open_zmq_ipc_path,
is_valid_ipv6_address)
from vllm.version import __version__ as VLLM_VERSION from vllm.version import __version__ as VLLM_VERSION
if envs.VLLM_USE_V1: if envs.VLLM_USE_V1:
@ -568,6 +569,18 @@ def init_app_state(
) )
def create_server_socket(addr: Tuple[str, int]) -> socket.socket:
    """Create a TCP socket bound to ``addr``.

    The address family is ``AF_INET6`` when ``is_valid_ipv6_address``
    accepts the host part of ``addr``, and ``AF_INET`` otherwise.

    Args:
        addr: ``(host, port)`` pair to bind to.

    Returns:
        The bound socket. ``listen()`` has not been called on it; the
        caller owns the socket and is responsible for closing it.

    Raises:
        OSError: If setting the socket option or binding fails (for
            example, the port is already in use). The socket is closed
            before the error propagates, so no descriptor is leaked.
    """
    family = socket.AF_INET
    if is_valid_ipv6_address(addr[0]):
        family = socket.AF_INET6

    sock = socket.socket(family=family, type=socket.SOCK_STREAM)
    try:
        # Set SO_REUSEADDR before bind() so the option applies to this bind.
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sock.bind(addr)
    except Exception:
        # Do not leak the descriptor when the socket cannot be set up.
        sock.close()
        raise
    return sock
async def run_server(args, **uvicorn_kwargs) -> None: async def run_server(args, **uvicorn_kwargs) -> None:
logger.info("vLLM API server version %s", VLLM_VERSION) logger.info("vLLM API server version %s", VLLM_VERSION)
logger.info("args: %s", args) logger.info("args: %s", args)
@ -584,9 +597,8 @@ async def run_server(args, **uvicorn_kwargs) -> None:
# workaround to make sure that we bind the port before the engine is set up. # workaround to make sure that we bind the port before the engine is set up.
# This avoids race conditions with ray. # This avoids race conditions with ray.
# see https://github.com/vllm-project/vllm/issues/8204 # see https://github.com/vllm-project/vllm/issues/8204
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock_addr = (args.host or "", args.port)
sock.bind((args.host or "", args.port)) sock = create_server_socket(sock_addr)
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
def signal_handler(*_) -> None: def signal_handler(*_) -> None:
# Interrupt server on sigterm while initializing # Interrupt server on sigterm while initializing