mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-03 08:01:50 +08:00
332 lines
9.8 KiB
Python
332 lines
9.8 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
import contextlib
|
|
import ipaddress
|
|
import os
|
|
import socket
|
|
import sys
|
|
import warnings
|
|
from collections.abc import (
|
|
Iterator,
|
|
Sequence,
|
|
)
|
|
from typing import Any
|
|
from urllib.parse import urlparse
|
|
from uuid import uuid4
|
|
|
|
import psutil
|
|
import zmq
|
|
import zmq.asyncio
|
|
|
|
import vllm.envs as envs
|
|
from vllm.logger import init_logger
|
|
|
|
logger = init_logger(__name__)
|
|
|
|
|
|
def close_sockets(sockets: Sequence[zmq.Socket | zmq.asyncio.Socket]):
|
|
for sock in sockets:
|
|
if sock is not None:
|
|
sock.close(linger=0)
|
|
|
|
|
|
def get_ip() -> str:
|
|
host_ip = envs.VLLM_HOST_IP
|
|
if "HOST_IP" in os.environ and "VLLM_HOST_IP" not in os.environ:
|
|
logger.warning(
|
|
"The environment variable HOST_IP is deprecated and ignored, as"
|
|
" it is often used by Docker and other software to"
|
|
" interact with the container's network stack. Please "
|
|
"use VLLM_HOST_IP instead to set the IP address for vLLM processes"
|
|
" to communicate with each other."
|
|
)
|
|
if host_ip:
|
|
return host_ip
|
|
|
|
# IP is not set, try to get it from the network interface
|
|
|
|
# try ipv4
|
|
try:
|
|
with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
|
|
s.connect(("8.8.8.8", 80)) # Doesn't need to be reachable
|
|
return s.getsockname()[0]
|
|
except Exception:
|
|
pass
|
|
|
|
# try ipv6
|
|
try:
|
|
with socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) as s:
|
|
# Google's public DNS server, see
|
|
# https://developers.google.com/speed/public-dns/docs/using#addresses
|
|
s.connect(("2001:4860:4860::8888", 80)) # Doesn't need to be reachable
|
|
return s.getsockname()[0]
|
|
except Exception:
|
|
pass
|
|
|
|
warnings.warn(
|
|
"Failed to get the IP address, using 0.0.0.0 by default."
|
|
"The value can be set by the environment variable"
|
|
" VLLM_HOST_IP or HOST_IP.",
|
|
stacklevel=2,
|
|
)
|
|
return "0.0.0.0"
|
|
|
|
|
|
def test_loopback_bind(address: str, family: int) -> bool:
|
|
try:
|
|
s = socket.socket(family, socket.SOCK_DGRAM)
|
|
s.bind((address, 0)) # Port 0 = auto assign
|
|
s.close()
|
|
return True
|
|
except OSError:
|
|
return False
|
|
|
|
|
|
def get_loopback_ip() -> str:
|
|
loopback_ip = envs.VLLM_LOOPBACK_IP
|
|
if loopback_ip:
|
|
return loopback_ip
|
|
|
|
# VLLM_LOOPBACK_IP is not set, try to get it based on network interface
|
|
|
|
if test_loopback_bind("127.0.0.1", socket.AF_INET):
|
|
return "127.0.0.1"
|
|
elif test_loopback_bind("::1", socket.AF_INET6):
|
|
return "::1"
|
|
else:
|
|
raise RuntimeError(
|
|
"Neither 127.0.0.1 nor ::1 are bound to a local interface. "
|
|
"Set the VLLM_LOOPBACK_IP environment variable explicitly."
|
|
)
|
|
|
|
|
|
def is_valid_ipv6_address(address: str) -> bool:
|
|
try:
|
|
ipaddress.IPv6Address(address)
|
|
return True
|
|
except ValueError:
|
|
return False
|
|
|
|
|
|
def split_host_port(host_port: str) -> tuple[str, int]:
|
|
# ipv6
|
|
if host_port.startswith("["):
|
|
host, port = host_port.rsplit("]", 1)
|
|
host = host[1:]
|
|
port = port.split(":")[1]
|
|
return host, int(port)
|
|
else:
|
|
host, port = host_port.split(":")
|
|
return host, int(port)
|
|
|
|
|
|
def join_host_port(host: str, port: int) -> str:
|
|
if is_valid_ipv6_address(host):
|
|
return f"[{host}]:{port}"
|
|
else:
|
|
return f"{host}:{port}"
|
|
|
|
|
|
def get_distributed_init_method(ip: str, port: int) -> str:
|
|
return get_tcp_uri(ip, port)
|
|
|
|
|
|
def get_tcp_uri(ip: str, port: int) -> str:
|
|
if is_valid_ipv6_address(ip):
|
|
return f"tcp://[{ip}]:{port}"
|
|
else:
|
|
return f"tcp://{ip}:{port}"
|
|
|
|
|
|
def get_open_zmq_ipc_path() -> str:
|
|
base_rpc_path = envs.VLLM_RPC_BASE_PATH
|
|
return f"ipc://{base_rpc_path}/{uuid4()}"
|
|
|
|
|
|
def get_open_zmq_inproc_path() -> str:
|
|
return f"inproc://{uuid4()}"
|
|
|
|
|
|
def get_open_port() -> int:
|
|
"""
|
|
Get an open port for the vLLM process to listen on.
|
|
An edge case to handle, is when we run data parallel,
|
|
we need to avoid ports that are potentially used by
|
|
the data parallel master process.
|
|
Right now we reserve 10 ports for the data parallel master
|
|
process. Currently it uses 2 ports.
|
|
"""
|
|
if "VLLM_DP_MASTER_PORT" in os.environ:
|
|
dp_master_port = envs.VLLM_DP_MASTER_PORT
|
|
reserved_port_range = range(dp_master_port, dp_master_port + 10)
|
|
while True:
|
|
candidate_port = _get_open_port()
|
|
if candidate_port not in reserved_port_range:
|
|
return candidate_port
|
|
return _get_open_port()
|
|
|
|
|
|
def get_open_ports_list(count: int = 5) -> list[int]:
|
|
"""Get a list of open ports."""
|
|
ports = set[int]()
|
|
while len(ports) < count:
|
|
ports.add(get_open_port())
|
|
return list(ports)
|
|
|
|
|
|
def _get_open_port() -> int:
|
|
port = envs.VLLM_PORT
|
|
if port is not None:
|
|
while True:
|
|
try:
|
|
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
|
s.bind(("", port))
|
|
return port
|
|
except OSError:
|
|
port += 1 # Increment port number if already in use
|
|
logger.info("Port %d is already in use, trying port %d", port - 1, port)
|
|
# try ipv4
|
|
try:
|
|
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
|
s.bind(("", 0))
|
|
return s.getsockname()[1]
|
|
except OSError:
|
|
# try ipv6
|
|
with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
|
|
s.bind(("", 0))
|
|
return s.getsockname()[1]
|
|
|
|
|
|
def find_process_using_port(port: int) -> psutil.Process | None:
|
|
# TODO: We can not check for running processes with network
|
|
# port on macOS. Therefore, we can not have a full graceful shutdown
|
|
# of vLLM. For now, let's not look for processes in this case.
|
|
# Ref: https://www.florianreinhard.de/accessdenied-in-psutil/
|
|
if sys.platform.startswith("darwin"):
|
|
return None
|
|
|
|
our_pid = os.getpid()
|
|
for conn in psutil.net_connections():
|
|
if conn.laddr.port == port and (conn.pid is not None and conn.pid != our_pid):
|
|
try:
|
|
return psutil.Process(conn.pid)
|
|
except psutil.NoSuchProcess:
|
|
return None
|
|
return None
|
|
|
|
|
|
def split_zmq_path(path: str) -> tuple[str, str, str]:
|
|
"""Split a zmq path into its parts."""
|
|
parsed = urlparse(path)
|
|
if not parsed.scheme:
|
|
raise ValueError(f"Invalid zmq path: {path}")
|
|
|
|
scheme = parsed.scheme
|
|
host = parsed.hostname or ""
|
|
port = str(parsed.port or "")
|
|
|
|
if scheme == "tcp" and not all((host, port)):
|
|
# The host and port fields are required for tcp
|
|
raise ValueError(f"Invalid zmq path: {path}")
|
|
|
|
if scheme != "tcp" and port:
|
|
# port only makes sense with tcp
|
|
raise ValueError(f"Invalid zmq path: {path}")
|
|
|
|
return scheme, host, port
|
|
|
|
|
|
def make_zmq_path(scheme: str, host: str, port: int | None = None) -> str:
|
|
"""Make a ZMQ path from its parts.
|
|
|
|
Args:
|
|
scheme: The ZMQ transport scheme (e.g. tcp, ipc, inproc).
|
|
host: The host - can be an IPv4 address, IPv6 address, or hostname.
|
|
port: Optional port number, only used for TCP sockets.
|
|
|
|
Returns:
|
|
A properly formatted ZMQ path string.
|
|
"""
|
|
if port is None:
|
|
return f"{scheme}://{host}"
|
|
if is_valid_ipv6_address(host):
|
|
return f"{scheme}://[{host}]:{port}"
|
|
return f"{scheme}://{host}:{port}"
|
|
|
|
|
|
# Adapted from: https://github.com/sgl-project/sglang/blob/v0.4.1/python/sglang/srt/utils.py#L783 # noqa: E501
|
|
def make_zmq_socket(
|
|
ctx: zmq.asyncio.Context | zmq.Context, # type: ignore[name-defined]
|
|
path: str,
|
|
socket_type: Any,
|
|
bind: bool | None = None,
|
|
identity: bytes | None = None,
|
|
linger: int | None = None,
|
|
) -> zmq.Socket | zmq.asyncio.Socket: # type: ignore[name-defined]
|
|
"""Make a ZMQ socket with the proper bind/connect semantics."""
|
|
|
|
mem = psutil.virtual_memory()
|
|
socket = ctx.socket(socket_type)
|
|
|
|
# Calculate buffer size based on system memory
|
|
total_mem = mem.total / 1024**3
|
|
available_mem = mem.available / 1024**3
|
|
# For systems with substantial memory (>32GB total, >16GB available):
|
|
# - Set a large 0.5GB buffer to improve throughput
|
|
# For systems with less memory:
|
|
# - Use system default (-1) to avoid excessive memory consumption
|
|
buf_size = int(0.5 * 1024**3) if total_mem > 32 and available_mem > 16 else -1
|
|
|
|
if bind is None:
|
|
bind = socket_type not in (zmq.PUSH, zmq.SUB, zmq.XSUB)
|
|
|
|
if socket_type in (zmq.PULL, zmq.DEALER, zmq.ROUTER):
|
|
socket.setsockopt(zmq.RCVHWM, 0)
|
|
socket.setsockopt(zmq.RCVBUF, buf_size)
|
|
|
|
if socket_type in (zmq.PUSH, zmq.DEALER, zmq.ROUTER):
|
|
socket.setsockopt(zmq.SNDHWM, 0)
|
|
socket.setsockopt(zmq.SNDBUF, buf_size)
|
|
|
|
if identity is not None:
|
|
socket.setsockopt(zmq.IDENTITY, identity)
|
|
|
|
if linger is not None:
|
|
socket.setsockopt(zmq.LINGER, linger)
|
|
|
|
if socket_type == zmq.XPUB:
|
|
socket.setsockopt(zmq.XPUB_VERBOSE, True)
|
|
|
|
# Determine if the path is a TCP socket with an IPv6 address.
|
|
# Enable IPv6 on the zmq socket if so.
|
|
scheme, host, _ = split_zmq_path(path)
|
|
if scheme == "tcp" and is_valid_ipv6_address(host):
|
|
socket.setsockopt(zmq.IPV6, 1)
|
|
|
|
if bind:
|
|
socket.bind(path)
|
|
else:
|
|
socket.connect(path)
|
|
|
|
return socket
|
|
|
|
|
|
@contextlib.contextmanager
|
|
def zmq_socket_ctx(
|
|
path: str,
|
|
socket_type: Any,
|
|
bind: bool | None = None,
|
|
linger: int = 0,
|
|
identity: bytes | None = None,
|
|
) -> Iterator[zmq.Socket]:
|
|
"""Context manager for a ZMQ socket"""
|
|
|
|
ctx = zmq.Context() # type: ignore[attr-defined]
|
|
try:
|
|
yield make_zmq_socket(ctx, path, socket_type, bind=bind, identity=identity)
|
|
except KeyboardInterrupt:
|
|
logger.debug("Got Keyboard Interrupt.")
|
|
|
|
finally:
|
|
ctx.destroy(linger=linger)
|