From 10be20949350153651c86cdecb862a9ec324965a Mon Sep 17 00:00:00 2001
From: Chen LI
Date: Tue, 15 Jul 2025 14:23:52 -0700
Subject: [PATCH] [Bug Fix] get_distributed_init_method should get the ip from
 get_ip i… (#20889)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Chen Li
Co-authored-by: Russell Bryant
Signed-off-by: Russell Bryant
---
 vllm/envs.py                           |  5 +++++
 vllm/utils/__init__.py                 | 27 ++++++++++++++++++++++++++
 vllm/v1/executor/multiproc_executor.py |  8 ++++----
 3 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/vllm/envs.py b/vllm/envs.py
index 7bff6ade81512..37dd8146c060b 100644
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -139,6 +139,7 @@ if TYPE_CHECKING:
     VLLM_ROCM_QUICK_REDUCE_CAST_BF16_TO_FP16: bool = True
     VLLM_ROCM_QUICK_REDUCE_MAX_SIZE_BYTES_MB: Optional[int] = None
     VLLM_NIXL_ABORT_REQUEST_TIMEOUT: int = 120
+    VLLM_LOOPBACK_IP: str = ""
 
 
 def get_default_cache_root():
@@ -964,6 +965,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
     # If set to 1, use the TRTLLM Decode Attention backend in flashinfer.
     "VLLM_USE_TRTLLM_DECODE_ATTENTION":
     lambda: os.getenv("VLLM_USE_TRTLLM_DECODE_ATTENTION", None),
+
+    # Used to force set up loopback IP
+    "VLLM_LOOPBACK_IP":
+    lambda: os.getenv("VLLM_LOOPBACK_IP", ""),
 }
 
 # --8<-- [end:env-vars-definition]
diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index 0fed490a1fcb1..c18f1d12ba97f 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -813,6 +813,33 @@ def get_ip() -> str:
     return "0.0.0.0"
 
 
+def test_loopback_bind(address, family):
+    try:
+        s = socket.socket(family, socket.SOCK_DGRAM)
+        s.bind((address, 0))  # Port 0 = auto assign
+        s.close()
+        return True
+    except OSError:
+        return False
+
+
+def get_loopback_ip() -> str:
+    loopback_ip = envs.VLLM_LOOPBACK_IP
+    if loopback_ip:
+        return loopback_ip
+
+    # VLLM_LOOPBACK_IP is not set, try to get it based on network interface
+
+    if test_loopback_bind("127.0.0.1", socket.AF_INET):
+        return "127.0.0.1"
+    elif test_loopback_bind("::1", socket.AF_INET6):
+        return "::1"
+    else:
+        raise RuntimeError(
+            "Neither 127.0.0.1 nor ::1 are bound to a local interface. "
+            "Set the VLLM_LOOPBACK_IP environment variable explicitly.")
+
+
 def is_valid_ipv6_address(address: str) -> bool:
     try:
         ipaddress.IPv6Address(address)
diff --git a/vllm/v1/executor/multiproc_executor.py b/vllm/v1/executor/multiproc_executor.py
index d29da55ce8853..5960dd766c819 100644
--- a/vllm/v1/executor/multiproc_executor.py
+++ b/vllm/v1/executor/multiproc_executor.py
@@ -30,8 +30,8 @@ from vllm.distributed.device_communicators.shm_broadcast import (Handle,
 from vllm.executor.multiproc_worker_utils import (
     _add_prefix, set_multiprocessing_worker_envs)
 from vllm.logger import init_logger
-from vllm.utils import (get_distributed_init_method, get_mp_context,
-                        get_open_port)
+from vllm.utils import (get_distributed_init_method, get_loopback_ip,
+                        get_mp_context, get_open_port)
 from vllm.v1.executor.abstract import Executor, FailureCallback
 from vllm.v1.outputs import ModelRunnerOutput
 from vllm.worker.worker_base import WorkerWrapperBase
@@ -63,9 +63,9 @@ class MultiprocExecutor(Executor):
 
         # Multiprocessing-based executor does not support multi-node setting.
         # Since it only works for single node, we can use the loopback address
-        # 127.0.0.1 for communication.
+        # get_loopback_ip() for communication.
         distributed_init_method = get_distributed_init_method(
-            "127.0.0.1", get_open_port())
+            get_loopback_ip(), get_open_port())
 
         # Initialize worker and set up message queues for SchedulerOutputs
         # and ModelRunnerOutputs
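
For quick verification, here is a minimal usage sketch, assuming a vLLM
checkout with this patch applied. get_loopback_ip() and VLLM_LOOPBACK_IP come
straight from the diff above; the concrete address value is only an
illustration.

    import os

    # Force a loopback address; the helper returns the override verbatim.
    os.environ["VLLM_LOOPBACK_IP"] = "127.0.0.1"

    from vllm.utils import get_loopback_ip

    print(get_loopback_ip())  # -> 127.0.0.1 (the forced value)

    # With VLLM_LOOPBACK_IP unset, the helper instead probes by binding a
    # UDP socket to 127.0.0.1, then ::1, and raises RuntimeError if neither
    # address is bound to a local interface.

When the override is unset, binding a UDP socket to port 0 is a cheap way to
check that an address is actually configured locally, which is why the
fallback probes 127.0.0.1 and ::1 rather than hard-coding 127.0.0.1.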