From 10be20949350153651c86cdecb862a9ec324965a Mon Sep 17 00:00:00 2001
From: Chen LI
Date: Tue, 15 Jul 2025 14:23:52 -0700
Subject: [PATCH] [Bug Fix] get_distributed_init_method should get the ip from
 get_ip i… (#20889)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Chen Li
Co-authored-by: Russell Bryant
Signed-off-by: Russell Bryant
---
 vllm/envs.py                           |  5 +++++
 vllm/utils/__init__.py                 | 27 ++++++++++++++++++++++++++
 vllm/v1/executor/multiproc_executor.py |  8 ++++----
 3 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/vllm/envs.py b/vllm/envs.py
index 7bff6ade81512..37dd8146c060b 100644
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -139,6 +139,7 @@ if TYPE_CHECKING:
     VLLM_ROCM_QUICK_REDUCE_CAST_BF16_TO_FP16: bool = True
     VLLM_ROCM_QUICK_REDUCE_MAX_SIZE_BYTES_MB: Optional[int] = None
     VLLM_NIXL_ABORT_REQUEST_TIMEOUT: int = 120
+    VLLM_LOOPBACK_IP: str = ""
 
 
 def get_default_cache_root():
@@ -964,6 +965,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
     # If set to 1, use the TRTLLM Decode Attention backend in flashinfer.
     "VLLM_USE_TRTLLM_DECODE_ATTENTION":
     lambda: os.getenv("VLLM_USE_TRTLLM_DECODE_ATTENTION", None),
+
+    # Used to force set up loopback IP
+    "VLLM_LOOPBACK_IP":
+    lambda: os.getenv("VLLM_LOOPBACK_IP", ""),
 }
 
 # --8<-- [end:env-vars-definition]
diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index 0fed490a1fcb1..c18f1d12ba97f 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -813,6 +813,33 @@ def get_ip() -> str:
     return "0.0.0.0"
 
 
+def test_loopback_bind(address, family):
+    try:
+        s = socket.socket(family, socket.SOCK_DGRAM)
+        s.bind((address, 0))  # Port 0 = auto assign
+        s.close()
+        return True
+    except OSError:
+        return False
+
+
+def get_loopback_ip() -> str:
+    loopback_ip = envs.VLLM_LOOPBACK_IP
+    if loopback_ip:
+        return loopback_ip
+
+    # VLLM_LOOPBACK_IP is not set, try to get it based on network interface
+
+    if test_loopback_bind("127.0.0.1", socket.AF_INET):
+        return "127.0.0.1"
+    elif test_loopback_bind("::1", socket.AF_INET6):
+        return "::1"
+    else:
+        raise RuntimeError(
+            "Neither 127.0.0.1 nor ::1 are bound to a local interface. "
+            "Set the VLLM_LOOPBACK_IP environment variable explicitly.")
+
+
 def is_valid_ipv6_address(address: str) -> bool:
     try:
         ipaddress.IPv6Address(address)
diff --git a/vllm/v1/executor/multiproc_executor.py b/vllm/v1/executor/multiproc_executor.py
index d29da55ce8853..5960dd766c819 100644
--- a/vllm/v1/executor/multiproc_executor.py
+++ b/vllm/v1/executor/multiproc_executor.py
@@ -30,8 +30,8 @@ from vllm.distributed.device_communicators.shm_broadcast import (Handle,
 from vllm.executor.multiproc_worker_utils import (
     _add_prefix, set_multiprocessing_worker_envs)
 from vllm.logger import init_logger
-from vllm.utils import (get_distributed_init_method, get_mp_context,
-                        get_open_port)
+from vllm.utils import (get_distributed_init_method, get_loopback_ip,
+                        get_mp_context, get_open_port)
 from vllm.v1.executor.abstract import Executor, FailureCallback
 from vllm.v1.outputs import ModelRunnerOutput
 from vllm.worker.worker_base import WorkerWrapperBase
@@ -63,9 +63,9 @@ class MultiprocExecutor(Executor):
 
         # Multiprocessing-based executor does not support multi-node setting.
         # Since it only works for single node, we can use the loopback address
-        # 127.0.0.1 for communication.
+        # get_loopback_ip() for communication.
         distributed_init_method = get_distributed_init_method(
-            "127.0.0.1", get_open_port())
+            get_loopback_ip(), get_open_port())
 
         # Initialize worker and set up message queues for SchedulerOutputs
         # and ModelRunnerOutputs
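
For quick verification, here is a minimal usage sketch, assuming a vLLM
checkout with this patch applied. get_loopback_ip() and VLLM_LOOPBACK_IP come
straight from the diff above; the concrete address value is only an
illustration.

    import os

    # Force a loopback address; the helper returns the override verbatim.
    os.environ["VLLM_LOOPBACK_IP"] = "127.0.0.1"

    from vllm.utils import get_loopback_ip

    print(get_loopback_ip())  # -> 127.0.0.1 (the forced value)

    # With VLLM_LOOPBACK_IP unset, the helper instead probes by binding a
    # UDP socket to 127.0.0.1, then ::1, and raises RuntimeError if neither
    # address is bound to a local interface.

When the override is unset, binding a UDP socket to port 0 is a cheap way to
check that an address is actually configured locally, which is why the
fallback probes 127.0.0.1 and ::1 rather than hard-coding 127.0.0.1.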