diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py
index 1bb0ca79cc1da..10a2ce3e05464 100644
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -222,10 +222,13 @@ class GroupCoordinator:
 
         for ranks in group_ranks:
             device_group = torch.distributed.new_group(
-                ranks, backend=torch_distributed_backend)
+                ranks, backend=torch_distributed_backend,
+                timeout=envs.VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS)
             # a group with `gloo` backend, to allow direct coordination between
             # processes through the CPU.
-            cpu_group = torch.distributed.new_group(ranks, backend="gloo")
+            cpu_group = torch.distributed.new_group(
+                ranks, backend="gloo",
+                timeout=envs.VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS)
             if self.rank in ranks:
                 self.ranks = ranks
                 self.world_size = len(ranks)
@@ -965,7 +968,8 @@ def init_distributed_environment(
             backend=backend,
             init_method=distributed_init_method,
             world_size=world_size,
-            rank=rank)
+            rank=rank,
+            timeout=envs.VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS)
     # set the local rank
     # local_rank is not available in torch ProcessGroup,
     # see https://github.com/pytorch/pytorch/issues/122816
diff --git a/vllm/envs.py b/vllm/envs.py
index 261cc7855b705..821a86291f42a 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -140,6 +140,7 @@ if TYPE_CHECKING:
     VLLM_NIXL_ABORT_REQUEST_TIMEOUT: int = 120
     VLLM_USE_CUDNN_PREFILL: bool = False
     VLLM_LOOPBACK_IP: str = ""
+    VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS: Optional[int] = None
 
 
 def get_default_cache_root():
@@ -505,6 +506,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_IMAGE_FETCH_TIMEOUT":
     lambda: int(os.getenv("VLLM_IMAGE_FETCH_TIMEOUT", "5")),
 
+    # Timeout in seconds for torch.distributed process group initialization
+    "VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS":
+    lambda: maybe_convert_int(os.getenv("VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS", None)),
+
     # Timeout for fetching videos when serving multimodal models
     # Default is 30 seconds
     "VLLM_VIDEO_FETCH_TIMEOUT":
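
Note on how the new value reaches torch.distributed: init_process_group and new_group take their timeout argument as a datetime.timedelta, while VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS is read as an optional int. Below is a minimal sketch of converting the seconds value into that type, assuming this patch is applied; the helper name distributed_init_timeout is illustrative and not part of the patch.

# Sketch only: turn the optional VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS value
# into the datetime.timedelta that torch.distributed expects; returning None
# keeps PyTorch's default process-group timeout.
from datetime import timedelta
from typing import Optional

import vllm.envs as envs


def distributed_init_timeout() -> Optional[timedelta]:
    seconds = envs.VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS
    if seconds is None:
        return None
    return timedelta(seconds=seconds)

A call site would then read, for example, torch.distributed.new_group(ranks, backend="gloo", timeout=distributed_init_timeout()), and the variable is set like any other vLLM env var, e.g. VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS=600 for a ten-minute limit.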