diff --git a/vllm/engine/ray_utils.py b/vllm/engine/ray_utils.py
index 66ca2b725efa..01faf0d59035 100644
--- a/vllm/engine/ray_utils.py
+++ b/vllm/engine/ray_utils.py
@@ -1,4 +1,4 @@
-import random
+import socket
 from typing import List, Optional, Tuple
 
 try:
@@ -12,6 +12,12 @@ from vllm.config import ParallelConfig
 DeviceID = Tuple[int, Optional[str], int]
 
 
+def get_open_port():  # Return a TCP port number the OS currently reports as free.
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:  # IPv4 TCP socket
+        s.bind(("", 0))  # Port 0 asks the OS to pick an unused port.
+        return s.getsockname()[1]  # (host, port)[1]; NOTE: socket closes on exit, so the port is not held.
+
+
 def initialize_cluster(
     parallel_config: ParallelConfig,
     engine_use_ray: bool = False,
@@ -42,7 +48,7 @@ def initialize_cluster(
 
     if not parallel_config.worker_use_ray:
         # Initialize cluster locally.
-        port = random.randint(10000, 20000)
+        port = get_open_port()
         # We need to setup the distributed init method to make sure
         # the distributed megatron code (e.g., get world size) works correctly.
         distributed_init_method = f"tcp://localhost:{port}"
@@ -96,7 +102,7 @@ def initialize_cluster(
             stage_devices.append((rank, node_resource, current_device_id))
             if distributed_init_method is None:
                 ip = node_resource.split("node:")[-1]
-                port = random.randint(10000, 20000)
+                port = get_open_port()
                 distributed_init_method = f"tcp://{ip}:{port}"
             rank += 1
             current_device_id += 1