def get_open_port() -> int:
    """Return a TCP port number that was free at the time of the call.

    A throwaway socket is bound to port 0 so the operating system picks an
    unused port; that port number is returned after the socket is closed.
    IPv4 is tried first, and IPv6 is used as a fallback so the lookup also
    works on hosts without an IPv4 stack.

    Note that the port is released again when the probe socket closes, so
    another process may claim it before the caller binds to it.

    Returns:
        The port number chosen by the operating system.

    Raises:
        OSError: If a socket cannot be bound over either IPv4 or IPv6.
    """
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            # Port 0 asks the OS to choose any currently unused port.
            s.bind(("", 0))
            return s.getsockname()[1]
    except OSError:
        # IPv4 is unavailable (e.g. an IPv6-only host); retry over IPv6.
        with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
            s.bind(("", 0))
            # The IPv6 socket name is a 4-tuple; index 1 is still the port.
            return s.getsockname()[1]