mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 10:16:23 +08:00
Offload port selection to OS (#467)
This commit is contained in:
parent
96853af5a8
commit
6d7d95a70a
@ -1,4 +1,4 @@
|
||||
import random
|
||||
import socket
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
try:
|
||||
@ -12,6 +12,12 @@ from vllm.config import ParallelConfig
|
||||
DeviceID = Tuple[int, Optional[str], int]
|
||||
|
||||
|
||||
def get_open_port() -> int:
    """Return a TCP port number chosen by the operating system.

    Binds a temporary IPv4 stream socket to port 0, which leaves the choice
    of port to the OS, and reads the assigned number back from the socket.

    Returns:
        The port number the OS assigned to the temporary socket.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        # Port 0 delegates port selection to the OS instead of guessing one.
        s.bind(("", 0))
        # For AF_INET, getsockname() is a (host, port) pair; keep the port.
        # NOTE: the socket is closed when this `with` block exits, so the
        # port is not held for the caller after this function returns.
        return s.getsockname()[1]
|
||||
|
||||
|
||||
def initialize_cluster(
|
||||
parallel_config: ParallelConfig,
|
||||
engine_use_ray: bool = False,
|
||||
@ -42,7 +48,7 @@ def initialize_cluster(
|
||||
|
||||
if not parallel_config.worker_use_ray:
|
||||
# Initialize cluster locally.
|
||||
port = random.randint(10000, 20000)
|
||||
port = get_open_port()
|
||||
# We need to setup the distributed init method to make sure
|
||||
# the distributed megatron code (e.g., get world size) works correctly.
|
||||
distributed_init_method = f"tcp://localhost:{port}"
|
||||
@ -96,7 +102,7 @@ def initialize_cluster(
|
||||
stage_devices.append((rank, node_resource, current_device_id))
|
||||
if distributed_init_method is None:
|
||||
ip = node_resource.split("node:")[-1]
|
||||
port = random.randint(10000, 20000)
|
||||
port = get_open_port()
|
||||
distributed_init_method = f"tcp://{ip}:{port}"
|
||||
rank += 1
|
||||
current_device_id += 1
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user