mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-18 07:15:01 +08:00
Offload port selection to OS (#467)
This commit is contained in:
parent
96853af5a8
commit
6d7d95a70a
@ -1,4 +1,4 @@
|
|||||||
import random
|
import socket
|
||||||
from typing import List, Optional, Tuple
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -12,6 +12,12 @@ from vllm.config import ParallelConfig
|
|||||||
DeviceID = Tuple[int, Optional[str], int]
|
DeviceID = Tuple[int, Optional[str], int]
|
||||||
|
|
||||||
|
|
||||||
|
def get_open_port() -> int:
    """Return a TCP port number that the OS reports as free.

    The socket is bound to port 0, which asks the operating system to pick
    an unused port. The chosen port is read back with ``getsockname()`` and
    the socket is closed again when the ``with`` block exits.

    IPv4 is tried first; if an IPv4 socket cannot be created or bound, the
    same procedure is repeated with an IPv6 socket.

    NOTE: the port is released before this function returns, so another
    process may claim it before the caller binds to it.

    Returns:
        The port number selected by the operating system.

    Raises:
        OSError: if neither an IPv4 nor an IPv6 socket could be bound.
    """
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.bind(("", 0))
            return sock.getsockname()[1]
    except OSError:
        # No usable IPv4 stack: fall back to IPv6. For AF_INET6,
        # getsockname() returns (host, port, flowinfo, scope_id), so
        # index 1 is still the port.
        with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as sock:
            sock.bind(("", 0))
            return sock.getsockname()[1]
|
||||||
|
|
||||||
|
|
||||||
def initialize_cluster(
|
def initialize_cluster(
|
||||||
parallel_config: ParallelConfig,
|
parallel_config: ParallelConfig,
|
||||||
engine_use_ray: bool = False,
|
engine_use_ray: bool = False,
|
||||||
@ -42,7 +48,7 @@ def initialize_cluster(
|
|||||||
|
|
||||||
if not parallel_config.worker_use_ray:
|
if not parallel_config.worker_use_ray:
|
||||||
# Initialize cluster locally.
|
# Initialize cluster locally.
|
||||||
port = random.randint(10000, 20000)
|
port = get_open_port()
|
||||||
# We need to setup the distributed init method to make sure
|
# We need to setup the distributed init method to make sure
|
||||||
# the distributed megatron code (e.g., get world size) works correctly.
|
# the distributed megatron code (e.g., get world size) works correctly.
|
||||||
distributed_init_method = f"tcp://localhost:{port}"
|
distributed_init_method = f"tcp://localhost:{port}"
|
||||||
@ -96,7 +102,7 @@ def initialize_cluster(
|
|||||||
stage_devices.append((rank, node_resource, current_device_id))
|
stage_devices.append((rank, node_resource, current_device_id))
|
||||||
if distributed_init_method is None:
|
if distributed_init_method is None:
|
||||||
ip = node_resource.split("node:")[-1]
|
ip = node_resource.split("node:")[-1]
|
||||||
port = random.randint(10000, 20000)
|
port = get_open_port()
|
||||||
distributed_init_method = f"tcp://{ip}:{port}"
|
distributed_init_method = f"tcp://{ip}:{port}"
|
||||||
rank += 1
|
rank += 1
|
||||||
current_device_id += 1
|
current_device_id += 1
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user