mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-26 01:51:18 +08:00
[Bugfix][P/D] Reduce num_threads used by nixl ucx backend (#27196)
Signed-off-by: David Whyte-Gray <40244437+dagrayvid@users.noreply.github.com>
This commit is contained in:
parent
86ed77022d
commit
ddeec11ba9
@ -607,15 +607,25 @@ class NixlConnectorWorker:
|
|||||||
# TODO temporary, once nixl allows for telemetry flag in config
|
# TODO temporary, once nixl allows for telemetry flag in config
|
||||||
# (next release), we can remove this env var.
|
# (next release), we can remove this env var.
|
||||||
os.environ["NIXL_TELEMETRY_ENABLE"] = "1"
|
os.environ["NIXL_TELEMETRY_ENABLE"] = "1"
|
||||||
|
|
||||||
# Agent.
|
# Agent.
|
||||||
non_ucx_backends = [b for b in self.nixl_backends if b != "UCX"]
|
non_ucx_backends = [b for b in self.nixl_backends if b != "UCX"]
|
||||||
|
# Configure NIXL num_threads to avoid UAR exhaustion on Mellanox NICs.
|
||||||
|
# Each UCX thread allocates UARs (doorbell pages) via DevX, and
|
||||||
|
# excessive NIXL UAR usage can exhaust NIC UAR space. This can cause
|
||||||
|
# components like NVSHMEM (used by DeepEP kernels) to fail during RDMA
|
||||||
|
# initialization with "mlx5dv_devx_alloc_uar" errors.
|
||||||
|
# Ref: https://network.nvidia.com/files/doc-2020/ethernet-adapters-programming-manual.pdf#page=63
|
||||||
|
num_threads = vllm_config.kv_transfer_config.get_from_extra_config(
|
||||||
|
"num_threads", 4
|
||||||
|
)
|
||||||
if nixl_agent_config is None:
|
if nixl_agent_config is None:
|
||||||
config = None
|
config = None
|
||||||
else:
|
else:
|
||||||
config = (
|
config = (
|
||||||
nixl_agent_config(backends=self.nixl_backends)
|
nixl_agent_config(backends=self.nixl_backends)
|
||||||
if len(non_ucx_backends) > 0
|
if len(non_ucx_backends) > 0
|
||||||
else nixl_agent_config(num_threads=8)
|
else nixl_agent_config(num_threads=num_threads)
|
||||||
)
|
)
|
||||||
|
|
||||||
self.nixl_wrapper = NixlWrapper(str(uuid.uuid4()), config)
|
self.nixl_wrapper = NixlWrapper(str(uuid.uuid4()), config)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user