[Bugfix][P/D] Reduce num_threads used by nixl ucx backend (#27196)

Signed-off-by: David Whyte-Gray <40244437+dagrayvid@users.noreply.github.com>
This commit is contained in:
David Whyte-Gray 2025-10-21 13:41:52 -04:00 committed by GitHub
parent 86ed77022d
commit ddeec11ba9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -607,15 +607,25 @@ class NixlConnectorWorker:
# TODO: Temporary workaround. Once nixl supports a telemetry flag in its
# agent config (expected in the next release), remove this env var.
os.environ["NIXL_TELEMETRY_ENABLE"] = "1"
# Agent.
non_ucx_backends = [b for b in self.nixl_backends if b != "UCX"]
# Configure NIXL num_threads to avoid UAR exhaustion on Mellanox NICs.
# Each UCX thread allocates UARs (doorbell pages) via DevX, and
# excessive NIXL UAR usage can exhaust NIC UAR space. This can cause
# components like NVSHMEM (used by DeepEP kernels) to fail during RDMA
# initialization with "mlx5dv_devx_alloc_uar" errors.
# Ref: https://network.nvidia.com/files/doc-2020/ethernet-adapters-programming-manual.pdf#page=63
num_threads = vllm_config.kv_transfer_config.get_from_extra_config(
"num_threads", 4
)
if nixl_agent_config is None:
config = None
else:
config = (
nixl_agent_config(backends=self.nixl_backends)
if len(non_ucx_backends) > 0
else nixl_agent_config(num_threads=8)
else nixl_agent_config(num_threads=num_threads)
)
self.nixl_wrapper = NixlWrapper(str(uuid.uuid4()), config)