From ff5a0cfa6edbe78d901527f2019c2ac0eeb68983 Mon Sep 17 00:00:00 2001 From: "rshaw@neuralmagic.com" Date: Tue, 1 Jul 2025 02:49:54 +0000 Subject: [PATCH] updated Signed-off-by: rshaw@neuralmagic.com --- pd_justfile/Justfile | 6 ++++-- .../kv_transfer/kv_connector/v1/nixl_connector.py | 3 +-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pd_justfile/Justfile b/pd_justfile/Justfile index 5641967b4ea07..aa83ea66e1fe7 100644 --- a/pd_justfile/Justfile +++ b/pd_justfile/Justfile @@ -12,8 +12,9 @@ port PORT: prefill: + VLLM_IS_PREFILL=1 \ VLLM_NIXL_SIDE_CHANNEL_PORT=5557 \ - CUDA_VISIBLE_DEVICES=0 \ + CUDA_VISIBLE_DEVICES=7 \ vllm serve {{MODEL}} \ --port $(just port 8100) \ --tensor-parallel-size 1 \ @@ -23,8 +24,9 @@ prefill: --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' decode: + VLLM_IS_PREFILL=0 \ VLLM_NIXL_SIDE_CHANNEL_PORT=$(just port 5559) \ - CUDA_VISIBLE_DEVICES=1 \ + CUDA_VISIBLE_DEVICES=6 \ vllm serve {{MODEL}} \ --port $(just port 8300) \ --tensor-parallel-size 1 \ diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index 54b867e9fead6..fba9d15ef9a58 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -7,7 +7,6 @@ import time import uuid from collections import defaultdict from collections.abc import Iterator -from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass from typing import TYPE_CHECKING, Any, Optional @@ -335,7 +334,7 @@ class NixlConnectorWorker: # Agent. import os - num_workers = 32 + num_workers = 64 # setting num_workers on the prefiller causes no notifs to be recved??? # this is a hack to make sure we set num workers on the prefiller to 1. if os.getenv("VLLM_IS_PREFILL", "0") == "1":