pd_justfile: set VLLM_IS_PREFILL and use GPUs 7/6; raise NixlConnectorWorker num_workers from 32 to 64

Signed-off-by: rshaw@neuralmagic.com <robertgshaw2@gmail.com>
2026-06-29 13:07:21 +08:00 · 2025-07-01 02:49:54 +00:00 · 2025-07-01 02:49:54 +00:00 · ff5a0cfa6e
commit ff5a0cfa6e
parent 56939c835d
2 changed files with 5 additions and 4 deletions
--- a/pd_justfile/Justfile
+++ b/pd_justfile/Justfile
@@ -12,8 +12,9 @@ port PORT:


 prefill:
+    VLLM_IS_PREFILL=1 \
    VLLM_NIXL_SIDE_CHANNEL_PORT=5557 \
-    CUDA_VISIBLE_DEVICES=0 \
+    CUDA_VISIBLE_DEVICES=7 \
    vllm serve {{MODEL}} \
      --port $(just port 8100) \
      --tensor-parallel-size 1 \
@@ -23,8 +24,9 @@ prefill:
      --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'

 decode:
+    VLLM_IS_PREFILL=0 \
    VLLM_NIXL_SIDE_CHANNEL_PORT=$(just port 5559) \
-    CUDA_VISIBLE_DEVICES=1 \
+    CUDA_VISIBLE_DEVICES=6 \
    vllm serve {{MODEL}} \
      --port $(just port 8300) \
      --tensor-parallel-size 1 \
--- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
@@ -7,7 +7,6 @@ import time
 import uuid
 from collections import defaultdict
 from collections.abc import Iterator
-from concurrent.futures import ThreadPoolExecutor
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any, Optional

@@ -335,7 +334,7 @@ class NixlConnectorWorker:

        # Agent.
        import os
-        num_workers = 32
+        num_workers = 64
        # setting num_workers on the prefiller causes no notifs to be recved???
        # this is a hack to make sure we set num workers on the prefiller to 1.
        if os.getenv("VLLM_IS_PREFILL", "0") == "1":