Signed-off-by: rshaw@neuralmagic.com <robertgshaw2@gmail.com>
This commit is contained in:
rshaw@neuralmagic.com 2025-06-19 13:16:50 +00:00
parent 87bf6812b2
commit 852ee4b132

View File

@@ -13,20 +13,10 @@ port PORT:
prefill:
VLLM_NIXL_SIDE_CHANNEL_PORT=$(just port 5557) \
CUDA_VISIBLE_DEVICES=0,1 \
CUDA_VISIBLE_DEVICES=0 \
vllm serve {{MODEL}} \
--port $(just port 8100) \
--tensor-parallel-size 2 \
--enforce-eager \
--disable-log-requests \
--block-size 128 \
--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
prefill_b:
VLLM_NIXL_SIDE_CHANNEL_PORT=$(just port 5558) \
CUDA_VISIBLE_DEVICES=6 \
vllm serve {{MODEL}} \
--port $(just port 8200) \
--tensor-parallel-size 1 \
--enforce-eager \
--disable-log-requests \
--block-size 128 \
@@ -34,22 +24,15 @@ prefill_b:
decode:
VLLM_NIXL_SIDE_CHANNEL_PORT=$(just port 5559) \
CUDA_VISIBLE_DEVICES=2,3,4,5 \
CUDA_VISIBLE_DEVICES=1 \
vllm serve {{MODEL}} \
--port $(just port 8300) \
--tensor-parallel-size 2 \
--tensor-parallel-size 1 \
--enforce-eager \
--disable-log-requests \
--block-size 128 \
--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
# proxy:
# python "{{vllm-directory}}tests/v1/kv_connector/nixl_integration/toy_proxy_server.py" \
# --port $(just port 8192) \
# --prefiller-port $(just port 8100) $(just port 8200) \
# --prefiller-host localhost localhost \
# --decoder-port $(just port 8300)
proxy:
python "{{vllm-directory}}tests/v1/kv_connector/nixl_integration/toy_proxy_server.py" \
--port $(just port 8192) \