From 852ee4b132164a4e8f9ddc8fbfa96796dca0248d Mon Sep 17 00:00:00 2001 From: "rshaw@neuralmagic.com" Date: Thu, 19 Jun 2025 13:16:50 +0000 Subject: [PATCH] updated Signed-off-by: rshaw@neuralmagic.com --- pd_justfile/Justfile | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/pd_justfile/Justfile b/pd_justfile/Justfile index 92f7c6ab2d4dd..92433c6604edb 100644 --- a/pd_justfile/Justfile +++ b/pd_justfile/Justfile @@ -13,20 +13,10 @@ port PORT: prefill: VLLM_NIXL_SIDE_CHANNEL_PORT=$(just port 5557) \ - CUDA_VISIBLE_DEVICES=0,1 \ + CUDA_VISIBLE_DEVICES=0 \ vllm serve {{MODEL}} \ --port $(just port 8100) \ - --tensor-parallel-size 2 \ - --enforce-eager \ - --disable-log-requests \ - --block-size 128 \ - --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' - -prefill_b: - VLLM_NIXL_SIDE_CHANNEL_PORT=$(just port 5558) \ - CUDA_VISIBLE_DEVICES=6 \ - vllm serve {{MODEL}} \ - --port $(just port 8200) \ + --tensor-parallel-size 1 \ --enforce-eager \ --disable-log-requests \ --block-size 128 \ @@ -34,22 +24,15 @@ prefill_b: decode: VLLM_NIXL_SIDE_CHANNEL_PORT=$(just port 5559) \ - CUDA_VISIBLE_DEVICES=2,3,4,5 \ + CUDA_VISIBLE_DEVICES=1 \ vllm serve {{MODEL}} \ --port $(just port 8300) \ - --tensor-parallel-size 2 \ + --tensor-parallel-size 1 \ --enforce-eager \ --disable-log-requests \ --block-size 128 \ --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' -# proxy: -# python "{{vllm-directory}}tests/v1/kv_connector/nixl_integration/toy_proxy_server.py" \ -# --port $(just port 8192) \ -# --prefiller-port $(just port 8100) $(just port 8200) \ -# --prefiller-host localhost localhost \ -# --decoder-port $(just port 8300) - proxy: python "{{vllm-directory}}tests/v1/kv_connector/nixl_integration/toy_proxy_server.py" \ --port $(just port 8192) \