mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-29 18:47:16 +08:00
parent
87bf6812b2
commit
852ee4b132
@ -13,20 +13,10 @@ port PORT:
|
|||||||
|
|
||||||
prefill:
|
prefill:
|
||||||
VLLM_NIXL_SIDE_CHANNEL_PORT=$(just port 5557) \
|
VLLM_NIXL_SIDE_CHANNEL_PORT=$(just port 5557) \
|
||||||
CUDA_VISIBLE_DEVICES=0,1 \
|
CUDA_VISIBLE_DEVICES=0 \
|
||||||
vllm serve {{MODEL}} \
|
vllm serve {{MODEL}} \
|
||||||
--port $(just port 8100) \
|
--port $(just port 8100) \
|
||||||
--tensor-parallel-size 2 \
|
--tensor-parallel-size 1 \
|
||||||
--enforce-eager \
|
|
||||||
--disable-log-requests \
|
|
||||||
--block-size 128 \
|
|
||||||
--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
|
|
||||||
|
|
||||||
prefill_b:
|
|
||||||
VLLM_NIXL_SIDE_CHANNEL_PORT=$(just port 5558) \
|
|
||||||
CUDA_VISIBLE_DEVICES=6 \
|
|
||||||
vllm serve {{MODEL}} \
|
|
||||||
--port $(just port 8200) \
|
|
||||||
--enforce-eager \
|
--enforce-eager \
|
||||||
--disable-log-requests \
|
--disable-log-requests \
|
||||||
--block-size 128 \
|
--block-size 128 \
|
||||||
@ -34,22 +24,15 @@ prefill_b:
|
|||||||
|
|
||||||
decode:
|
decode:
|
||||||
VLLM_NIXL_SIDE_CHANNEL_PORT=$(just port 5559) \
|
VLLM_NIXL_SIDE_CHANNEL_PORT=$(just port 5559) \
|
||||||
CUDA_VISIBLE_DEVICES=2,3,4,5 \
|
CUDA_VISIBLE_DEVICES=1 \
|
||||||
vllm serve {{MODEL}} \
|
vllm serve {{MODEL}} \
|
||||||
--port $(just port 8300) \
|
--port $(just port 8300) \
|
||||||
--tensor-parallel-size 2 \
|
--tensor-parallel-size 1 \
|
||||||
--enforce-eager \
|
--enforce-eager \
|
||||||
--disable-log-requests \
|
--disable-log-requests \
|
||||||
--block-size 128 \
|
--block-size 128 \
|
||||||
--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
|
--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
|
||||||
|
|
||||||
# proxy:
|
|
||||||
# python "{{vllm-directory}}tests/v1/kv_connector/nixl_integration/toy_proxy_server.py" \
|
|
||||||
# --port $(just port 8192) \
|
|
||||||
# --prefiller-port $(just port 8100) $(just port 8200) \
|
|
||||||
# --prefiller-host localhost localhost \
|
|
||||||
# --decoder-port $(just port 8300)
|
|
||||||
|
|
||||||
proxy:
|
proxy:
|
||||||
python "{{vllm-directory}}tests/v1/kv_connector/nixl_integration/toy_proxy_server.py" \
|
python "{{vllm-directory}}tests/v1/kv_connector/nixl_integration/toy_proxy_server.py" \
|
||||||
--port $(just port 8192) \
|
--port $(just port 8192) \
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user