mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-30 15:07:12 +08:00
updated to make send_notif work
Signed-off-by: rshaw@neuralmagic.com <robertgshaw2@gmail.com>
This commit is contained in:
parent
8283d7b85c
commit
b9be6fd35a
@ -12,7 +12,7 @@ port PORT:
|
|||||||
|
|
||||||
|
|
||||||
prefill:
|
prefill:
|
||||||
VLLM_NIXL_SIDE_CHANNEL_PORT=$(just port 5557) \
|
VLLM_NIXL_SIDE_CHANNEL_PORT=5557 \
|
||||||
CUDA_VISIBLE_DEVICES=0 \
|
CUDA_VISIBLE_DEVICES=0 \
|
||||||
vllm serve {{MODEL}} \
|
vllm serve {{MODEL}} \
|
||||||
--port $(just port 8100) \
|
--port $(just port 8100) \
|
||||||
|
|||||||
@ -330,11 +330,16 @@ class NixlConnectorWorker:
|
|||||||
self.block_size = vllm_config.cache_config.block_size
|
self.block_size = vllm_config.cache_config.block_size
|
||||||
|
|
||||||
# Agent.
|
# Agent.
|
||||||
|
import os
|
||||||
|
num_workers = 16
|
||||||
|
# NOTE: setting num_workers on the prefiller appears to stop notifs from being received (cause not yet understood).
|
||||||
|
if os.getenv("VLLM_NIXL_SIDE_CHANNEL_PORT", "") == "5557":
|
||||||
|
num_workers = None
|
||||||
|
print(f"NUM_WORKERS: {num_workers=}")
|
||||||
self.nixl_wrapper = NixlWrapper(str(uuid.uuid4()),
|
self.nixl_wrapper = NixlWrapper(str(uuid.uuid4()),
|
||||||
None,
|
None,
|
||||||
num_workers=None,
|
num_workers=num_workers,
|
||||||
num_shared_workers=16) # setting this > 0 causes the notifs to be received
|
num_shared_workers=None)
|
||||||
# num_shared_workers=None)
|
|
||||||
# Map of engine_id -> {rank0: agent_name0, rank1: agent_name1..}.
|
# Map of engine_id -> {rank0: agent_name0, rank1: agent_name1..}.
|
||||||
self._remote_agents: dict[str, dict[int, str]] = defaultdict(dict)
|
self._remote_agents: dict[str, dict[int, str]] = defaultdict(dict)
|
||||||
|
|
||||||
@ -820,6 +825,7 @@ class NixlConnectorWorker:
|
|||||||
"""
|
"""
|
||||||
notified_req_ids: set[str] = set()
|
notified_req_ids: set[str] = set()
|
||||||
for notifs in self.nixl_wrapper.get_new_notifs().values():
|
for notifs in self.nixl_wrapper.get_new_notifs().values():
|
||||||
|
# NOTE: get_new_notifs() yields nothing here when num_workers > 0.
|
||||||
print(f"{notifs=}")
|
print(f"{notifs=}")
|
||||||
for notif in notifs:
|
for notif in notifs:
|
||||||
req_id, tp_ratio = notif.decode("utf-8").rsplit(":", 1)
|
req_id, tp_ratio = notif.decode("utf-8").rsplit(":", 1)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user