Add collective_rpc to llm engine (#16999)

Signed-off-by: Yinghai Lu <yinghai@thinkingmachines.ai>
Author: Yinghai Lu
Date: 2025-04-24 13:16:52 -07:00
Committed by: GitHub
Parent: 6d0df0ebeb
Commit: fe92176321
2 changed files with 29 additions and 0 deletions
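
For context: collective_rpc broadcasts a single method invocation to every
worker behind the engine and gathers the per-worker results. A minimal call
sketch, assuming an already-constructed async engine bound to `engine`; the
worker method name `echo_rank` is hypothetical, for illustration only:

    # Sketch only: must run inside an async context, and assumes the
    # workers define an `echo_rank` method (hypothetical name).
    results = await engine.collective_rpc(
        "echo_rank",   # name of the worker method to invoke everywhere
        timeout=10.0,  # optional timeout in seconds
        args=(),       # positional arguments forwarded to the method
        kwargs=None,   # keyword arguments forwarded to the method
    )
    # `results` should hold one entry per worker.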


@@ -528,6 +528,13 @@ class _AsyncLLMEngine(LLMEngine):
     async def check_health_async(self) -> None:
         self.model_executor.check_health()
 
+    async def collective_rpc_async(self,
+                                   method: str,
+                                   timeout: Optional[float] = None,
+                                   args: tuple = (),
+                                   kwargs: Optional[dict] = None):
+        raise NotImplementedError
+
 
 async def build_guided_decoding_logits_processor_async(
         sampling_params: SamplingParams, tokenizer: AnyTokenizer,
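
A note on the V0 design above: _AsyncLLMEngine.collective_rpc_async is only a
stub that raises NotImplementedError. The public AsyncLLMEngine.collective_rpc
added below delegates to it, so on the V0 engine the call fails unless a
subclass supplies a real implementation; the working path is the V1 AsyncLLM
in the second file.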
@@ -1236,6 +1243,17 @@ class AsyncLLMEngine(EngineClient):
     async def add_lora(self, lora_request: LoRARequest) -> None:
         self.engine.add_lora(lora_request)
 
+    async def collective_rpc(self,
+                             method: str,
+                             timeout: Optional[float] = None,
+                             args: tuple = (),
+                             kwargs: Optional[dict] = None):
+        """
+        Perform a collective RPC call to the given path.
+        """
+        return await self.engine.collective_rpc_async(method, timeout, args,
+                                                      kwargs)
+
 
 # TODO(v1): Remove this class proxy when V1 goes default.
 if envs.is_set("VLLM_USE_V1") and envs.VLLM_USE_V1:
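
The `method` string names a callable on each worker process. A hypothetical
worker-side target, to illustrate what such an RPC resolves to (this helper
class and its `rank` attribute are assumptions, not part of the commit):

    # Hypothetical worker method that collective_rpc("echo_rank") would
    # invoke once per worker; the caller receives the collected returns.
    class WorkerWithEcho:  # stands in for a vLLM worker class
        rank: int

        def echo_rank(self) -> int:
            return self.rank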


@@ -492,6 +492,17 @@ class AsyncLLM(EngineClient):
         """Prevent an adapter from being evicted."""
         return await self.engine_core.pin_lora_async(lora_id)
 
+    async def collective_rpc(self,
+                             method: str,
+                             timeout: Optional[float] = None,
+                             args: tuple = (),
+                             kwargs: Optional[dict] = None):
+        """
+        Perform a collective RPC call to the given path.
+        """
+        return await self.engine_core.collective_rpc_async(
+            method, timeout, args, kwargs)
+
     @property
     def is_running(self) -> bool:
         # Is None before the loop is started.
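
Putting it together on the V1 path, a runnable sketch (the model choice and
setting VLLM_USE_V1=1 in the environment are assumptions; `echo_rank` remains
a hypothetical worker method):

    import asyncio

    from vllm import AsyncEngineArgs
    from vllm.engine.async_llm_engine import AsyncLLMEngine

    async def main() -> None:
        # With VLLM_USE_V1=1, AsyncLLMEngine is proxied to the V1 AsyncLLM,
        # whose collective_rpc forwards to engine_core.collective_rpc_async.
        engine = AsyncLLMEngine.from_engine_args(
            AsyncEngineArgs(model="facebook/opt-125m"))
        results = await engine.collective_rpc("echo_rank", timeout=10.0)
        print(results)

    asyncio.run(main())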