mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-03 18:47:04 +08:00
Fix DP router: forward data_parallel_rank through OpenAI serving request processing
Signed-off-by: inkcherry <mingzhi.liu@amd.com>
This commit is contained in:
parent
a7ea23d16d
commit
675943e018
@@ -225,6 +225,7 @@ class OpenAIServingCompletion(OpenAIServing):
|
||||
lora_request=lora_request,
|
||||
trace_headers=trace_headers,
|
||||
priority=request.priority,
|
||||
data_parallel_rank=data_parallel_rank,
|
||||
)
|
||||
|
||||
generator = self.engine_client.generate(
|
||||
|
||||
@@ -1172,6 +1172,7 @@ class OpenAIServing:
|
||||
lora_request: LoRARequest | None,
|
||||
trace_headers: Mapping[str, str] | None,
|
||||
priority: int,
|
||||
data_parallel_rank: int,
|
||||
) -> tuple[EngineCoreRequest, dict[str, Any]]:
|
||||
"""Use the Processor to process inputs for AsyncLLM."""
|
||||
tokenization_kwargs: dict[str, Any] = {}
|
||||
@@ -1187,6 +1188,7 @@ class OpenAIServing:
|
||||
tokenization_kwargs=tokenization_kwargs,
|
||||
trace_headers=trace_headers,
|
||||
priority=priority,
|
||||
data_parallel_rank=data_parallel_rank,
|
||||
)
|
||||
return engine_request, tokenization_kwargs
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user