From 675943e018f12cbe5c37d0e00eb754f5ab08a3a0 Mon Sep 17 00:00:00 2001
From: inkcherry
Date: Wed, 19 Nov 2025 08:35:28 +0000
Subject: [PATCH] fix dp router

Forward data_parallel_rank from the completion handler through
OpenAIServing._process_inputs to Processor.process_inputs, so a
request's chosen data-parallel rank is no longer dropped on its way
to the engine.

Signed-off-by: inkcherry
---
 vllm/entrypoints/openai/serving_completion.py | 1 +
 vllm/entrypoints/openai/serving_engine.py     | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py
index a114b77ebc16b..4d47912d25315 100644
--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
@@ -225,6 +225,7 @@ class OpenAIServingCompletion(OpenAIServing):
                         lora_request=lora_request,
                         trace_headers=trace_headers,
                         priority=request.priority,
+                        data_parallel_rank=data_parallel_rank,
                     )

                     generator = self.engine_client.generate(
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index c50b0c4a23e17..4d9903c9c5745 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -1172,6 +1172,7 @@ class OpenAIServing:
         lora_request: LoRARequest | None,
         trace_headers: Mapping[str, str] | None,
         priority: int,
+        data_parallel_rank: int,
     ) -> tuple[EngineCoreRequest, dict[str, Any]]:
         """Use the Processor to process inputs for AsyncLLM."""
         tokenization_kwargs: dict[str, Any] = {}
@@ -1187,6 +1188,7 @@ class OpenAIServing:
             tokenization_kwargs=tokenization_kwargs,
             trace_headers=trace_headers,
             priority=priority,
+            data_parallel_rank=data_parallel_rank,
         )

         return engine_request, tokenization_kwargs
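
For context, below is a minimal runnable sketch of the call chain this patch
repairs. The names here (DemoProcessor, the standalone _process_inputs helper,
the fields of the returned dict) are illustrative stand-ins, not the real vLLM
classes or API. The point it demonstrates: data_parallel_rank must be accepted
and forwarded at every intermediate layer; before this patch the
_process_inputs boundary dropped the keyword, so the engine request always
carried the default rank regardless of what the DP router chose.

from __future__ import annotations

from typing import Any


class DemoProcessor:
    """Illustrative stand-in for vLLM's input processor (hypothetical)."""

    def process_inputs(
        self,
        request_id: str,
        prompt: str,
        *,
        priority: int = 0,
        data_parallel_rank: int | None = None,
    ) -> dict[str, Any]:
        # The engine-core request records whatever rank it was given;
        # None means "let the engine pick a rank".
        return {
            "request_id": request_id,
            "prompt": prompt,
            "priority": priority,
            "data_parallel_rank": data_parallel_rank,
        }


def _process_inputs(
    processor: DemoProcessor,
    request_id: str,
    prompt: str,
    *,
    priority: int,
    data_parallel_rank: int | None,
) -> dict[str, Any]:
    # Mirrors OpenAIServing._process_inputs after the patch: the rank is
    # accepted here and forwarded. Before the patch this layer did not
    # take the argument, so callers could not pin a request to a DP rank.
    return processor.process_inputs(
        request_id,
        prompt,
        priority=priority,
        data_parallel_rank=data_parallel_rank,
    )


if __name__ == "__main__":
    request = _process_inputs(
        DemoProcessor(), "cmpl-0", "Hello", priority=0, data_parallel_rank=1
    )
    # With the fix, the chosen rank survives all the way to the request.
    assert request["data_parallel_rank"] == 1
    print(request)

Usage-wise, a caller that resolves a rank (for example, from a routing
decision made upstream) passes it once at the top of the chain; each
intermediate layer only needs to forward the keyword, which is exactly what
the two hunks above add.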