From bba01338ca866192318b23afbce5bd2ece8f13cf Mon Sep 17 00:00:00 2001
From: inkcherry
Date: Wed, 17 Dec 2025 08:36:11 +0000
Subject: [PATCH] remove merge

Signed-off-by: inkcherry
---
 vllm/entrypoints/openai/serving_chat.py       | 1 -
 vllm/entrypoints/openai/serving_completion.py | 1 -
 vllm/entrypoints/openai/serving_engine.py     | 3 ---
 3 files changed, 5 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 37fc5e4a9a9d7..9a7051e0920af 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -335,7 +335,6 @@ class OpenAIServingChat(OpenAIServing):
                 lora_request=lora_request,
                 trace_headers=trace_headers,
                 priority=request.priority,
-                data_parallel_rank=data_parallel_rank,
             )
 
             generator = self.engine_client.generate(
diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py
index 6cf000c3e79c1..9681aa8c71e6d 100644
--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
@@ -226,7 +226,6 @@ class OpenAIServingCompletion(OpenAIServing):
                 lora_request=lora_request,
                 trace_headers=trace_headers,
                 priority=request.priority,
-                data_parallel_rank=data_parallel_rank,
             )
 
             generator = self.engine_client.generate(
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index 624b936814b69..d9feee917ff4e 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -1207,7 +1207,6 @@ class OpenAIServing:
         lora_request: LoRARequest | None,
         trace_headers: Mapping[str, str] | None,
         priority: int,
-        data_parallel_rank: int | None,
     ) -> tuple[EngineCoreRequest, dict[str, Any]]:
         """Use the Processor to process inputs for AsyncLLM."""
         tokenization_kwargs: dict[str, Any] = {}
@@ -1223,7 +1222,6 @@ class OpenAIServing:
             tokenization_kwargs=tokenization_kwargs,
             trace_headers=trace_headers,
             priority=priority,
-            data_parallel_rank=data_parallel_rank,
         )
 
         return engine_request, tokenization_kwargs
@@ -1258,7 +1256,6 @@ class OpenAIServing:
            lora_request=lora_request,
            trace_headers=trace_headers,
            priority=priority,
-           data_parallel_rank=None,
        )
 
        generator = self.engine_client.generate(
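
For reference, a minimal sketch of the `_process_inputs` signature after this patch. This is an illustration, not the actual vLLM code: `EngineCoreRequest` and `LoRARequest` are stubbed here (in vLLM they come from the engine and LoRA modules), the method's leading positional parameters are not visible in this diff and so are elided, and the body is a placeholder.

from collections.abc import Mapping
from typing import Any


# Stubs for illustration only; the real types live in vLLM's engine and
# LoRA modules. These definitions are assumptions, not the actual ones.
class EngineCoreRequest: ...


class LoRARequest: ...


def _process_inputs(
    *,  # leading positional parameters elided; they are not shown in the diff
    lora_request: LoRARequest | None,
    trace_headers: Mapping[str, str] | None,
    priority: int,
    # data_parallel_rank: int | None  <- parameter removed by this patch
) -> tuple[EngineCoreRequest, dict[str, Any]]:
    """Use the Processor to process inputs for AsyncLLM."""
    tokenization_kwargs: dict[str, Any] = {}
    # Body elided: the real method builds an EngineCoreRequest via the
    # Processor and returns it with tokenization_kwargs, as in the diff.
    return EngineCoreRequest(), tokenization_kwargs

After this change, callers such as OpenAIServingChat and OpenAIServingCompletion pass only lora_request, trace_headers, and priority; data_parallel_rank is no longer threaded through this request-processing path.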