From 0808eb813b6b219324092ab8cc25d3223e5ccb77 Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Fri, 28 Nov 2025 23:07:23 +0800
Subject: [PATCH] [Misc] Remove `yapf` directives (#29675)

Signed-off-by: DarkLight1337
---
 .../ec_transfer/ec_connector/factory.py   |  3 -
 vllm/entrypoints/openai/serving_tokens.py | 92 +++++++++++--------
 2 files changed, 52 insertions(+), 43 deletions(-)

diff --git a/vllm/distributed/ec_transfer/ec_connector/factory.py b/vllm/distributed/ec_transfer/ec_connector/factory.py
index bfdf51d775bd..e51b32e6f6df 100644
--- a/vllm/distributed/ec_transfer/ec_connector/factory.py
+++ b/vllm/distributed/ec_transfer/ec_connector/factory.py
@@ -5,15 +5,12 @@
 import importlib
 from collections.abc import Callable
 from typing import TYPE_CHECKING
-# yapf: disable
 from vllm.distributed.ec_transfer.ec_connector.base import (
     ECConnectorBase,
     ECConnectorRole,
 )
 from vllm.logger import init_logger
 
-# yapf: enable
-
 if TYPE_CHECKING:
     from vllm.config import ECTransferConfig, VllmConfig
 
diff --git a/vllm/entrypoints/openai/serving_tokens.py b/vllm/entrypoints/openai/serving_tokens.py
index 69a526b9b70d..daa739e41fa0 100644
--- a/vllm/entrypoints/openai/serving_tokens.py
+++ b/vllm/entrypoints/openai/serving_tokens.py
@@ -7,7 +7,6 @@
 from collections.abc import Sequence as GenericSequence
 
 from fastapi import Request
-# yapf: disable
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.logger import RequestLogger
 from vllm.entrypoints.openai.protocol import (
@@ -49,22 +48,26 @@ class ServingTokens(OpenAIServing):
         enable_prompt_tokens_details: bool = False,
         enable_log_outputs: bool = False,
     ):
-        super().__init__(engine_client=engine_client,
-                         models=models,
-                         request_logger=request_logger,
-                         return_tokens_as_token_ids=return_tokens_as_token_ids,
-                         log_error_stack=log_error_stack)
+        super().__init__(
+            engine_client=engine_client,
+            models=models,
+            request_logger=request_logger,
+            return_tokens_as_token_ids=return_tokens_as_token_ids,
+            log_error_stack=log_error_stack,
+        )
         self.enable_prompt_tokens_details = enable_prompt_tokens_details
         self.enable_log_outputs = enable_log_outputs
         self.force_no_detokenize = force_no_detokenize
         if force_no_detokenize:
-            logger.info("Tokens-only mode is enabled, skipping detokenization "
-                        "step for incoming requests.")
+            logger.info(
+                "Tokens-only mode is enabled, skipping detokenization "
+                "step for incoming requests."
+            )
 
     async def serve_tokens(
         self,
         request: GenerateRequest,
-        raw_request: Request | None = None
+        raw_request: Request | None = None,
     ) -> GenerateResponse | ErrorResponse:
         error_check_ret = await self._check_model(request)
         if error_check_ret is not None:
@@ -78,13 +81,13 @@ class ServingTokens(OpenAIServing):
             raise self.engine_client.dead_error
 
         lora_request = None
-        lora_request = self._maybe_get_adapters(request,
-                                                supports_default_mm_loras=True)
+        lora_request = self._maybe_get_adapters(request, supports_default_mm_loras=True)
 
         model_name = self.models.model_name(lora_request)
 
-        request_id = "generate-tokens-" \
-            f"{self._base_request_id(raw_request, request.request_id)}"
+        request_id = (
+            f"generate-tokens-{self._base_request_id(raw_request, request.request_id)}"
+        )
 
         request_metadata = RequestResponseMetadata(request_id=request_id)
         if raw_request:
@@ -106,13 +109,18 @@ class ServingTokens(OpenAIServing):
         if self.force_no_detokenize:
             sampling_params.detokenize = False
 
-        self._log_inputs(request_id,
-                         request.token_ids,
-                         params=sampling_params,
-                         lora_request=lora_request)
+        self._log_inputs(
+            request_id,
+            request.token_ids,
+            params=sampling_params,
+            lora_request=lora_request,
+        )
 
-        trace_headers = (None if raw_request is None else await
-                         self._get_trace_headers(raw_request.headers))
+        trace_headers = (
+            None
+            if raw_request is None
+            else await self._get_trace_headers(raw_request.headers)
+        )
 
         result_generator = self.engine_client.generate(
             engine_prompt,
@@ -131,8 +139,8 @@ class ServingTokens(OpenAIServing):
         try:
             assert result_generator is not None
             return await self.serve_tokens_full_generator(
-                request, result_generator, request_id, model_name,
-                request_metadata)
+                request, result_generator, request_id, model_name, request_metadata
+            )
         except ValueError as e:
             return self.create_error_response(str(e))
 
@@ -144,7 +152,6 @@ class ServingTokens(OpenAIServing):
         model_name: str,
         request_metadata: RequestResponseMetadata,
     ) -> ErrorResponse | GenerateResponse:
-
        created_time = int(time.time())
        final_res: RequestOutput | None = None
        sampling_params: SamplingParams = request.sampling_params
@@ -179,9 +186,9 @@ class ServingTokens(OpenAIServing):
             choice_data = GenerateResponseChoice(
                 index=output.index,
                 logprobs=logprobs,
-                finish_reason=output.finish_reason
-                if output.finish_reason else "stop",
-                token_ids=as_list(output.token_ids))
+                finish_reason=output.finish_reason if output.finish_reason else "stop",
+                token_ids=as_list(output.token_ids),
+            )
             choices.append(choice_data)
             num_generated_tokens += len(output.token_ids)
 
@@ -191,14 +198,16 @@ class ServingTokens(OpenAIServing):
         if final_res.encoder_prompt_token_ids is not None:
             num_prompt_tokens += len(final_res.encoder_prompt_token_ids)
 
-        usage = UsageInfo(prompt_tokens=num_prompt_tokens,
-                          completion_tokens=num_generated_tokens,
-                          total_tokens=num_prompt_tokens +
-                          num_generated_tokens)
+        usage = UsageInfo(
+            prompt_tokens=num_prompt_tokens,
+            completion_tokens=num_generated_tokens,
+            total_tokens=num_prompt_tokens + num_generated_tokens,
+        )
         if self.enable_prompt_tokens_details and final_res.num_cached_tokens:
             # This info is not available at the /coordinator level
             usage.prompt_tokens_details = PromptTokenUsageInfo(
-                cached_tokens=final_res.num_cached_tokens)
+                cached_tokens=final_res.num_cached_tokens
+            )
 
         request_metadata.final_usage_info = usage
 
@@ -218,14 +227,13 @@ class ServingTokens(OpenAIServing):
                 # Get the corresponding output token IDs
                 output_token_ids = None
                 if choice.index < len(final_res.outputs):
-                    output_token_ids = final_res.outputs[
-                        choice.index].token_ids
+                    output_token_ids = final_res.outputs[choice.index].token_ids
 
                 if output_token_ids:
                     # Log token_ids only.
                     self.request_logger.log_outputs(
                         request_id=request_id,
-                        outputs="",
+                        outputs="",
                         output_token_ids=output_token_ids,
                         finish_reason=choice.finish_reason,
                         is_streaming=False,
@@ -246,10 +254,12 @@ class ServingTokens(OpenAIServing):
         for i, token_id in enumerate(token_ids):
             token = f"token_id:{token_id}"
             step_top_logprobs = top_logprobs[i]
-            if step_top_logprobs is None or step_top_logprobs.get(
-                    token_id) is None:
+            if step_top_logprobs is None or step_top_logprobs.get(token_id) is None:
                 logprobs_content.append(
-                    ChatCompletionLogProbsContent(token=token, ))
+                    ChatCompletionLogProbsContent(
+                        token=token,
+                    )
+                )
             else:
                 step_token = step_top_logprobs[token_id]
 
@@ -261,9 +271,11 @@ class ServingTokens(OpenAIServing):
                             ChatCompletionLogProb(
                                 token=token,
                                 logprob=max(p[1].logprob, -9999.0),
-                            ) for i, p in enumerate(step_top_logprobs.items())
-                            if num_output_top_logprobs
-                            and i < num_output_top_logprobs
-                        ]))
+                            )
+                            for i, p in enumerate(step_top_logprobs.items())
+                            if num_output_top_logprobs and i < num_output_top_logprobs
+                        ],
+                    )
+                )
 
         return ChatCompletionLogProbs(content=logprobs_content)
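
For readers unfamiliar with the directives this patch deletes: yapf skips
formatting for any region fenced by "# yapf: disable" / "# yapf: enable"
comments. A minimal, self-contained sketch of the pattern (hypothetical file,
not part of the vLLM tree):

    # yapf_fence_demo.py -- hypothetical example, not from this patch
    # yapf: disable
    IDENTITY = [
        [1, 0],
        [0, 1],
    ]
    # yapf: enable

    # Without a fence, the same layout survives Black-style formatting
    # (e.g. `ruff format`): the trailing comma after the last element
    # keeps the literal exploded across multiple lines.
    SCALE = [
        [2, 0],
        [0, 2],
    ]

    print(IDENTITY, SCALE)

That magic-trailing-comma behavior is why the reflowed call sites above, with
one argument per line and each line ending in a comma, no longer need fences.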