diff --git a/tests/conftest.py b/tests/conftest.py
index a03f40a9a72ac..30e25294925ca 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -410,7 +410,7 @@ class HfRunner:
 
         # don't put this import at the top level
         # it will call torch.cuda.device_count()
-        from transformers import AutoProcessor  # noqa: F401
+        from transformers import AutoProcessor
 
         self.processor = AutoProcessor.from_pretrained(
             model_name,
diff --git a/tests/entrypoints/openai/test_async_tokenization.py b/tests/entrypoints/openai/test_async_tokenization.py
index 682420a83a442..1d3d110d30271 100644
--- a/tests/entrypoints/openai/test_async_tokenization.py
+++ b/tests/entrypoints/openai/test_async_tokenization.py
@@ -15,7 +15,7 @@ MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
 
 
 @pytest.fixture(scope="module")
-def server():  # noqa: F811
+def server():
     args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
diff --git a/tests/entrypoints/openai/test_chat.py b/tests/entrypoints/openai/test_chat.py
index ab78a79774564..ae94c149017e7 100644
--- a/tests/entrypoints/openai/test_chat.py
+++ b/tests/entrypoints/openai/test_chat.py
@@ -28,7 +28,7 @@ def zephyr_lora_files():
 
 
 @pytest.fixture(scope="module")
-def server(zephyr_lora_files):  # noqa: F811
+def server(zephyr_lora_files):
     args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
diff --git a/tests/entrypoints/openai/test_chat_with_tool_reasoning.py b/tests/entrypoints/openai/test_chat_with_tool_reasoning.py
index 7b3092b563030..445fa389d0007 100644
--- a/tests/entrypoints/openai/test_chat_with_tool_reasoning.py
+++ b/tests/entrypoints/openai/test_chat_with_tool_reasoning.py
@@ -12,7 +12,7 @@ MODEL_NAME = "Qwen/QwQ-32B"
 
 
 @pytest.fixture(scope="module")
-def server():  # noqa: F811
+def server():
     args = [
         "--max-model-len",
         "8192",
diff --git a/tests/entrypoints/openai/test_completion_with_function_calling.py b/tests/entrypoints/openai/test_completion_with_function_calling.py
index 53369f074eca8..c6a5841ec3bfb 100644
--- a/tests/entrypoints/openai/test_completion_with_function_calling.py
+++ b/tests/entrypoints/openai/test_completion_with_function_calling.py
@@ -125,7 +125,7 @@ messages = [
 
 
 @pytest.fixture(scope="module")
-def server():  # noqa: F811
+def server():
     args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
@@ -212,7 +212,7 @@ async def test_function_tool_use(
 
 
 @pytest.fixture(scope="module")
-def k2_server():  # noqa: F811
+def k2_server():
     args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
diff --git a/tests/entrypoints/openai/test_default_mm_loras.py b/tests/entrypoints/openai/test_default_mm_loras.py
index 818ee2644b547..dd8f9d67d6903 100644
--- a/tests/entrypoints/openai/test_default_mm_loras.py
+++ b/tests/entrypoints/openai/test_default_mm_loras.py
@@ -23,7 +23,7 @@ ACTIVE_MM_LORA_RESPONSE = "Spoken text: The first words I spoke in the original
 
 
 @pytest.fixture(scope="module")
-def multimodal_server():  # noqa: F811
+def multimodal_server():
     args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
diff --git a/tests/entrypoints/openai/test_enable_force_include_usage.py b/tests/entrypoints/openai/test_enable_force_include_usage.py
index 9d527c45c1fae..8e7e34ee2b71b 100644
--- a/tests/entrypoints/openai/test_enable_force_include_usage.py
+++ b/tests/entrypoints/openai/test_enable_force_include_usage.py
@@ -8,7 +8,7 @@ from ...utils import RemoteOpenAIServer
 
 
 @pytest.fixture(scope="module")
-def chat_server_with_force_include_usage(request):  # noqa: F811
+def chat_server_with_force_include_usage(request):
     args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
diff --git a/tests/entrypoints/openai/test_messages.py b/tests/entrypoints/openai/test_messages.py
index 8de6c4cb6c887..ce8c3ff4a71a5 100644
--- a/tests/entrypoints/openai/test_messages.py
+++ b/tests/entrypoints/openai/test_messages.py
@@ -11,7 +11,7 @@ MODEL_NAME = "Qwen/Qwen3-0.6B"
 
 
 @pytest.fixture(scope="module")
-def server():  # noqa: F811
+def server():
     args = [
         "--max-model-len",
         "2048",
diff --git a/tests/entrypoints/openai/test_return_tokens_as_ids.py b/tests/entrypoints/openai/test_return_tokens_as_ids.py
index d4d9a6c5b6120..05a36febad0cc 100644
--- a/tests/entrypoints/openai/test_return_tokens_as_ids.py
+++ b/tests/entrypoints/openai/test_return_tokens_as_ids.py
@@ -37,7 +37,7 @@ def default_server_args(qwen3_lora_files):
 
 
 @pytest.fixture(scope="module")
-def server_fixture(request, default_server_args):  # noqa: F811
+def server_fixture(request, default_server_args):
     use_server_flag = request.param
     if use_server_flag:
         args_with_flag = default_server_args + ["--return-tokens-as-token-ids"]
diff --git a/tests/models/multimodal/generation/test_qwen2_vl.py b/tests/models/multimodal/generation/test_qwen2_vl.py
index e1b7dbf99f1fd..d46dd640229d0 100644
--- a/tests/models/multimodal/generation/test_qwen2_vl.py
+++ b/tests/models/multimodal/generation/test_qwen2_vl.py
@@ -267,7 +267,7 @@ def run_embedding_input_test(
     """Inference result should be the same between
     original image/video input and image/video embeddings input.
     """
-    from transformers import AutoProcessor  # noqa: F401
+    from transformers import AutoProcessor
 
     processor = AutoProcessor.from_pretrained(model)
 
diff --git a/tests/v1/kv_connector/unit/test_example_connector.py b/tests/v1/kv_connector/unit/test_example_connector.py
index 8312231716935..d415608c95faa 100644
--- a/tests/v1/kv_connector/unit/test_example_connector.py
+++ b/tests/v1/kv_connector/unit/test_example_connector.py
@@ -145,7 +145,7 @@ def test_shared_storage_connector_hashes(tmp_path):
 
     # don't put this import at the top level
     # it will call torch.cuda.device_count()
-    from transformers import AutoProcessor  # noqa: F401
+    from transformers import AutoProcessor
 
     # Create processor to handle the chat prompt
     processor = AutoProcessor.from_pretrained(MODEL_NAME)
diff --git a/vllm/distributed/ec_transfer/ec_connector/example_connector.py b/vllm/distributed/ec_transfer/ec_connector/example_connector.py
index 3518044ce2e00..48a7d41908fd4 100644
--- a/vllm/distributed/ec_transfer/ec_connector/example_connector.py
+++ b/vllm/distributed/ec_transfer/ec_connector/example_connector.py
@@ -81,10 +81,7 @@ class ECExampleConnector(ECConnectorBase):
         assert encoder_cache is not None
         if metadata is None:
             logger.warning(
-                (
-                    "In connector.start_load_caches, ",
-                    "but the connector metadata is None",
-                )
+                "In connector.start_load_caches, but the connector metadata is None"
             )
             return
         # Load the EC for each mm data
diff --git a/vllm/entrypoints/serve/elastic_ep/api_router.py b/vllm/entrypoints/serve/elastic_ep/api_router.py
index 21d5d2e60778a..e5adb81051ffd 100644
--- a/vllm/entrypoints/serve/elastic_ep/api_router.py
+++ b/vllm/entrypoints/serve/elastic_ep/api_router.py
@@ -43,7 +43,7 @@ async def scale_elastic_ep(raw_request: Request):
     try:
         body = await raw_request.json()
     except json.JSONDecodeError as e:
-        raise HTTPException(status_code=400, detail="Invalid JSON format") from e  # noqa: B904
+        raise HTTPException(status_code=400, detail="Invalid JSON format") from e
 
     new_data_parallel_size = body.get("new_data_parallel_size")
     drain_timeout = body.get("drain_timeout", 120)  # Default 2 minutes
diff --git a/vllm/model_executor/model_loader/bitsandbytes_loader.py b/vllm/model_executor/model_loader/bitsandbytes_loader.py
index 97c7a20bc4d5a..aa020645021ea 100644
--- a/vllm/model_executor/model_loader/bitsandbytes_loader.py
+++ b/vllm/model_executor/model_loader/bitsandbytes_loader.py
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-# ruff: noqa: SIM117
 import fnmatch
 import glob
 import itertools
@@ -59,7 +58,7 @@ def is_moe_model(model: torch.nn.Module) -> bool:
 
 
 class BitsAndBytesModelLoader(BaseModelLoader):
-    """Model loader to load model weights with BitAndBytes quantization."""
+    """Model loader to load model weights with BitsAndBytes quantization."""
 
     possible_config_file_names = ["adapter_config.json"]
 
diff --git a/vllm/model_executor/model_loader/runai_streamer_loader.py b/vllm/model_executor/model_loader/runai_streamer_loader.py
index 93da07c550195..fb33d3c6448bd 100644
--- a/vllm/model_executor/model_loader/runai_streamer_loader.py
+++ b/vllm/model_executor/model_loader/runai_streamer_loader.py
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-# ruff: noqa: SIM117
 import os
 from collections.abc import Generator
 
diff --git a/vllm/v1/worker/ec_connector_model_runner_mixin.py b/vllm/v1/worker/ec_connector_model_runner_mixin.py
index 08a41532ea8e1..1a347a0b98ab2 100644
--- a/vllm/v1/worker/ec_connector_model_runner_mixin.py
+++ b/vllm/v1/worker/ec_connector_model_runner_mixin.py
@@ -6,9 +6,7 @@ Define EC connector functionality mixin for model runners.
 
 from collections.abc import Generator
 from contextlib import AbstractContextManager, contextmanager, nullcontext
-from typing import (
-    TYPE_CHECKING,  # noqa: UP035
-)
+from typing import TYPE_CHECKING
 
 import torch
 
diff --git a/vllm/v1/worker/kv_connector_model_runner_mixin.py b/vllm/v1/worker/kv_connector_model_runner_mixin.py
index 2bcc87b63bcdf..7bb4ebe476ecf 100644
--- a/vllm/v1/worker/kv_connector_model_runner_mixin.py
+++ b/vllm/v1/worker/kv_connector_model_runner_mixin.py
@@ -7,9 +7,7 @@ Define KV connector functionality mixin for model runners.
 import copy
 from collections.abc import Generator
 from contextlib import AbstractContextManager, contextmanager, nullcontext
-from typing import (
-    TYPE_CHECKING,  # noqa: UP035
-)
+from typing import TYPE_CHECKING
 
 import torch