Mirror of https://git.datalinker.icu/vllm-project/vllm.git
[Chore] Remove unused noqas (#31263)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 7adeb4bfa8
commit aa3868ecfe
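The directives removed here fall under a handful of lint rules (F401, F811, B904, SIM117, UP035) that no longer fire on the lines they annotate. As a minimal sketch of how such stale suppressions are typically found, assuming Ruff is the linter in play (the `# ruff: noqa: SIM117` lines removed below suggest it): Ruff's RUF100 rule reports `# noqa` comments that suppress nothing, and `ruff check --fix` can strip them automatically. The snippet below is a hypothetical illustration, not code from this commit:

# hypothetical_example.py -- illustration only, not part of this commit
import os   # noqa: F401  (still earns its keep: `os` is genuinely unused, F401 would fire)
import sys  # noqa: F401  (RUF100 flags this one: `sys` is used below, so F401 never fires)

print(sys.version)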
@@ -410,7 +410,7 @@ class HfRunner:
         # don't put this import at the top level
         # it will call torch.cuda.device_count()
-        from transformers import AutoProcessor  # noqa: F401
+        from transformers import AutoProcessor

         self.processor = AutoProcessor.from_pretrained(
             model_name,
@@ -15,7 +15,7 @@ MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"


 @pytest.fixture(scope="module")
-def server():  # noqa: F811
+def server():
     args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
@@ -28,7 +28,7 @@ def zephyr_lora_files():


 @pytest.fixture(scope="module")
-def server(zephyr_lora_files):  # noqa: F811
+def server(zephyr_lora_files):
     args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
@@ -12,7 +12,7 @@ MODEL_NAME = "Qwen/QwQ-32B"


 @pytest.fixture(scope="module")
-def server():  # noqa: F811
+def server():
     args = [
         "--max-model-len",
         "8192",
@@ -125,7 +125,7 @@ messages = [


 @pytest.fixture(scope="module")
-def server():  # noqa: F811
+def server():
     args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
@@ -212,7 +212,7 @@ async def test_function_tool_use(


 @pytest.fixture(scope="module")
-def k2_server():  # noqa: F811
+def k2_server():
     args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
@@ -23,7 +23,7 @@ ACTIVE_MM_LORA_RESPONSE = "Spoken text: The first words I spoke in the original


 @pytest.fixture(scope="module")
-def multimodal_server():  # noqa: F811
+def multimodal_server():
     args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
@@ -8,7 +8,7 @@ from ...utils import RemoteOpenAIServer


 @pytest.fixture(scope="module")
-def chat_server_with_force_include_usage(request):  # noqa: F811
+def chat_server_with_force_include_usage(request):
     args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
@@ -11,7 +11,7 @@ MODEL_NAME = "Qwen/Qwen3-0.6B"


 @pytest.fixture(scope="module")
-def server():  # noqa: F811
+def server():
     args = [
         "--max-model-len",
         "2048",
@@ -37,7 +37,7 @@ def default_server_args(qwen3_lora_files):


 @pytest.fixture(scope="module")
-def server_fixture(request, default_server_args):  # noqa: F811
+def server_fixture(request, default_server_args):
     use_server_flag = request.param
     if use_server_flag:
         args_with_flag = default_server_args + ["--return-tokens-as-token-ids"]
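The `# noqa: F811` comments removed above suppressed the "redefinition of unused name" rule on these pytest fixtures; once no earlier definition or import of the same name remains in scope, the suppression guards nothing. A hypothetical, self-contained illustration of what F811 actually flags (not code from this commit):

import pytest


@pytest.fixture(scope="module")
def server():
    return "first definition, never used"


@pytest.fixture(scope="module")
def server():  # F811: redefinition of unused name `server` from the fixture above
    return "second definition silently shadows the first"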
@@ -267,7 +267,7 @@ def run_embedding_input_test(
     """Inference result should be the same between
     original image/video input and image/video embeddings input.
     """
-    from transformers import AutoProcessor  # noqa: F401
+    from transformers import AutoProcessor

     processor = AutoProcessor.from_pretrained(model)
@@ -145,7 +145,7 @@ def test_shared_storage_connector_hashes(tmp_path):

     # don't put this import at the top level
     # it will call torch.cuda.device_count()
-    from transformers import AutoProcessor  # noqa: F401
+    from transformers import AutoProcessor

     # Create processor to handle the chat prompt
     processor = AutoProcessor.from_pretrained(MODEL_NAME)
@@ -81,10 +81,7 @@ class ECExampleConnector(ECConnectorBase):
         assert encoder_cache is not None
         if metadata is None:
             logger.warning(
-                (
-                    "In connector.start_load_caches, ",
-                    "but the connector metadata is None",
-                )
+                "In connector.start_load_caches, but the connector metadata is None"
             )
             return
         # Load the EC for each mm data
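Besides dropping noise, the hunk above fixes a genuine bug: the old code handed `logger.warning` a tuple of two strings (note the trailing commas), so the log line would contain the tuple's repr rather than a single sentence. A small self-contained sketch of the difference, using the standard `logging` module (the logger name is invented):

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger("ec_connector_example")  # hypothetical logger name

# Old form: the parentheses plus commas build a tuple, and its repr is what gets logged:
#   WARNING:ec_connector_example:('In connector.start_load_caches, ', 'but the connector metadata is None')
logger.warning(("In connector.start_load_caches, ", "but the connector metadata is None"))

# New form: one plain string, logged as-is:
#   WARNING:ec_connector_example:In connector.start_load_caches, but the connector metadata is None
logger.warning("In connector.start_load_caches, but the connector metadata is None")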
@@ -43,7 +43,7 @@ async def scale_elastic_ep(raw_request: Request):
     try:
         body = await raw_request.json()
     except json.JSONDecodeError as e:
-        raise HTTPException(status_code=400, detail="Invalid JSON format") from e  # noqa: B904
+        raise HTTPException(status_code=400, detail="Invalid JSON format") from e

     new_data_parallel_size = body.get("new_data_parallel_size")
     drain_timeout = body.get("drain_timeout", 120)  # Default 2 minutes
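B904 (a flake8-bugbear rule, also implemented by Ruff) asks that exceptions raised inside an `except` block be chained with `raise ... from err` (or `from None`) so the original traceback is preserved. The line above already uses `from e`, which is why the suppression had nothing left to silence. A minimal self-contained sketch of the pattern, with a plain `ValueError` standing in for FastAPI's `HTTPException`:

import json


def parse_body(raw: str) -> dict:
    try:
        return json.loads(raw)
    except json.JSONDecodeError as e:
        # A bare `raise ValueError(...)` here is what B904 would flag;
        # chaining with `from e` keeps the JSONDecodeError in the traceback.
        raise ValueError("Invalid JSON format") from e


print(parse_body('{"new_data_parallel_size": 4}'))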
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-# ruff: noqa: SIM117
 import fnmatch
 import glob
 import itertools
@@ -59,7 +58,7 @@ def is_moe_model(model: torch.nn.Module) -> bool:


 class BitsAndBytesModelLoader(BaseModelLoader):
-    """Model loader to load model weights with BitAndBytes quantization."""
+    """Model loader to load model weights with BitsAndBytes quantization."""

     possible_config_file_names = ["adapter_config.json"]
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-# ruff: noqa: SIM117
 import os
 from collections.abc import Generator
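The two file-level `# ruff: noqa: SIM117` directives removed above disabled flake8-simplify's SIM117 rule ("use a single `with` statement with multiple context managers instead of nested `with` blocks") for the whole file. Once no offending nested blocks remain, the blanket suppression is dead weight. A small self-contained illustration of what SIM117 targets (example values invented):

import io

# SIM117 flags the nested form...
with io.StringIO("header") as a:
    with io.StringIO("payload") as b:
        print(a.read(), b.read())

# ...and prefers one `with` statement holding both context managers:
with io.StringIO("header") as a, io.StringIO("payload") as b:
    print(a.read(), b.read())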
@@ -6,9 +6,7 @@ Define EC connector functionality mixin for model runners.

 from collections.abc import Generator
 from contextlib import AbstractContextManager, contextmanager, nullcontext
-from typing import (
-    TYPE_CHECKING,  # noqa: UP035
-)
+from typing import TYPE_CHECKING

 import torch
@@ -7,9 +7,7 @@ Define KV connector functionality mixin for model runners.

 import copy
 from collections.abc import Generator
 from contextlib import AbstractContextManager, contextmanager, nullcontext
-from typing import (
-    TYPE_CHECKING,  # noqa: UP035
-)
+from typing import TYPE_CHECKING

 import torch
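UP035 is pyupgrade's deprecated-import rule (as implemented by Ruff): it flags imports such as `typing.List` that now have builtin or `collections.abc` replacements. `typing.TYPE_CHECKING` has no such replacement, so the suppression never silenced anything; collapsing the parenthesized import onto one line and dropping the comment is the entire change in the two hunks above. A tiny hypothetical illustration of what UP035 actually targets (module and names invented):

# UP035 flags deprecated typing aliases that now have builtin replacements...
from typing import List  # UP035: `list` should be used instead

# ...but `TYPE_CHECKING` itself is fine and never needed a suppression:
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    import torch  # imported only for static type checkers


def shapes(tensors: List["torch.Tensor"]) -> list:
    return [t.shape for t in tensors]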