Mirror of https://git.datalinker.icu/vllm-project/vllm.git
[Chore] Remove unused noqas (#31263)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 7adeb4bfa8
commit aa3868ecfe
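The directives removed here fall under a handful of lint rules (F401, F811, B904, SIM117, UP035) that no longer fire on the lines they annotate. As a minimal sketch of how such stale suppressions are typically found, assuming Ruff is the linter in play (the `# ruff: noqa: SIM117` lines removed below suggest it): Ruff's RUF100 rule reports `# noqa` comments that suppress nothing, and `ruff check --fix` can strip them automatically. The snippet below is a hypothetical illustration, not code from this commit:

# hypothetical_example.py -- illustration only, not part of this commit
import os   # noqa: F401  (still earns its keep: `os` is genuinely unused, F401 would fire)
import sys  # noqa: F401  (RUF100 flags this one: `sys` is used below, so F401 never fires)

print(sys.version)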
@@ -410,7 +410,7 @@ class HfRunner:
         # don't put this import at the top level
         # it will call torch.cuda.device_count()
-        from transformers import AutoProcessor  # noqa: F401
+        from transformers import AutoProcessor

         self.processor = AutoProcessor.from_pretrained(
             model_name,
@@ -15,7 +15,7 @@ MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"


 @pytest.fixture(scope="module")
-def server():  # noqa: F811
+def server():
     args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
@@ -28,7 +28,7 @@ def zephyr_lora_files():


 @pytest.fixture(scope="module")
-def server(zephyr_lora_files):  # noqa: F811
+def server(zephyr_lora_files):
     args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
@@ -12,7 +12,7 @@ MODEL_NAME = "Qwen/QwQ-32B"


 @pytest.fixture(scope="module")
-def server():  # noqa: F811
+def server():
     args = [
         "--max-model-len",
         "8192",
@@ -125,7 +125,7 @@ messages = [


 @pytest.fixture(scope="module")
-def server():  # noqa: F811
+def server():
     args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
@@ -212,7 +212,7 @@ async def test_function_tool_use(


 @pytest.fixture(scope="module")
-def k2_server():  # noqa: F811
+def k2_server():
     args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
@@ -23,7 +23,7 @@ ACTIVE_MM_LORA_RESPONSE = "Spoken text: The first words I spoke in the original


 @pytest.fixture(scope="module")
-def multimodal_server():  # noqa: F811
+def multimodal_server():
     args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
@@ -8,7 +8,7 @@ from ...utils import RemoteOpenAIServer


 @pytest.fixture(scope="module")
-def chat_server_with_force_include_usage(request):  # noqa: F811
+def chat_server_with_force_include_usage(request):
     args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
@@ -11,7 +11,7 @@ MODEL_NAME = "Qwen/Qwen3-0.6B"


 @pytest.fixture(scope="module")
-def server():  # noqa: F811
+def server():
     args = [
         "--max-model-len",
         "2048",
@@ -37,7 +37,7 @@ def default_server_args(qwen3_lora_files):


 @pytest.fixture(scope="module")
-def server_fixture(request, default_server_args):  # noqa: F811
+def server_fixture(request, default_server_args):
     use_server_flag = request.param
     if use_server_flag:
         args_with_flag = default_server_args + ["--return-tokens-as-token-ids"]
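The `# noqa: F811` comments removed above suppressed the "redefinition of unused name" rule on these pytest fixtures; once no earlier definition or import of the same name remains in scope, the suppression guards nothing. A hypothetical, self-contained illustration of what F811 actually flags (not code from this commit):

import pytest


@pytest.fixture(scope="module")
def server():
    return "first definition, never used"


@pytest.fixture(scope="module")
def server():  # F811: redefinition of unused name `server` from the fixture above
    return "second definition silently shadows the first"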
@@ -267,7 +267,7 @@ def run_embedding_input_test(
     """Inference result should be the same between
     original image/video input and image/video embeddings input.
     """
-    from transformers import AutoProcessor  # noqa: F401
+    from transformers import AutoProcessor

     processor = AutoProcessor.from_pretrained(model)
@@ -145,7 +145,7 @@ def test_shared_storage_connector_hashes(tmp_path):

     # don't put this import at the top level
     # it will call torch.cuda.device_count()
-    from transformers import AutoProcessor  # noqa: F401
+    from transformers import AutoProcessor

     # Create processor to handle the chat prompt
     processor = AutoProcessor.from_pretrained(MODEL_NAME)
@@ -81,10 +81,7 @@ class ECExampleConnector(ECConnectorBase):
         assert encoder_cache is not None
         if metadata is None:
             logger.warning(
-                (
-                    "In connector.start_load_caches, ",
-                    "but the connector metadata is None",
-                )
+                "In connector.start_load_caches, but the connector metadata is None"
             )
             return
         # Load the EC for each mm data
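Besides dropping noise, the hunk above fixes a genuine bug: the old code handed `logger.warning` a tuple of two strings (note the trailing commas), so the log line would contain the tuple's repr rather than a single sentence. A small self-contained sketch of the difference, using the standard `logging` module (the logger name is invented):

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger("ec_connector_example")  # hypothetical logger name

# Old form: the parentheses plus commas build a tuple, and its repr is what gets logged:
#   WARNING:ec_connector_example:('In connector.start_load_caches, ', 'but the connector metadata is None')
logger.warning(("In connector.start_load_caches, ", "but the connector metadata is None"))

# New form: one plain string, logged as-is:
#   WARNING:ec_connector_example:In connector.start_load_caches, but the connector metadata is None
logger.warning("In connector.start_load_caches, but the connector metadata is None")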
@@ -43,7 +43,7 @@ async def scale_elastic_ep(raw_request: Request):
     try:
         body = await raw_request.json()
     except json.JSONDecodeError as e:
-        raise HTTPException(status_code=400, detail="Invalid JSON format") from e  # noqa: B904
+        raise HTTPException(status_code=400, detail="Invalid JSON format") from e

     new_data_parallel_size = body.get("new_data_parallel_size")
     drain_timeout = body.get("drain_timeout", 120)  # Default 2 minutes
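B904 (a flake8-bugbear rule, also implemented by Ruff) asks that exceptions raised inside an `except` block be chained with `raise ... from err` (or `from None`) so the original traceback is preserved. The line above already uses `from e`, which is why the suppression had nothing left to silence. A minimal self-contained sketch of the pattern, with a plain `ValueError` standing in for FastAPI's `HTTPException`:

import json


def parse_body(raw: str) -> dict:
    try:
        return json.loads(raw)
    except json.JSONDecodeError as e:
        # A bare `raise ValueError(...)` here is what B904 would flag;
        # chaining with `from e` keeps the JSONDecodeError in the traceback.
        raise ValueError("Invalid JSON format") from e


print(parse_body('{"new_data_parallel_size": 4}'))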
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-# ruff: noqa: SIM117
 import fnmatch
 import glob
 import itertools
@@ -59,7 +58,7 @@ def is_moe_model(model: torch.nn.Module) -> bool:


 class BitsAndBytesModelLoader(BaseModelLoader):
-    """Model loader to load model weights with BitAndBytes quantization."""
+    """Model loader to load model weights with BitsAndBytes quantization."""

     possible_config_file_names = ["adapter_config.json"]
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-# ruff: noqa: SIM117
 import os
 from collections.abc import Generator
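The two file-level `# ruff: noqa: SIM117` directives removed above disabled flake8-simplify's SIM117 rule ("use a single `with` statement with multiple context managers instead of nested `with` blocks") for the whole file. Once no offending nested blocks remain, the blanket suppression is dead weight. A small self-contained illustration of what SIM117 targets (example values invented):

import io

# SIM117 flags the nested form...
with io.StringIO("header") as a:
    with io.StringIO("payload") as b:
        print(a.read(), b.read())

# ...and prefers one `with` statement holding both context managers:
with io.StringIO("header") as a, io.StringIO("payload") as b:
    print(a.read(), b.read())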
@@ -6,9 +6,7 @@ Define EC connector functionality mixin for model runners.

 from collections.abc import Generator
 from contextlib import AbstractContextManager, contextmanager, nullcontext
-from typing import (
-    TYPE_CHECKING,  # noqa: UP035
-)
+from typing import TYPE_CHECKING

 import torch
@@ -7,9 +7,7 @@ Define KV connector functionality mixin for model runners.

 import copy
 from collections.abc import Generator
 from contextlib import AbstractContextManager, contextmanager, nullcontext
-from typing import (
-    TYPE_CHECKING,  # noqa: UP035
-)
+from typing import TYPE_CHECKING

 import torch
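UP035 is pyupgrade's deprecated-import rule (as implemented by Ruff): it flags imports such as `typing.List` that now have builtin or `collections.abc` replacements. `typing.TYPE_CHECKING` has no such replacement, so the suppression never silenced anything; collapsing the parenthesized import onto one line and dropping the comment is the entire change in the two hunks above. A tiny hypothetical illustration of what UP035 actually targets (module and names invented):

# UP035 flags deprecated typing aliases that now have builtin replacements...
from typing import List  # UP035: `list` should be used instead

# ...but `TYPE_CHECKING` itself is fine and never needed a suppression:
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    import torch  # imported only for static type checkers


def shapes(tensors: List["torch.Tensor"]) -> list:
    return [t.shape for t in tensors]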