mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-03 02:31:20 +08:00
[Chore] Remove unused noqas (#31263)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
7adeb4bfa8
commit
aa3868ecfe
@ -410,7 +410,7 @@ class HfRunner:
|
|||||||
|
|
||||||
# don't put this import at the top level
|
# don't put this import at the top level
|
||||||
# it will call torch.cuda.device_count()
|
# it will call torch.cuda.device_count()
|
||||||
from transformers import AutoProcessor # noqa: F401
|
from transformers import AutoProcessor
|
||||||
|
|
||||||
self.processor = AutoProcessor.from_pretrained(
|
self.processor = AutoProcessor.from_pretrained(
|
||||||
model_name,
|
model_name,
|
||||||
|
|||||||
@ -15,7 +15,7 @@ MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def server(): # noqa: F811
|
def server():
|
||||||
args = [
|
args = [
|
||||||
# use half precision for speed and memory savings in CI environment
|
# use half precision for speed and memory savings in CI environment
|
||||||
"--dtype",
|
"--dtype",
|
||||||
|
|||||||
@ -28,7 +28,7 @@ def zephyr_lora_files():
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def server(zephyr_lora_files): # noqa: F811
|
def server(zephyr_lora_files):
|
||||||
args = [
|
args = [
|
||||||
# use half precision for speed and memory savings in CI environment
|
# use half precision for speed and memory savings in CI environment
|
||||||
"--dtype",
|
"--dtype",
|
||||||
|
|||||||
@ -12,7 +12,7 @@ MODEL_NAME = "Qwen/QwQ-32B"
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def server(): # noqa: F811
|
def server():
|
||||||
args = [
|
args = [
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
"8192",
|
"8192",
|
||||||
|
|||||||
@ -125,7 +125,7 @@ messages = [
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def server(): # noqa: F811
|
def server():
|
||||||
args = [
|
args = [
|
||||||
# use half precision for speed and memory savings in CI environment
|
# use half precision for speed and memory savings in CI environment
|
||||||
"--dtype",
|
"--dtype",
|
||||||
@ -212,7 +212,7 @@ async def test_function_tool_use(
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def k2_server(): # noqa: F811
|
def k2_server():
|
||||||
args = [
|
args = [
|
||||||
# use half precision for speed and memory savings in CI environment
|
# use half precision for speed and memory savings in CI environment
|
||||||
"--dtype",
|
"--dtype",
|
||||||
|
|||||||
@ -23,7 +23,7 @@ ACTIVE_MM_LORA_RESPONSE = "Spoken text: The first words I spoke in the original
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def multimodal_server(): # noqa: F811
|
def multimodal_server():
|
||||||
args = [
|
args = [
|
||||||
# use half precision for speed and memory savings in CI environment
|
# use half precision for speed and memory savings in CI environment
|
||||||
"--dtype",
|
"--dtype",
|
||||||
|
|||||||
@ -8,7 +8,7 @@ from ...utils import RemoteOpenAIServer
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def chat_server_with_force_include_usage(request): # noqa: F811
|
def chat_server_with_force_include_usage(request):
|
||||||
args = [
|
args = [
|
||||||
# use half precision for speed and memory savings in CI environment
|
# use half precision for speed and memory savings in CI environment
|
||||||
"--dtype",
|
"--dtype",
|
||||||
|
|||||||
@ -11,7 +11,7 @@ MODEL_NAME = "Qwen/Qwen3-0.6B"
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def server(): # noqa: F811
|
def server():
|
||||||
args = [
|
args = [
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
"2048",
|
"2048",
|
||||||
|
|||||||
@ -37,7 +37,7 @@ def default_server_args(qwen3_lora_files):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def server_fixture(request, default_server_args): # noqa: F811
|
def server_fixture(request, default_server_args):
|
||||||
use_server_flag = request.param
|
use_server_flag = request.param
|
||||||
if use_server_flag:
|
if use_server_flag:
|
||||||
args_with_flag = default_server_args + ["--return-tokens-as-token-ids"]
|
args_with_flag = default_server_args + ["--return-tokens-as-token-ids"]
|
||||||
|
|||||||
@ -267,7 +267,7 @@ def run_embedding_input_test(
|
|||||||
"""Inference result should be the same between
|
"""Inference result should be the same between
|
||||||
original image/video input and image/video embeddings input.
|
original image/video input and image/video embeddings input.
|
||||||
"""
|
"""
|
||||||
from transformers import AutoProcessor # noqa: F401
|
from transformers import AutoProcessor
|
||||||
|
|
||||||
processor = AutoProcessor.from_pretrained(model)
|
processor = AutoProcessor.from_pretrained(model)
|
||||||
|
|
||||||
|
|||||||
@ -145,7 +145,7 @@ def test_shared_storage_connector_hashes(tmp_path):
|
|||||||
|
|
||||||
# don't put this import at the top level
|
# don't put this import at the top level
|
||||||
# it will call torch.cuda.device_count()
|
# it will call torch.cuda.device_count()
|
||||||
from transformers import AutoProcessor # noqa: F401
|
from transformers import AutoProcessor
|
||||||
|
|
||||||
# Create processor to handle the chat prompt
|
# Create processor to handle the chat prompt
|
||||||
processor = AutoProcessor.from_pretrained(MODEL_NAME)
|
processor = AutoProcessor.from_pretrained(MODEL_NAME)
|
||||||
|
|||||||
@ -81,10 +81,7 @@ class ECExampleConnector(ECConnectorBase):
|
|||||||
assert encoder_cache is not None
|
assert encoder_cache is not None
|
||||||
if metadata is None:
|
if metadata is None:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
(
|
"In connector.start_load_caches, but the connector metadata is None"
|
||||||
"In connector.start_load_caches, ",
|
|
||||||
"but the connector metadata is None",
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
# Load the EC for each mm data
|
# Load the EC for each mm data
|
||||||
|
|||||||
@ -43,7 +43,7 @@ async def scale_elastic_ep(raw_request: Request):
|
|||||||
try:
|
try:
|
||||||
body = await raw_request.json()
|
body = await raw_request.json()
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
raise HTTPException(status_code=400, detail="Invalid JSON format") from e # noqa: B904
|
raise HTTPException(status_code=400, detail="Invalid JSON format") from e
|
||||||
|
|
||||||
new_data_parallel_size = body.get("new_data_parallel_size")
|
new_data_parallel_size = body.get("new_data_parallel_size")
|
||||||
drain_timeout = body.get("drain_timeout", 120) # Default 2 minutes
|
drain_timeout = body.get("drain_timeout", 120) # Default 2 minutes
|
||||||
|
|||||||
@ -1,6 +1,5 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
# ruff: noqa: SIM117
|
|
||||||
import fnmatch
|
import fnmatch
|
||||||
import glob
|
import glob
|
||||||
import itertools
|
import itertools
|
||||||
@ -59,7 +58,7 @@ def is_moe_model(model: torch.nn.Module) -> bool:
|
|||||||
|
|
||||||
|
|
||||||
class BitsAndBytesModelLoader(BaseModelLoader):
|
class BitsAndBytesModelLoader(BaseModelLoader):
|
||||||
"""Model loader to load model weights with BitAndBytes quantization."""
|
"""Model loader to load model weights with BitsAndBytes quantization."""
|
||||||
|
|
||||||
possible_config_file_names = ["adapter_config.json"]
|
possible_config_file_names = ["adapter_config.json"]
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,5 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
# ruff: noqa: SIM117
|
|
||||||
import os
|
import os
|
||||||
from collections.abc import Generator
|
from collections.abc import Generator
|
||||||
|
|
||||||
|
|||||||
@ -6,9 +6,7 @@ Define EC connector functionality mixin for model runners.
|
|||||||
|
|
||||||
from collections.abc import Generator
|
from collections.abc import Generator
|
||||||
from contextlib import AbstractContextManager, contextmanager, nullcontext
|
from contextlib import AbstractContextManager, contextmanager, nullcontext
|
||||||
from typing import (
|
from typing import TYPE_CHECKING
|
||||||
TYPE_CHECKING, # noqa: UP035
|
|
||||||
)
|
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|||||||
@ -7,9 +7,7 @@ Define KV connector functionality mixin for model runners.
|
|||||||
import copy
|
import copy
|
||||||
from collections.abc import Generator
|
from collections.abc import Generator
|
||||||
from contextlib import AbstractContextManager, contextmanager, nullcontext
|
from contextlib import AbstractContextManager, contextmanager, nullcontext
|
||||||
from typing import (
|
from typing import TYPE_CHECKING
|
||||||
TYPE_CHECKING, # noqa: UP035
|
|
||||||
)
|
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user