mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-27 00:54:31 +08:00
[Docs] Reduce requirements for docs build (#23651)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
parent
1fdc732419
commit
379f828fba
@ -1,5 +1,6 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
import importlib
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
from argparse import SUPPRESS, HelpFormatter
|
from argparse import SUPPRESS, HelpFormatter
|
||||||
@ -7,25 +8,52 @@ from pathlib import Path
|
|||||||
from typing import Literal
|
from typing import Literal
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
from pydantic_core import core_schema
|
||||||
|
|
||||||
|
logger = logging.getLogger("mkdocs")
|
||||||
|
|
||||||
ROOT_DIR = Path(__file__).parent.parent.parent.parent
|
ROOT_DIR = Path(__file__).parent.parent.parent.parent
|
||||||
ARGPARSE_DOC_DIR = ROOT_DIR / "docs/argparse"
|
ARGPARSE_DOC_DIR = ROOT_DIR / "docs/argparse"
|
||||||
|
|
||||||
sys.path.insert(0, str(ROOT_DIR))
|
sys.path.insert(0, str(ROOT_DIR))
|
||||||
sys.modules["aiohttp"] = MagicMock()
|
|
||||||
sys.modules["blake3"] = MagicMock()
|
|
||||||
sys.modules["vllm._C"] = MagicMock()
|
sys.modules["vllm._C"] = MagicMock()
|
||||||
|
|
||||||
from vllm.benchmarks import latency # noqa: E402
|
|
||||||
from vllm.benchmarks import serve # noqa: E402
|
|
||||||
from vllm.benchmarks import throughput # noqa: E402
|
|
||||||
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs # noqa: E402
|
|
||||||
from vllm.entrypoints.cli.openai import ChatCommand # noqa: E402
|
|
||||||
from vllm.entrypoints.cli.openai import CompleteCommand # noqa: E402
|
|
||||||
from vllm.entrypoints.openai import cli_args # noqa: E402
|
|
||||||
from vllm.entrypoints.openai import run_batch # noqa: E402
|
|
||||||
from vllm.utils import FlexibleArgumentParser # noqa: E402
|
|
||||||
|
|
||||||
logger = logging.getLogger("mkdocs")
|
class PydanticMagicMock(MagicMock):
    """`MagicMock` subclass that can hand pydantic a pydantic-core schema."""

    def __get_pydantic_core_schema__(self, source_type, handler):
        # `source_type` and `handler` are accepted but not used: the schema
        # returned is always pydantic-core's "any" schema.
        any_schema = core_schema.any_schema()
        return any_schema
|
||||||
|
|
||||||
|
|
||||||
|
def auto_mock(module, attr, max_mocks=50):
    """Import `attr` from `module`, mocking missing modules during imports.

    `attr` is first looked up as an attribute of `module`. Only when that
    attribute does not exist is it imported as the submodule
    `{module}.{attr}`. Each time an import fails with `ModuleNotFoundError`,
    the module named by the exception is replaced in `sys.modules` with a
    `PydanticMagicMock` and the whole import is retried.

    Args:
        module: Dotted name of the module to import.
        attr: Name of an attribute or submodule of `module`.
        max_mocks: Maximum number of import attempts; every failed attempt
            mocks one module.

    Returns:
        The resolved attribute or submodule.

    Raises:
        importlib.metadata.PackageNotFoundError: Re-raised unchanged instead
            of being mocked.
        ModuleNotFoundError: Re-raised unchanged when the exception does not
            name the missing module, since there is nothing to mock.
        ImportError: If the import still fails after `max_mocks` attempts.
    """
    # `import importlib` alone does not load the `metadata` submodule, so load
    # it explicitly before the `except` clause below dereferences it.
    import importlib.metadata

    logger.info("Importing %s from %s", attr, module)
    not_found = object()
    for _ in range(max_mocks):
        try:
            # First treat attr as an attr, then as a submodule. The submodule
            # import must stay lazy: passing it as the `getattr` default would
            # evaluate it on every call and get a mock registered under
            # "module.attr" even for plain attributes such as classes.
            resolved = getattr(importlib.import_module(module), attr,
                               not_found)
            if resolved is not_found:
                resolved = importlib.import_module(f"{module}.{attr}")
            return resolved
        except importlib.metadata.PackageNotFoundError:
            # Subclass of ModuleNotFoundError, so it has to be caught first to
            # avoid being treated as a mockable missing module.
            raise
        except ModuleNotFoundError as e:
            if e.name is None:
                # Without a module name there is no `sys.modules` key to mock.
                raise
            logger.info("Mocking %s for argparse doc generation", e.name)
            sys.modules[e.name] = PydanticMagicMock()

    raise ImportError(
        f"Failed to import {module}.{attr} after mocking {max_mocks} imports")
|
||||||
|
|
||||||
|
|
||||||
|
latency = auto_mock("vllm.benchmarks", "latency")
|
||||||
|
serve = auto_mock("vllm.benchmarks", "serve")
|
||||||
|
throughput = auto_mock("vllm.benchmarks", "throughput")
|
||||||
|
AsyncEngineArgs = auto_mock("vllm.engine.arg_utils", "AsyncEngineArgs")
|
||||||
|
EngineArgs = auto_mock("vllm.engine.arg_utils", "EngineArgs")
|
||||||
|
ChatCommand = auto_mock("vllm.entrypoints.cli.openai", "ChatCommand")
|
||||||
|
CompleteCommand = auto_mock("vllm.entrypoints.cli.openai", "CompleteCommand")
|
||||||
|
cli_args = auto_mock("vllm.entrypoints.openai", "cli_args")
|
||||||
|
run_batch = auto_mock("vllm.entrypoints.openai", "run_batch")
|
||||||
|
FlexibleArgumentParser = auto_mock("vllm.utils", "FlexibleArgumentParser")
|
||||||
|
|
||||||
|
|
||||||
class MarkdownFormatter(HelpFormatter):
|
class MarkdownFormatter(HelpFormatter):
|
||||||
|
|||||||
@ -14,20 +14,6 @@ ruff
|
|||||||
# Required for argparse hook only
|
# Required for argparse hook only
|
||||||
-f https://download.pytorch.org/whl/cpu
|
-f https://download.pytorch.org/whl/cpu
|
||||||
cachetools
|
cachetools
|
||||||
cbor2
|
|
||||||
cloudpickle
|
|
||||||
fastapi
|
|
||||||
msgspec
|
msgspec
|
||||||
openai
|
|
||||||
openai-harmony
|
|
||||||
partial-json-parser
|
|
||||||
pillow
|
|
||||||
psutil
|
|
||||||
pybase64
|
|
||||||
pydantic
|
pydantic
|
||||||
setproctitle
|
|
||||||
torch
|
torch
|
||||||
transformers
|
|
||||||
zmq
|
|
||||||
uvloop
|
|
||||||
prometheus-client
|
|
||||||
|
|||||||
@ -16,14 +16,17 @@ import msgspec
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
from vllm.inputs import SingletonInputs
|
from vllm.inputs import SingletonInputs
|
||||||
from vllm.lora.request import LoRARequest
|
|
||||||
from vllm.multimodal import MultiModalKwargs, MultiModalPlaceholderDict
|
from vllm.multimodal import MultiModalKwargs, MultiModalPlaceholderDict
|
||||||
from vllm.pooling_params import PoolingParams
|
from vllm.pooling_params import PoolingParams
|
||||||
from vllm.sampling_params import RequestOutputKind, SamplingParams
|
from vllm.sampling_params import RequestOutputKind, SamplingParams
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
from vllm.lora.request import LoRARequest
|
||||||
from vllm.v1.worker.kv_connector_model_runner_mixin import (
|
from vllm.v1.worker.kv_connector_model_runner_mixin import (
|
||||||
KVConnectorOutput)
|
KVConnectorOutput)
|
||||||
|
else:
|
||||||
|
LoRARequest = Any
|
||||||
|
KVConnectorOutput = Any
|
||||||
|
|
||||||
VLLM_TOKEN_ID_ARRAY_TYPE = "l"
|
VLLM_TOKEN_ID_ARRAY_TYPE = "l"
|
||||||
|
|
||||||
@ -1138,7 +1141,7 @@ class IntermediateTensors:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
tensors: dict[str, torch.Tensor]
|
tensors: dict[str, torch.Tensor]
|
||||||
kv_connector_output: Optional["KVConnectorOutput"]
|
kv_connector_output: Optional[KVConnectorOutput]
|
||||||
|
|
||||||
def __init__(self, tensors):
|
def __init__(self, tensors):
|
||||||
# manually define this function, so that
|
# manually define this function, so that
|
||||||
|
|||||||
@ -27,19 +27,6 @@ from transformers.utils import CONFIG_NAME as HF_CONFIG_NAME
|
|||||||
|
|
||||||
from vllm import envs
|
from vllm import envs
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
# yapf conflicts with isort for this block
|
|
||||||
# yapf: disable
|
|
||||||
from vllm.transformers_utils.configs import (ChatGLMConfig, DeepseekVLV2Config,
|
|
||||||
EAGLEConfig, JAISConfig,
|
|
||||||
KimiVLConfig, MedusaConfig,
|
|
||||||
MLPSpeculatorConfig,
|
|
||||||
Nemotron_Nano_VL_Config,
|
|
||||||
NemotronConfig, OvisConfig,
|
|
||||||
RWConfig, SpeculatorsConfig,
|
|
||||||
Step3TextConfig, Step3VLConfig,
|
|
||||||
UltravoxConfig)
|
|
||||||
# yapf: enable
|
|
||||||
from vllm.transformers_utils.configs.mistral import adapt_config_dict
|
|
||||||
from vllm.transformers_utils.utils import check_gguf_file
|
from vllm.transformers_utils.utils import check_gguf_file
|
||||||
|
|
||||||
if envs.VLLM_USE_MODELSCOPE:
|
if envs.VLLM_USE_MODELSCOPE:
|
||||||
@ -67,24 +54,31 @@ def _get_hf_token() -> Optional[str]:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
_CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = {
|
class LazyConfigDict(dict):
|
||||||
"chatglm": ChatGLMConfig,
|
|
||||||
"deepseek_vl_v2": DeepseekVLV2Config,
|
def __getitem__(self, key):
|
||||||
"kimi_vl": KimiVLConfig,
|
import vllm.transformers_utils.configs as configs
|
||||||
"Llama_Nemotron_Nano_VL": Nemotron_Nano_VL_Config,
|
return getattr(configs, super().__getitem__(key))
|
||||||
"RefinedWeb": RWConfig, # For tiiuae/falcon-40b(-instruct)
|
|
||||||
"RefinedWebModel": RWConfig, # For tiiuae/falcon-7b(-instruct)
|
|
||||||
"jais": JAISConfig,
|
_CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = LazyConfigDict(
|
||||||
"mlp_speculator": MLPSpeculatorConfig,
|
chatglm="ChatGLMConfig",
|
||||||
"medusa": MedusaConfig,
|
deepseek_vl_v2="DeepseekVLV2Config",
|
||||||
"eagle": EAGLEConfig,
|
kimi_vl="KimiVLConfig",
|
||||||
"speculators": SpeculatorsConfig,
|
Llama_Nemotron_Nano_VL="Nemotron_Nano_VL_Config",
|
||||||
"nemotron": NemotronConfig,
|
RefinedWeb="RWConfig", # For tiiuae/falcon-40b(-instruct)
|
||||||
"ovis": OvisConfig,
|
RefinedWebModel="RWConfig", # For tiiuae/falcon-7b(-instruct)
|
||||||
"ultravox": UltravoxConfig,
|
jais="JAISConfig",
|
||||||
"step3_vl": Step3VLConfig,
|
mlp_speculator="MLPSpeculatorConfig",
|
||||||
"step3_text": Step3TextConfig,
|
medusa="MedusaConfig",
|
||||||
}
|
eagle="EAGLEConfig",
|
||||||
|
speculators="SpeculatorsConfig",
|
||||||
|
nemotron="NemotronConfig",
|
||||||
|
ovis="OvisConfig",
|
||||||
|
ultravox="UltravoxConfig",
|
||||||
|
step3_vl="Step3VLConfig",
|
||||||
|
step3_text="Step3TextConfig",
|
||||||
|
)
|
||||||
|
|
||||||
_CONFIG_ATTRS_MAPPING: dict[str, str] = {
|
_CONFIG_ATTRS_MAPPING: dict[str, str] = {
|
||||||
"llm_config": "text_config",
|
"llm_config": "text_config",
|
||||||
@ -461,6 +455,8 @@ def get_config(
|
|||||||
model, revision, **kwargs)
|
model, revision, **kwargs)
|
||||||
config_dict["max_position_embeddings"] = max_position_embeddings
|
config_dict["max_position_embeddings"] = max_position_embeddings
|
||||||
|
|
||||||
|
from vllm.transformers_utils.configs.mistral import adapt_config_dict
|
||||||
|
|
||||||
config = adapt_config_dict(config_dict)
|
config = adapt_config_dict(config_dict)
|
||||||
|
|
||||||
# Mistral configs may define sliding_window as list[int]. Convert it
|
# Mistral configs may define sliding_window as list[int]. Convert it
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user