[Doc] Improve documentation for multimodal CLI args (#16960)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Authored by Cyrus Leung on 2025-04-22 16:35:35 +08:00; committed by GitHub
parent e4d6144232
commit 8f7bace7c3
2 changed files with 17 additions and 10 deletions

vllm/config.py

@@ -54,13 +54,15 @@ if TYPE_CHECKING:
     from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import (
         BaseTokenizerGroup)
-    Config = TypeVar("Config", bound=DataclassInstance)
+    ConfigType = type[DataclassInstance]
 else:
     QuantizationConfig = None
-    Config = TypeVar("Config")
+    ConfigType = type
 logger = init_logger(__name__)
+ConfigT = TypeVar("ConfigT", bound=ConfigType)
 # This value is chosen to have a balance between ITL and TTFT. Note it is
 # not optimized for throughput.
 _DEFAULT_MAX_NUM_BATCHED_TOKENS = 2048
@@ -162,7 +164,7 @@ def get_attr_docs(cls: type[Any]) -> dict[str, str]:
     return out
-def config(cls: type[Config]) -> type[Config]:
+def config(cls: ConfigT) -> ConfigT:
     """
     A decorator that ensures all fields in a dataclass have default values
     and that each field has a docstring.
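
The hunk above switches the decorator from the plain `Config` TypeVar to `ConfigT`, which is bound to a dataclass type so that `config` hands back the same concrete class it receives. A minimal sketch of that pattern, with illustrative names (`ExampleConfig` is hypothetical and the validation shown is simplified, not vLLM's actual checks):

```python
from dataclasses import MISSING, dataclass, fields
from typing import TypeVar

# A TypeVar bound to `type` lets the decorator return the same concrete class
# it receives, so type checkers keep the subclass type (simplified sketch).
ConfigT = TypeVar("ConfigT", bound=type)


def config(cls: ConfigT) -> ConfigT:
    """Require that every field of the decorated dataclass has a default."""
    for f in fields(cls):
        if f.default is MISSING and f.default_factory is MISSING:
            raise ValueError(f"Field '{f.name}' of {cls.__name__} needs a default")
    return cls


@config
@dataclass
class ExampleConfig:  # hypothetical config class for illustration
    batch_size: int = 8
```

Because the return type is the same TypeVar as the parameter, `ExampleConfig` keeps its own type after decoration instead of collapsing to a generic dataclass type.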
@@ -181,7 +183,7 @@ def config(cls: type[Config]) -> type[Config]:
     return cls
-def get_field(cls: type[Config], name: str) -> Field:
+def get_field(cls: ConfigType, name: str) -> Field:
     """Get the default factory field of a dataclass by name. Used for getting
     default factory fields in `EngineArgs`."""
     if not is_dataclass(cls):
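
`get_field` itself only gains the new annotation here; for readers unfamiliar with it, this is roughly the kind of lookup it performs (the helper name and demo dataclass are illustrative, not vLLM's implementation):

```python
from dataclasses import MISSING, Field, dataclass, field, fields, is_dataclass


def get_field_sketch(cls: type, name: str) -> Field:
    """Look up a dataclass field by name so its default_factory can be
    reused, e.g. as an argparse default (simplified sketch)."""
    if not is_dataclass(cls):
        raise TypeError(f"{cls} is not a dataclass")
    for f in fields(cls):
        if f.name == name:
            return f
    raise ValueError(f"{cls.__name__} has no field named {name!r}")


@dataclass
class DemoConfig:  # hypothetical config class for illustration
    limits: dict[str, int] = field(default_factory=dict)


print(get_field_sketch(DemoConfig, "limits").default_factory())  # -> {}
```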
@@ -2749,6 +2751,9 @@ class MultiModalConfig:
     The maximum number of input items allowed per prompt for each modality.
     This should be a JSON string that will be parsed into a dictionary.
     Defaults to 1 (V0) or 999 (V1) for each modality.
+    For example, to allow up to 16 images and 2 videos per prompt:
+    ``{"images": 16, "videos": 2}``
     """
     def compute_hash(self) -> str:
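
The new docstring example maps directly onto the corresponding engine argument. A usage sketch, assuming this config field is set through `limit_mm_per_prompt` on the `LLM` constructor (the model name is only a placeholder; on the command line the same value is passed as a JSON string, as the docstring notes):

```python
from vllm import LLM

# Illustrative only: allow up to 16 images and 2 videos per prompt,
# mirroring the docstring's example value.
llm = LLM(
    model="microsoft/Phi-3-vision-128k-instruct",  # placeholder multimodal model
    trust_remote_code=True,
    limit_mm_per_prompt={"images": 16, "videos": 2},
)
```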

vllm/engine/arg_utils.py

@@ -17,7 +17,7 @@ from typing_extensions import TypeIs
 import vllm.envs as envs
 from vllm import version
 from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
-                         Config, ConfigFormat, DecodingConfig, Device,
+                         ConfigFormat, ConfigType, DecodingConfig, Device,
                          DeviceConfig, DistributedExecutorBackend, HfOverrides,
                          KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
                          ModelConfig, ModelImpl, MultiModalConfig,
@@ -304,7 +304,7 @@ class EngineArgs:
             """Check if the class is a custom type."""
             return cls.__module__ != "builtins"
-        def get_kwargs(cls: type[Config]) -> dict[str, Any]:
+        def get_kwargs(cls: ConfigType) -> dict[str, Any]:
             cls_docs = get_attr_docs(cls)
             kwargs = {}
             for field in fields(cls):
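
`get_kwargs` is the helper that turns each config dataclass into `parser.add_argument` keyword arguments; only its annotation changes in this commit. A simplified sketch of the pattern it follows (the helper name and the exact kwargs emitted are illustrative):

```python
from dataclasses import MISSING, fields, is_dataclass
from typing import Any


def get_kwargs_sketch(cls: type) -> dict[str, dict[str, Any]]:
    """Derive per-field argparse kwargs from a config dataclass, using each
    field's default (or default_factory) as the CLI default (sketch only)."""
    assert is_dataclass(cls)
    kwargs: dict[str, dict[str, Any]] = {}
    for f in fields(cls):
        if f.default is not MISSING:
            default = f.default
        elif f.default_factory is not MISSING:
            default = f.default_factory()
        else:
            default = None
        kwargs[f.name] = {"default": default}
    return kwargs
```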
@ -678,13 +678,15 @@ class EngineArgs:
'--mm-processor-kwargs',
default=None,
type=json.loads,
help=('Overrides for the multimodal input mapping/processing, '
'e.g., image processor. For example: ``{"num_crops": 4}``.'))
help=('Overrides for the multi-modal processor obtained from '
'``AutoProcessor.from_pretrained``. The available overrides '
'depend on the model that is being run.'
'For example, for Phi-3-Vision: ``{"num_crops": 4}``.'))
parser.add_argument(
'--disable-mm-preprocessor-cache',
action='store_true',
help='If true, then disables caching of the multi-modal '
'preprocessor/mapper. (not recommended)')
help='If True, disable caching of the processed multi-modal '
'inputs.')
# LoRA related configs
parser.add_argument('--enable-lora',
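
For readers mapping the reworded help text back to usage, here is a sketch of the equivalent offline-inference call, assuming these engine arguments are forwarded through the `LLM` constructor (the model and override values simply mirror the Phi-3-Vision example in the help string):

```python
from vllm import LLM

# Illustrative only: pass processor overrides for Phi-3-Vision and disable the
# multi-modal preprocessor cache, mirroring the two CLI flags documented above.
llm = LLM(
    model="microsoft/Phi-3-vision-128k-instruct",
    trust_remote_code=True,  # Phi-3-Vision ships custom code on the Hub
    mm_processor_kwargs={"num_crops": 4},
    disable_mm_preprocessor_cache=True,
)
```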