From 041e29471671a3d43afedb6a749421d56ef4d3e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=87=83?=
Date: Thu, 20 Feb 2025 15:04:30 +0800
Subject: [PATCH] [Misc] add mm_processor_kwargs to extra_body for Qwen2.5-VL
 (#13533)

---
 vllm/entrypoints/openai/protocol.py       |  4 ++++
 vllm/entrypoints/openai/serving_engine.py |  2 ++
 vllm/model_executor/models/qwen2_5_vl.py  |  2 +-
 vllm/transformers_utils/processor.py      | 12 +++++++++++-
 4 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 2bcfdc2357761..98ea6a46133ff 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -312,6 +312,10 @@ class ChatCompletionRequest(OpenAIBaseModel):
         description=("Additional kwargs to pass to the template renderer. "
                      "Will be accessible by the chat template."),
     )
+    mm_processor_kwargs: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description=("Additional kwargs to pass to the HF processor."),
+    )
     guided_json: Optional[Union[str, dict, BaseModel]] = Field(
         default=None,
         description=("If specified, the output will follow the JSON schema."),
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index 785117ca1d451..dfc3328677c75 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -451,6 +451,8 @@ class OpenAIServing:
             prompt_token_ids=prompt_inputs["prompt_token_ids"])
         if mm_data is not None:
             engine_prompt["multi_modal_data"] = mm_data
+        if request.mm_processor_kwargs is not None:
+            engine_prompt["mm_processor_kwargs"] = request.mm_processor_kwargs
 
         return conversation, [request_prompt], [engine_prompt]
 
diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py
index f16fa536791ea..ff10fcb4315cc 100644
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -689,7 +689,7 @@ class Qwen2_5_VLProcessingInfo(Qwen2VLProcessingInfo):
         min_pixels: Optional[int] = None,
         max_pixels: Optional[int] = None,
         size: Optional[dict[str, int]] = None,
-        fps: Optional[float] = None,
+        fps: Optional[Union[float, List[float]]] = None,
         **kwargs: object,
     ) -> Qwen2_5_VLProcessor:
         if fps is not None:
diff --git a/vllm/transformers_utils/processor.py b/vllm/transformers_utils/processor.py
index 29fab16c25c11..1d09b99d50c06 100644
--- a/vllm/transformers_utils/processor.py
+++ b/vllm/transformers_utils/processor.py
@@ -23,6 +23,15 @@ class HashableDict(dict):
         return hash(frozenset(self.items()))
 
 
+class HashableList(list):
+    """
+    A list that can be hashed by lru_cache.
+    """
+
+    def __hash__(self) -> int:  # type: ignore[override]
+        return hash(tuple(self))
+
+
 def _merge_mm_kwargs(model_config: "ModelConfig", **kwargs):
     base_kwargs = model_config.mm_processor_kwargs
     if base_kwargs is None:
@@ -36,7 +45,8 @@ def _merge_mm_kwargs(model_config: "ModelConfig", **kwargs):
     for key, value in merged_kwargs.items():
         if isinstance(value, dict):
             merged_kwargs[key] = HashableDict(value)
-
+        if isinstance(value, list):
+            merged_kwargs[key] = HashableList(value)
     return merged_kwargs
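
Usage sketch (reviewer note, not part of the commit): with this patch applied,
mm_processor_kwargs can be sent per request through the OpenAI Python client's
extra_body, which forwards unrecognized fields verbatim to the vLLM server.
The server command, model name, and image URL below are illustrative
placeholders; min_pixels/max_pixels are the Qwen2.5-VL processor knobs visible
in the qwen2_5_vl.py hunk above.

    # Assumes an OpenAI-compatible vLLM server is already running, e.g.:
    #   vllm serve Qwen/Qwen2.5-VL-7B-Instruct
    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

    response = client.chat.completions.create(
        model="Qwen/Qwen2.5-VL-7B-Instruct",
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image."},
                {"type": "image_url",
                 "image_url": {"url": "https://example.com/cat.jpg"}},
            ],
        }],
        # With this patch, mm_processor_kwargs reaches the HF processor
        # on a per-request basis instead of being fixed at server startup.
        extra_body={
            "mm_processor_kwargs": {
                "min_pixels": 256 * 28 * 28,
                "max_pixels": 1280 * 28 * 28,
            },
        },
    )
    print(response.choices[0].message.content)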
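
Design note on the processor.py hunks: vLLM caches HF processor construction
behind functools.lru_cache, which hashes its arguments, so plain dicts and
lists cannot be passed through; that is what the HashableDict/HashableList
wrappers are for. The qwen2_5_vl.py hunk widens fps to accept a List[float],
which is what makes the new list branch in _merge_mm_kwargs reachable. A
standalone sketch of the pattern follows; expensive_build is an illustrative
stand-in, not a vLLM function.

    from functools import lru_cache


    class HashableList(list):
        """A list usable as an lru_cache key (elements must be hashable)."""

        def __hash__(self) -> int:
            return hash(tuple(self))


    @lru_cache
    def expensive_build(fps):
        # Stand-in for constructing an HF processor keyed on its kwargs.
        return sum(fps) / len(fps)


    print(expensive_build(HashableList([2.0, 4.0])))  # 3.0, computed once
    print(expensive_build(HashableList([2.0, 4.0])))  # cache hit: same hash, ==
    # A plain list here would raise "TypeError: unhashable type: 'list'".

Hashing a mutable list is only sound because the merged kwargs are not
mutated after being handed to the cache, which appears to be the assumption
_merge_mm_kwargs relies on.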