From 9ad0688e436a41e386fa49e81ee344cb59f7d23c Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Wed, 10 Sep 2025 01:37:25 +0800
Subject: [PATCH] [Bugfix] Fix hidden_size for multimodal classification model (#24501)

Signed-off-by: Jee Jee Li
---
Note: get_model_hidden_size() returns hf_config.hidden_size when that
attribute exists; otherwise it returns the hidden_size of the config
obtained from hf_config.get_text_config().

 vllm/model_executor/models/adapters.py | 5 +++--
 vllm/model_executor/models/utils.py    | 7 +++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/models/adapters.py b/vllm/model_executor/models/adapters.py
index c189208fa075..78ad9a433e31 100644
--- a/vllm/model_executor/models/adapters.py
+++ b/vllm/model_executor/models/adapters.py
@@ -255,7 +255,7 @@ def as_seq_cls_model(cls: _T) -> _T:
     from vllm.model_executor.models.interfaces import SupportsCrossEncoding
     from vllm.sequence import IntermediateTensors
 
-    from .utils import maybe_prefix
+    from .utils import get_model_hidden_size, maybe_prefix
 
     class ModelForSequenceClassification(_create_pooling_model_cls(cls),
                                          SupportsCrossEncoding):
@@ -263,9 +263,10 @@ def as_seq_cls_model(cls: _T) -> _T:
         def _init_pooler(self, vllm_config: "VllmConfig", prefix: str = ""):
             config = vllm_config.model_config.hf_config
             quant_config = vllm_config.quant_config
+            hidden_size = get_model_hidden_size(config)
 
             self.score = ReplicatedLinear(
-                config.hidden_size,
+                hidden_size,
                 config.num_labels,
                 bias=False,
                 params_dtype=torch.float32,
diff --git a/vllm/model_executor/models/utils.py b/vllm/model_executor/models/utils.py
index 28cfefac30dd..e716ec582baa 100644
--- a/vllm/model_executor/models/utils.py
+++ b/vllm/model_executor/models/utils.py
@@ -761,3 +761,10 @@ def fast_topk(values: torch.Tensor, topk: int,
     else:
         # Use topk for efficiency with larger k values
         return torch.topk(values, topk, dim=dim)
+
+
+def get_model_hidden_size(hf_config: PretrainedConfig) -> int:
+    if hasattr(hf_config, "hidden_size"):
+        return hf_config.hidden_size
+    text_config = hf_config.get_text_config()
+    return text_config.hidden_size