diff --git a/vllm/config/model.py b/vllm/config/model.py
index 92cd48402a65d..b595c8550491b 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -1726,19 +1726,12 @@ class ModelConfig:
         logger.debug_once("head dtype: %s", head_dtype)
         return head_dtype
 
-    @property
-    def hidden_size(self):
-        if hasattr(self.hf_config, "hidden_size"):
-            return self.hf_config.hidden_size
-        text_config = self.hf_config.get_text_config()
-        return text_config.hidden_size
-
     @property
     def embedding_size(self):
         dense_modules = try_get_dense_modules(self.model, revision=self.revision)
         if dense_modules is not None:
             return dense_modules[-1]["out_features"]
-        return self.hidden_size
+        return self.get_hidden_size()
 
     def get_and_verify_max_len(self, max_model_len: int):
         # Consider max_model_len in tokenizer_config only when
diff --git a/vllm/model_executor/models/adapters.py b/vllm/model_executor/models/adapters.py
index 738400ae864a5..05f257feea3ee 100644
--- a/vllm/model_executor/models/adapters.py
+++ b/vllm/model_executor/models/adapters.py
@@ -301,7 +301,7 @@ def as_seq_cls_model(cls: _T) -> _T:
         quant_config = vllm_config.quant_config
 
         self.score = ReplicatedLinear(
-            model_config.hidden_size,
+            model_config.get_hidden_size(),
             text_config.num_labels,
             bias=False,
             params_dtype=vllm_config.model_config.head_dtype,