From 98ac0cb32d9462e50bd998f9f2eb6e4c09232c95 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Fri, 29 Aug 2025 12:41:20 +0800 Subject: [PATCH] [Bugfix] Use `ReplicatedLinear` for SequenceClassification head (#23836) Signed-off-by: Isotr0py --- tests/models/language/pooling/test_qwen3_reranker.py | 7 ++----- vllm/model_executor/models/adapters.py | 5 ++--- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/models/language/pooling/test_qwen3_reranker.py b/tests/models/language/pooling/test_qwen3_reranker.py index 8c6537f3193f8..5dd2d9eae9115 100644 --- a/tests/models/language/pooling/test_qwen3_reranker.py +++ b/tests/models/language/pooling/test_qwen3_reranker.py @@ -96,8 +96,5 @@ def test_rerank_models_mteb_tp(vllm_runner, "tensor_parallel_size": 2, } - mteb_test_rerank_models(Qwen3RerankerHfRunner, - vllm_runner, - model_info, - vllm_extra_kwargs, - atol=1.2e-2) + mteb_test_rerank_models(Qwen3RerankerHfRunner, vllm_runner, model_info, + vllm_extra_kwargs) diff --git a/vllm/model_executor/models/adapters.py b/vllm/model_executor/models/adapters.py index 49e9a2d65ea11..50c2cd97f3d09 100644 --- a/vllm/model_executor/models/adapters.py +++ b/vllm/model_executor/models/adapters.py @@ -248,7 +248,7 @@ def as_seq_cls_model(cls: _T) -> _T: return cls # Lazy import - from vllm.model_executor.layers.linear import RowParallelLinear + from vllm.model_executor.layers.linear import ReplicatedLinear from vllm.model_executor.layers.pooler import (ClassifierPooler, DispatchPooler, Pooler, PoolingMethod, PoolingType) @@ -264,10 +264,9 @@ def as_seq_cls_model(cls: _T) -> _T: config = vllm_config.model_config.hf_config quant_config = vllm_config.quant_config - self.score = RowParallelLinear( + self.score = ReplicatedLinear( config.hidden_size, config.num_labels, - input_is_parallel=False, bias=False, params_dtype=torch.float32, quant_config=quant_config,