From 045b396d090f4a16fbba760bef86e9a24a7ba9ce Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Sun, 12 Oct 2025 17:42:42 +0800 Subject: [PATCH] [Bugfix][CI/Build] Fix failing Mteb CI (#26638) Signed-off-by: Isotr0py --- tests/models/language/pooling_mteb_test/mteb_utils.py | 2 +- tests/models/language/pooling_mteb_test/test_jina.py | 5 +++++ .../models/language/pooling_mteb_test/test_st_projector.py | 1 + tests/models/utils.py | 1 + vllm/model_executor/layers/layernorm.py | 6 +++++- 5 files changed, 13 insertions(+), 2 deletions(-) diff --git a/tests/models/language/pooling_mteb_test/mteb_utils.py b/tests/models/language/pooling_mteb_test/mteb_utils.py index d96dc90416855..65ad49fad3653 100644 --- a/tests/models/language/pooling_mteb_test/mteb_utils.py +++ b/tests/models/language/pooling_mteb_test/mteb_utils.py @@ -191,7 +191,7 @@ def mteb_test_embed_models( with vllm_runner( model_info.name, runner="pooling", - max_model_len=None, + max_model_len=model_info.max_model_len, **vllm_extra_kwargs, ) as vllm_model: model_config = vllm_model.llm.llm_engine.model_config diff --git a/tests/models/language/pooling_mteb_test/test_jina.py b/tests/models/language/pooling_mteb_test/test_jina.py index 0a712b2542f3c..dbdf82af33c72 100644 --- a/tests/models/language/pooling_mteb_test/test_jina.py +++ b/tests/models/language/pooling_mteb_test/test_jina.py @@ -25,6 +25,11 @@ EMBEDDING_MODELS = [ mteb_score=0.824413164, architecture="XLMRobertaModel", is_matryoshka=True, + # The default max length of the model is 8194, which will crash + # CUDAGraph due to odd length for Gemm. We set it to 8192 to avoid + # this issue. 
+ max_model_len=8192, + dtype="float32", ) ] diff --git a/tests/models/language/pooling_mteb_test/test_st_projector.py b/tests/models/language/pooling_mteb_test/test_st_projector.py index 91b1ef828d0df..74fe4b9bcc03f 100644 --- a/tests/models/language/pooling_mteb_test/test_st_projector.py +++ b/tests/models/language/pooling_mteb_test/test_st_projector.py @@ -23,6 +23,7 @@ ST_PROJECTOR_MODELS = [ architecture="Gemma3TextModel", mteb_score=0.7473819294684156, enable_test=True, + dtype="float32", ), ] diff --git a/tests/models/utils.py b/tests/models/utils.py index 84697ad68d441..3d6e6cb89d62a 100644 --- a/tests/models/utils.py +++ b/tests/models/utils.py @@ -369,6 +369,7 @@ class ModelInfo: name: str architecture: str = "" dtype: str = "auto" + max_model_len: Optional[int] = None hf_dtype: str = "float32" hf_overrides: Optional[dict[str, Any]] = None default_pooling_type: str = "" diff --git a/vllm/model_executor/layers/layernorm.py b/vllm/model_executor/layers/layernorm.py index 6a49ae42ca895..910f145b1f8c2 100644 --- a/vllm/model_executor/layers/layernorm.py +++ b/vllm/model_executor/layers/layernorm.py @@ -318,7 +318,11 @@ class GemmaRMSNorm(CustomOp): """PyTorch-native implementation equivalent to forward().""" orig_dtype = x.dtype if residual is not None: - x = x + residual.float() if orig_dtype == torch.float16 else x + residual + x = ( + x.float() + residual.float() + if orig_dtype == torch.float16 + else x + residual + ) residual = x x = x.float()