From 79f2f1c2a1999d1e7a5202062bad4e115fd9d775 Mon Sep 17 00:00:00 2001 From: "Li, Jiang" Date: Fri, 20 Jun 2025 23:30:36 +0800 Subject: [PATCH] [CPU][CI] Fallback sliding window to v0 and fix CPU pooling model tests (#19901) Signed-off-by: jiang1.li --- tests/models/language/pooling/test_embedding.py | 7 ++++++- vllm/engine/arg_utils.py | 7 +++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/models/language/pooling/test_embedding.py b/tests/models/language/pooling/test_embedding.py index e29b4f6e8bec9..5ef9f768c5744 100644 --- a/tests/models/language/pooling/test_embedding.py +++ b/tests/models/language/pooling/test_embedding.py @@ -1,5 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import os + import pytest from vllm.config import PoolerConfig @@ -33,7 +35,7 @@ def v1(run_with_both_engines): # To avoid this problem, for now we skip v0 since it will be # deprecated anyway. pytest.param("ssmits/Qwen2-7B-Instruct-embed-base", - marks=[pytest.mark.skip_v0]), + marks=[pytest.mark.skip_v0, pytest.mark.cpu_model]), # [Encoder-only] pytest.param("BAAI/bge-base-en-v1.5", marks=[ @@ -58,6 +60,9 @@ def test_models( model, monkeypatch, ) -> None: + if model == "intfloat/e5-mistral-7b-instruct" and current_platform.is_cpu( + ) and os.environ.get("VLLM_USE_V1", "0") == "1": + pytest.skip("CPU V1 doesn't support sliding window") if model == "BAAI/bge-multilingual-gemma2" and current_platform.is_rocm(): # ROCm Triton FA does not currently support sliding window attention diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 7a88e3269a5ed..bffc8ba8c907f 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1449,6 +1449,13 @@ class EngineArgs: model_config=model_config) and _warn_or_fallback( current_platform.device_name): return False + + if (current_platform.is_cpu() + and model_config.get_sliding_window() is not None): + _raise_or_fallback(feature_name="sliding window (CPU backend)", + recommend_to_remove=False) + return False + ############################################################# return True