mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 16:46:11 +08:00
[CPU][CI] Fallback sliding window to v0 and fix CPU pooling model tests (#19901)
Signed-off-by: jiang1.li <jiang1.li@intel.com>
This commit is contained in:
parent
2e3e3c86dc
commit
79f2f1c2a1
@ -1,5 +1,7 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
import os
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from vllm.config import PoolerConfig
|
from vllm.config import PoolerConfig
|
||||||
@ -33,7 +35,7 @@ def v1(run_with_both_engines):
|
|||||||
# To avoid this problem, for now we skip v0 since it will be
|
# To avoid this problem, for now we skip v0 since it will be
|
||||||
# deprecated anyway.
|
# deprecated anyway.
|
||||||
pytest.param("ssmits/Qwen2-7B-Instruct-embed-base",
|
pytest.param("ssmits/Qwen2-7B-Instruct-embed-base",
|
||||||
marks=[pytest.mark.skip_v0]),
|
marks=[pytest.mark.skip_v0, pytest.mark.cpu_model]),
|
||||||
# [Encoder-only]
|
# [Encoder-only]
|
||||||
pytest.param("BAAI/bge-base-en-v1.5",
|
pytest.param("BAAI/bge-base-en-v1.5",
|
||||||
marks=[
|
marks=[
|
||||||
@ -58,6 +60,9 @@ def test_models(
|
|||||||
model,
|
model,
|
||||||
monkeypatch,
|
monkeypatch,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
if model == "intfloat/e5-mistral-7b-instruct" and current_platform.is_cpu(
|
||||||
|
) and os.environ.get("VLLM_USE_V1", "0") == "1":
|
||||||
|
pytest.skip("CPU V1 doesn't support sliding window")
|
||||||
|
|
||||||
if model == "BAAI/bge-multilingual-gemma2" and current_platform.is_rocm():
|
if model == "BAAI/bge-multilingual-gemma2" and current_platform.is_rocm():
|
||||||
# ROCm Triton FA does not currently support sliding window attention
|
# ROCm Triton FA does not currently support sliding window attention
|
||||||
|
|||||||
@ -1449,6 +1449,13 @@ class EngineArgs:
|
|||||||
model_config=model_config) and _warn_or_fallback(
|
model_config=model_config) and _warn_or_fallback(
|
||||||
current_platform.device_name):
|
current_platform.device_name):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
if (current_platform.is_cpu()
|
||||||
|
and model_config.get_sliding_window() is not None):
|
||||||
|
_raise_or_fallback(feature_name="sliding window (CPU backend)",
|
||||||
|
recommend_to_remove=False)
|
||||||
|
return False
|
||||||
|
|
||||||
#############################################################
|
#############################################################
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user